1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
33 #include "stringpool.h"
39 #include "diagnostic-core.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
46 #include "insn-attr.h"
52 #include "sched-int.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
60 #include "target-globals.h"
62 #include "tm-constrs.h"
65 /* This file should be included last. */
66 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Optional callback run while emitting object attributes; stays NULL
   unless something installs one.  NOTE(review): assigned externally --
   confirm against the language-hook setup code.  */
void (*arm_lang_output_object_attributes_hook)(void);
79 /* Forward function declarations. */
80 static bool arm_const_not_ok_for_debug_p (rtx
);
81 static bool arm_needs_doubleword_align (machine_mode
, const_tree
);
82 static int arm_compute_static_chain_stack_bytes (void);
83 static arm_stack_offsets
*arm_get_frame_offsets (void);
84 static void arm_add_gc_roots (void);
85 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
86 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
87 static unsigned bit_count (unsigned long);
88 static unsigned feature_count (const arm_feature_set
*);
89 static int arm_address_register_rtx_p (rtx
, int);
90 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
91 static bool is_called_in_ARM_mode (tree
);
92 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
93 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
94 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
95 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
96 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
97 inline static int thumb1_index_register_rtx_p (rtx
, int);
98 static int thumb_far_jump_used_p (void);
99 static bool thumb_force_lr_save (void);
100 static unsigned arm_size_return_regs (void);
101 static bool arm_assemble_integer (rtx
, unsigned int, int);
102 static void arm_print_operand (FILE *, rtx
, int);
103 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
104 static bool arm_print_operand_punct_valid_p (unsigned char code
);
105 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
106 static arm_cc
get_arm_condition_code (rtx
);
107 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
108 static const char *output_multi_immediate (rtx
*, const char *, const char *,
110 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
111 static struct machine_function
*arm_init_machine_status (void);
112 static void thumb_exit (FILE *, int);
113 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
114 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
115 static Mnode
*add_minipool_forward_ref (Mfix
*);
116 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
117 static Mnode
*add_minipool_backward_ref (Mfix
*);
118 static void assign_minipool_offsets (Mfix
*);
119 static void arm_print_value (FILE *, rtx
);
120 static void dump_minipool (rtx_insn
*);
121 static int arm_barrier_cost (rtx_insn
*);
122 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
123 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
124 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
126 static void arm_reorg (void);
127 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
128 static unsigned long arm_compute_save_reg0_reg12_mask (void);
129 static unsigned long arm_compute_save_reg_mask (void);
130 static unsigned long arm_isr_value (tree
);
131 static unsigned long arm_compute_func_type (void);
132 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
133 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
134 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
135 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
136 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
138 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
139 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
140 static int arm_comp_type_attributes (const_tree
, const_tree
);
141 static void arm_set_default_type_attributes (tree
);
142 static int arm_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int);
143 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
144 static int optimal_immediate_sequence (enum rtx_code code
,
145 unsigned HOST_WIDE_INT val
,
146 struct four_ints
*return_sequence
);
147 static int optimal_immediate_sequence_1 (enum rtx_code code
,
148 unsigned HOST_WIDE_INT val
,
149 struct four_ints
*return_sequence
,
151 static int arm_get_strip_length (int);
152 static bool arm_function_ok_for_sibcall (tree
, tree
);
153 static machine_mode
arm_promote_function_mode (const_tree
,
156 static bool arm_return_in_memory (const_tree
, const_tree
);
157 static rtx
arm_function_value (const_tree
, const_tree
, bool);
158 static rtx
arm_libcall_value_1 (machine_mode
);
159 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
160 static bool arm_function_value_regno_p (const unsigned int);
161 static void arm_internal_label (FILE *, const char *, unsigned long);
162 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
164 static bool arm_have_conditional_execution (void);
165 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
166 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
167 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
168 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
169 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
170 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
171 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
172 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
173 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
174 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
175 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
176 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
177 static void emit_constant_insn (rtx cond
, rtx pattern
);
178 static rtx_insn
*emit_set_insn (rtx
, rtx
);
179 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
180 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
182 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
184 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
186 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
187 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
189 static rtx
aapcs_libcall_value (machine_mode
);
190 static int aapcs_select_return_coproc (const_tree
, const_tree
);
192 #ifdef OBJECT_FORMAT_ELF
193 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
194 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
197 static void arm_encode_section_info (tree
, rtx
, int);
200 static void arm_file_end (void);
201 static void arm_file_start (void);
202 static void arm_insert_attributes (tree
, tree
*);
204 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
206 static bool arm_pass_by_reference (cumulative_args_t
,
207 machine_mode
, const_tree
, bool);
208 static bool arm_promote_prototypes (const_tree
);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree
);
212 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
213 static bool arm_return_in_memory (const_tree
, const_tree
);
215 static void arm_unwind_emit (FILE *, rtx_insn
*);
216 static bool arm_output_ttype (rtx
);
217 static void arm_asm_emit_except_personality (rtx
);
218 static void arm_asm_init_sections (void);
220 static rtx
arm_dwarf_register_span (rtx
);
222 static tree
arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree
arm_get_cookie_size (tree
);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree
);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree
arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree
, rtx
);
234 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
235 static void arm_option_override (void);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option
*);
238 static void arm_set_current_function (tree
);
239 static bool arm_can_inline_p (tree
, tree
);
240 static void arm_relayout_function (tree
);
241 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
242 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
243 static bool arm_macro_fusion_p (void);
244 static bool arm_cannot_copy_insn_p (rtx_insn
*);
245 static int arm_issue_rate (void);
246 static int arm_first_cycle_multipass_dfa_lookahead (void);
247 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
248 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
249 static bool arm_output_addr_const_extra (FILE *, rtx
);
250 static bool arm_allocate_stack_slots_for_args (void);
251 static bool arm_warn_func_return (tree
);
252 static tree
arm_promoted_type (const_tree t
);
253 static tree
arm_convert_to_type (tree type
, tree expr
);
254 static bool arm_scalar_mode_supported_p (machine_mode
);
255 static bool arm_frame_pointer_required (void);
256 static bool arm_can_eliminate (const int, const int);
257 static void arm_asm_trampoline_template (FILE *);
258 static void arm_trampoline_init (rtx
, tree
, rtx
);
259 static rtx
arm_trampoline_adjust_address (rtx
);
260 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
261 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
262 static bool xscale_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
263 static bool fa726te_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
264 static bool arm_array_mode_supported_p (machine_mode
,
265 unsigned HOST_WIDE_INT
);
266 static machine_mode
arm_preferred_simd_mode (machine_mode
);
267 static bool arm_class_likely_spilled_p (reg_class_t
);
268 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
269 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
270 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
274 static void arm_conditional_register_usage (void);
275 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
276 static unsigned int arm_autovectorize_vector_sizes (void);
277 static int arm_default_branch_cost (bool, bool);
278 static int arm_cortex_a5_branch_cost (bool, bool);
279 static int arm_cortex_m_branch_cost (bool, bool);
280 static int arm_cortex_m7_branch_cost (bool, bool);
282 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
283 const unsigned char *sel
);
285 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
287 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
289 int misalign ATTRIBUTE_UNUSED
);
290 static unsigned arm_add_stmt_cost (void *data
, int count
,
291 enum vect_cost_for_stmt kind
,
292 struct _stmt_vec_info
*stmt_info
,
294 enum vect_cost_model_location where
);
296 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
297 bool op0_preserve_value
);
298 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
300 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
301 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
305 /* Table of machine attributes. */
306 static const struct attribute_spec arm_attribute_table
[] =
308 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
309 affects_type_identity } */
310 /* Function calls made to this symbol must be done indirectly, because
311 it may lie outside of the 26 bit addressing range of a normal function
313 { "long_call", 0, 0, false, true, true, NULL
, false },
314 /* Whereas these functions are always known to reside within the 26 bit
316 { "short_call", 0, 0, false, true, true, NULL
, false },
317 /* Specify the procedure call conventions for a function. */
318 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
320 /* Interrupt Service Routines have special prologue and epilogue requirements. */
321 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
323 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
325 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
328 /* ARM/PE has three new attributes:
330 dllexport - for exporting a function/variable that will live in a dll
331 dllimport - for importing a function/variable from a dll
333 Microsoft allows multiple declspecs in one __declspec, separating
334 them with spaces. We do NOT support this. Instead, use __declspec
337 { "dllimport", 0, 0, true, false, false, NULL
, false },
338 { "dllexport", 0, 0, true, false, false, NULL
, false },
339 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
341 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
342 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
343 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
344 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
347 { NULL
, 0, 0, false, false, false, NULL
, false }
350 /* Initialize the GCC target structure. */
351 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
352 #undef TARGET_MERGE_DECL_ATTRIBUTES
353 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
356 #undef TARGET_LEGITIMIZE_ADDRESS
357 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
360 #define TARGET_LRA_P hook_bool_void_true
362 #undef TARGET_ATTRIBUTE_TABLE
363 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
365 #undef TARGET_INSERT_ATTRIBUTES
366 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
368 #undef TARGET_ASM_FILE_START
369 #define TARGET_ASM_FILE_START arm_file_start
370 #undef TARGET_ASM_FILE_END
371 #define TARGET_ASM_FILE_END arm_file_end
373 #undef TARGET_ASM_ALIGNED_SI_OP
374 #define TARGET_ASM_ALIGNED_SI_OP NULL
375 #undef TARGET_ASM_INTEGER
376 #define TARGET_ASM_INTEGER arm_assemble_integer
378 #undef TARGET_PRINT_OPERAND
379 #define TARGET_PRINT_OPERAND arm_print_operand
380 #undef TARGET_PRINT_OPERAND_ADDRESS
381 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
382 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
383 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
385 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
386 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
388 #undef TARGET_ASM_FUNCTION_PROLOGUE
389 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
391 #undef TARGET_ASM_FUNCTION_EPILOGUE
392 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
394 #undef TARGET_CAN_INLINE_P
395 #define TARGET_CAN_INLINE_P arm_can_inline_p
397 #undef TARGET_RELAYOUT_FUNCTION
398 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
400 #undef TARGET_OPTION_OVERRIDE
401 #define TARGET_OPTION_OVERRIDE arm_option_override
403 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
404 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
406 #undef TARGET_OPTION_PRINT
407 #define TARGET_OPTION_PRINT arm_option_print
409 #undef TARGET_COMP_TYPE_ATTRIBUTES
410 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
412 #undef TARGET_SCHED_MACRO_FUSION_P
413 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
415 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
416 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
418 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
419 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
421 #undef TARGET_SCHED_ADJUST_COST
422 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
424 #undef TARGET_SET_CURRENT_FUNCTION
425 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
427 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
428 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
430 #undef TARGET_SCHED_REORDER
431 #define TARGET_SCHED_REORDER arm_sched_reorder
433 #undef TARGET_REGISTER_MOVE_COST
434 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
436 #undef TARGET_MEMORY_MOVE_COST
437 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef  TARGET_ENCODE_SECTION_INFO
/* The two alternative definitions below were left unconditional in this
   copy (a redefinition); the ARM_PE guard is restored from upstream.  */
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif
446 #undef TARGET_STRIP_NAME_ENCODING
447 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
449 #undef TARGET_ASM_INTERNAL_LABEL
450 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
452 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
453 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
455 #undef TARGET_FUNCTION_VALUE
456 #define TARGET_FUNCTION_VALUE arm_function_value
458 #undef TARGET_LIBCALL_VALUE
459 #define TARGET_LIBCALL_VALUE arm_libcall_value
461 #undef TARGET_FUNCTION_VALUE_REGNO_P
462 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
464 #undef TARGET_ASM_OUTPUT_MI_THUNK
465 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
466 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
467 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
469 #undef TARGET_RTX_COSTS
470 #define TARGET_RTX_COSTS arm_rtx_costs
471 #undef TARGET_ADDRESS_COST
472 #define TARGET_ADDRESS_COST arm_address_cost
474 #undef TARGET_SHIFT_TRUNCATION_MASK
475 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
476 #undef TARGET_VECTOR_MODE_SUPPORTED_P
477 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
478 #undef TARGET_ARRAY_MODE_SUPPORTED_P
479 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
480 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
481 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
482 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
483 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
484 arm_autovectorize_vector_sizes
486 #undef TARGET_MACHINE_DEPENDENT_REORG
487 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
489 #undef TARGET_INIT_BUILTINS
490 #define TARGET_INIT_BUILTINS arm_init_builtins
491 #undef TARGET_EXPAND_BUILTIN
492 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
493 #undef TARGET_BUILTIN_DECL
494 #define TARGET_BUILTIN_DECL arm_builtin_decl
496 #undef TARGET_INIT_LIBFUNCS
497 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
499 #undef TARGET_PROMOTE_FUNCTION_MODE
500 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
501 #undef TARGET_PROMOTE_PROTOTYPES
502 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
503 #undef TARGET_PASS_BY_REFERENCE
504 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
505 #undef TARGET_ARG_PARTIAL_BYTES
506 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
507 #undef TARGET_FUNCTION_ARG
508 #define TARGET_FUNCTION_ARG arm_function_arg
509 #undef TARGET_FUNCTION_ARG_ADVANCE
510 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
511 #undef TARGET_FUNCTION_ARG_BOUNDARY
512 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
514 #undef TARGET_SETUP_INCOMING_VARARGS
515 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
517 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
518 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
520 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
521 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
522 #undef TARGET_TRAMPOLINE_INIT
523 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
524 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
525 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
527 #undef TARGET_WARN_FUNC_RETURN
528 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
530 #undef TARGET_DEFAULT_SHORT_ENUMS
531 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
533 #undef TARGET_ALIGN_ANON_BITFIELD
534 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
536 #undef TARGET_NARROW_VOLATILE_BITFIELD
537 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
539 #undef TARGET_CXX_GUARD_TYPE
540 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
542 #undef TARGET_CXX_GUARD_MASK_BIT
543 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
545 #undef TARGET_CXX_GET_COOKIE_SIZE
546 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
548 #undef TARGET_CXX_COOKIE_HAS_SIZE
549 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
551 #undef TARGET_CXX_CDTOR_RETURNS_THIS
552 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
554 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
555 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
557 #undef TARGET_CXX_USE_AEABI_ATEXIT
558 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
560 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
561 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
562 arm_cxx_determine_class_data_visibility
564 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
565 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
567 #undef TARGET_RETURN_IN_MSB
568 #define TARGET_RETURN_IN_MSB arm_return_in_msb
570 #undef TARGET_RETURN_IN_MEMORY
571 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
573 #undef TARGET_MUST_PASS_IN_STACK
574 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
577 #undef TARGET_ASM_UNWIND_EMIT
578 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
580 /* EABI unwinding tables use a different format for the typeinfo tables. */
581 #undef TARGET_ASM_TTYPE
582 #define TARGET_ASM_TTYPE arm_output_ttype
584 #undef TARGET_ARM_EABI_UNWINDER
585 #define TARGET_ARM_EABI_UNWINDER true
587 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
588 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
590 #undef TARGET_ASM_INIT_SECTIONS
591 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
592 #endif /* ARM_UNWIND_INFO */
594 #undef TARGET_DWARF_REGISTER_SPAN
595 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
597 #undef TARGET_CANNOT_COPY_INSN_P
598 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
601 #undef TARGET_HAVE_TLS
602 #define TARGET_HAVE_TLS true
605 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
606 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
608 #undef TARGET_LEGITIMATE_CONSTANT_P
609 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
611 #undef TARGET_CANNOT_FORCE_CONST_MEM
612 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
614 #undef TARGET_MAX_ANCHOR_OFFSET
615 #define TARGET_MAX_ANCHOR_OFFSET 4095
617 /* The minimum is set such that the total size of the block
618 for a particular anchor is -4088 + 1 + 4095 bytes, which is
619 divisible by eight, ensuring natural spacing of anchors. */
620 #undef TARGET_MIN_ANCHOR_OFFSET
621 #define TARGET_MIN_ANCHOR_OFFSET -4088
623 #undef TARGET_SCHED_ISSUE_RATE
624 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
626 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
627 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
628 arm_first_cycle_multipass_dfa_lookahead
630 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
631 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
632 arm_first_cycle_multipass_dfa_lookahead_guard
634 #undef TARGET_MANGLE_TYPE
635 #define TARGET_MANGLE_TYPE arm_mangle_type
637 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
638 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
640 #undef TARGET_BUILD_BUILTIN_VA_LIST
641 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
642 #undef TARGET_EXPAND_BUILTIN_VA_START
643 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
644 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
645 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
648 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
649 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
652 #undef TARGET_LEGITIMATE_ADDRESS_P
653 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
655 #undef TARGET_PREFERRED_RELOAD_CLASS
656 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
658 #undef TARGET_PROMOTED_TYPE
659 #define TARGET_PROMOTED_TYPE arm_promoted_type
661 #undef TARGET_CONVERT_TO_TYPE
662 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
664 #undef TARGET_SCALAR_MODE_SUPPORTED_P
665 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
667 #undef TARGET_FRAME_POINTER_REQUIRED
668 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
670 #undef TARGET_CAN_ELIMINATE
671 #define TARGET_CAN_ELIMINATE arm_can_eliminate
673 #undef TARGET_CONDITIONAL_REGISTER_USAGE
674 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
676 #undef TARGET_CLASS_LIKELY_SPILLED_P
677 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
679 #undef TARGET_VECTORIZE_BUILTINS
680 #define TARGET_VECTORIZE_BUILTINS
682 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
683 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
684 arm_builtin_vectorized_function
686 #undef TARGET_VECTOR_ALIGNMENT
687 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
689 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
690 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
691 arm_vector_alignment_reachable
693 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
694 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
695 arm_builtin_support_vector_misalignment
697 #undef TARGET_PREFERRED_RENAME_CLASS
698 #define TARGET_PREFERRED_RENAME_CLASS \
699 arm_preferred_rename_class
701 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
702 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
703 arm_vectorize_vec_perm_const_ok
705 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
706 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
707 arm_builtin_vectorization_cost
708 #undef TARGET_VECTORIZE_ADD_STMT_COST
709 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
711 #undef TARGET_CANONICALIZE_COMPARISON
712 #define TARGET_CANONICALIZE_COMPARISON \
713 arm_canonicalize_comparison
715 #undef TARGET_ASAN_SHADOW_OFFSET
716 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
718 #undef MAX_INSN_PER_IT_BLOCK
719 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
721 #undef TARGET_CAN_USE_DOLOOP_P
722 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
724 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
725 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
727 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
728 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
730 #undef TARGET_SCHED_FUSION_PRIORITY
731 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
733 struct gcc_target targetm
= TARGET_INITIALIZER
;
735 /* Obstack for minipool constant handling. */
736 static struct obstack minipool_obstack
;
737 static char * minipool_startobj
;
739 /* The maximum number of insns skipped which
740 will be conditionalised if possible. */
741 static int max_insns_skipped
= 5;
743 extern FILE * asm_out_file
;
745 /* True if we are currently building a constant table. */
746 int making_const_table
;
748 /* The processor for which instructions should be scheduled. */
749 enum processor_type arm_tune
= arm_none
;
751 /* The current tuning set. */
752 const struct tune_params
*current_tune
;
754 /* Which floating point hardware to schedule for. */
757 /* Used for Thumb call_via trampolines. */
758 rtx thumb_call_via_label
[14];
759 static int thumb_call_reg_needed
;
761 /* The bits in this mask specify which
762 instructions we are allowed to generate. */
763 arm_feature_set insn_flags
= ARM_FSET_EMPTY
;
765 /* The bits in this mask specify which instruction scheduling options should
767 arm_feature_set tune_flags
= ARM_FSET_EMPTY
;
769 /* The highest ARM architecture version supported by the
771 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
773 /* The following are used in the arm.md file as equivalents to bits
774 in the above two flag variables. */
776 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
779 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
782 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
785 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
788 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
791 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
794 /* Nonzero if this chip supports the ARM 6K extensions. */
797 /* Nonzero if this chip supports the ARM 6KZ extensions. */
800 /* Nonzero if instructions present in ARMv6-M can be used. */
803 /* Nonzero if this chip supports the ARM 7 extensions. */
/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
809 /* Nonzero if instructions present in ARMv7E-M can be used. */
812 /* Nonzero if instructions present in ARMv8 can be used. */
815 /* Nonzero if this chip supports the ARMv8.1 extensions. */
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
850 /* Nonzero if chip supports Thumb 2. */
/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;
860 /* Nonzero if we should use Neon to handle 64-bits operations rather
861 than core registers. */
862 int prefer_neon_for_64bits
= 0;
864 /* Nonzero if we shouldn't use literal pools. */
865 bool arm_disable_literal_pool
= false;
867 /* The register number to be used for the PIC offset register. */
868 unsigned arm_pic_register
= INVALID_REGNUM
;
870 enum arm_pcs arm_pcs_default
;
872 /* For an explanation of these variables, see final_prescan_insn below. */
874 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
875 enum arm_cond_code arm_current_cc
;
878 int arm_target_label
;
879 /* The number of conditionally executed insns, including the current insn. */
880 int arm_condexec_count
= 0;
881 /* A bitmask specifying the patterns for the IT block.
882 Zero means do not output an IT block before this insn. */
883 int arm_condexec_mask
= 0;
884 /* The number of bits used in arm_condexec_mask. */
885 int arm_condexec_masklen
= 0;
887 /* Nonzero if chip supports the ARMv8 CRC instructions. */
888 int arm_arch_crc
= 0;
890 /* Nonzero if the core has a very small, high-latency, multiply unit. */
891 int arm_m_profile_small_mul
= 0;
/* The condition codes of the ARM, and the inverse function.  Indexed by
   enum arm_cond_code; the inverse of a code is code ^ 1 (eq/ne, cs/cc,
   mi/pl, ...), which the pairwise ordering below guarantees.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/* Assembler mnemonic used for logical-shift-left operands.  */
#define ARM_LSL_NAME "lsl"

/* True iff the two NUL-terminated strings compare equal.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Mask of the low registers (bits 0-7) usable as Thumb-2 work registers:
   0xff with the Thumb hard frame pointer, SP, PC and PIC-offset-table
   register bits cleared.  */
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
			     | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
			     | (1 << PIC_OFFSET_TABLE_REGNUM)))
913 /* Initialization code. */
917 const char *const name
;
918 enum processor_type core
;
920 enum base_architecture base_arch
;
921 const arm_feature_set flags
;
922 const struct tune_params
*const tune
;
926 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
927 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
934 /* arm generic vectorizer costs. */
936 struct cpu_vec_costs arm_default_vec_cost
= {
937 1, /* scalar_stmt_cost. */
938 1, /* scalar load_cost. */
939 1, /* scalar_store_cost. */
940 1, /* vec_stmt_cost. */
941 1, /* vec_to_scalar_cost. */
942 1, /* scalar_to_vec_cost. */
943 1, /* vec_align_load_cost. */
944 1, /* vec_unalign_load_cost. */
945 1, /* vec_unalign_store_cost. */
946 1, /* vec_store_cost. */
947 3, /* cond_taken_branch_cost. */
948 1, /* cond_not_taken_branch_cost. */
951 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
952 #include "aarch-cost-tables.h"
956 const struct cpu_cost_table cortexa9_extra_costs
=
963 COSTS_N_INSNS (1), /* shift_reg. */
964 COSTS_N_INSNS (1), /* arith_shift. */
965 COSTS_N_INSNS (2), /* arith_shift_reg. */
967 COSTS_N_INSNS (1), /* log_shift_reg. */
968 COSTS_N_INSNS (1), /* extend. */
969 COSTS_N_INSNS (2), /* extend_arith. */
970 COSTS_N_INSNS (1), /* bfi. */
971 COSTS_N_INSNS (1), /* bfx. */
975 true /* non_exec_costs_exec. */
980 COSTS_N_INSNS (3), /* simple. */
981 COSTS_N_INSNS (3), /* flag_setting. */
982 COSTS_N_INSNS (2), /* extend. */
983 COSTS_N_INSNS (3), /* add. */
984 COSTS_N_INSNS (2), /* extend_add. */
985 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
989 0, /* simple (N/A). */
990 0, /* flag_setting (N/A). */
991 COSTS_N_INSNS (4), /* extend. */
993 COSTS_N_INSNS (4), /* extend_add. */
999 COSTS_N_INSNS (2), /* load. */
1000 COSTS_N_INSNS (2), /* load_sign_extend. */
1001 COSTS_N_INSNS (2), /* ldrd. */
1002 COSTS_N_INSNS (2), /* ldm_1st. */
1003 1, /* ldm_regs_per_insn_1st. */
1004 2, /* ldm_regs_per_insn_subsequent. */
1005 COSTS_N_INSNS (5), /* loadf. */
1006 COSTS_N_INSNS (5), /* loadd. */
1007 COSTS_N_INSNS (1), /* load_unaligned. */
1008 COSTS_N_INSNS (2), /* store. */
1009 COSTS_N_INSNS (2), /* strd. */
1010 COSTS_N_INSNS (2), /* stm_1st. */
1011 1, /* stm_regs_per_insn_1st. */
1012 2, /* stm_regs_per_insn_subsequent. */
1013 COSTS_N_INSNS (1), /* storef. */
1014 COSTS_N_INSNS (1), /* stored. */
1015 COSTS_N_INSNS (1), /* store_unaligned. */
1016 COSTS_N_INSNS (1), /* loadv. */
1017 COSTS_N_INSNS (1) /* storev. */
1022 COSTS_N_INSNS (14), /* div. */
1023 COSTS_N_INSNS (4), /* mult. */
1024 COSTS_N_INSNS (7), /* mult_addsub. */
1025 COSTS_N_INSNS (30), /* fma. */
1026 COSTS_N_INSNS (3), /* addsub. */
1027 COSTS_N_INSNS (1), /* fpconst. */
1028 COSTS_N_INSNS (1), /* neg. */
1029 COSTS_N_INSNS (3), /* compare. */
1030 COSTS_N_INSNS (3), /* widen. */
1031 COSTS_N_INSNS (3), /* narrow. */
1032 COSTS_N_INSNS (3), /* toint. */
1033 COSTS_N_INSNS (3), /* fromint. */
1034 COSTS_N_INSNS (3) /* roundint. */
1038 COSTS_N_INSNS (24), /* div. */
1039 COSTS_N_INSNS (5), /* mult. */
1040 COSTS_N_INSNS (8), /* mult_addsub. */
1041 COSTS_N_INSNS (30), /* fma. */
1042 COSTS_N_INSNS (3), /* addsub. */
1043 COSTS_N_INSNS (1), /* fpconst. */
1044 COSTS_N_INSNS (1), /* neg. */
1045 COSTS_N_INSNS (3), /* compare. */
1046 COSTS_N_INSNS (3), /* widen. */
1047 COSTS_N_INSNS (3), /* narrow. */
1048 COSTS_N_INSNS (3), /* toint. */
1049 COSTS_N_INSNS (3), /* fromint. */
1050 COSTS_N_INSNS (3) /* roundint. */
1055 COSTS_N_INSNS (1) /* alu. */
1059 const struct cpu_cost_table cortexa8_extra_costs
=
1065 COSTS_N_INSNS (1), /* shift. */
1067 COSTS_N_INSNS (1), /* arith_shift. */
1068 0, /* arith_shift_reg. */
1069 COSTS_N_INSNS (1), /* log_shift. */
1070 0, /* log_shift_reg. */
1072 0, /* extend_arith. */
1078 true /* non_exec_costs_exec. */
1083 COSTS_N_INSNS (1), /* simple. */
1084 COSTS_N_INSNS (1), /* flag_setting. */
1085 COSTS_N_INSNS (1), /* extend. */
1086 COSTS_N_INSNS (1), /* add. */
1087 COSTS_N_INSNS (1), /* extend_add. */
1088 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1092 0, /* simple (N/A). */
1093 0, /* flag_setting (N/A). */
1094 COSTS_N_INSNS (2), /* extend. */
1096 COSTS_N_INSNS (2), /* extend_add. */
1102 COSTS_N_INSNS (1), /* load. */
1103 COSTS_N_INSNS (1), /* load_sign_extend. */
1104 COSTS_N_INSNS (1), /* ldrd. */
1105 COSTS_N_INSNS (1), /* ldm_1st. */
1106 1, /* ldm_regs_per_insn_1st. */
1107 2, /* ldm_regs_per_insn_subsequent. */
1108 COSTS_N_INSNS (1), /* loadf. */
1109 COSTS_N_INSNS (1), /* loadd. */
1110 COSTS_N_INSNS (1), /* load_unaligned. */
1111 COSTS_N_INSNS (1), /* store. */
1112 COSTS_N_INSNS (1), /* strd. */
1113 COSTS_N_INSNS (1), /* stm_1st. */
1114 1, /* stm_regs_per_insn_1st. */
1115 2, /* stm_regs_per_insn_subsequent. */
1116 COSTS_N_INSNS (1), /* storef. */
1117 COSTS_N_INSNS (1), /* stored. */
1118 COSTS_N_INSNS (1), /* store_unaligned. */
1119 COSTS_N_INSNS (1), /* loadv. */
1120 COSTS_N_INSNS (1) /* storev. */
1125 COSTS_N_INSNS (36), /* div. */
1126 COSTS_N_INSNS (11), /* mult. */
1127 COSTS_N_INSNS (20), /* mult_addsub. */
1128 COSTS_N_INSNS (30), /* fma. */
1129 COSTS_N_INSNS (9), /* addsub. */
1130 COSTS_N_INSNS (3), /* fpconst. */
1131 COSTS_N_INSNS (3), /* neg. */
1132 COSTS_N_INSNS (6), /* compare. */
1133 COSTS_N_INSNS (4), /* widen. */
1134 COSTS_N_INSNS (4), /* narrow. */
1135 COSTS_N_INSNS (8), /* toint. */
1136 COSTS_N_INSNS (8), /* fromint. */
1137 COSTS_N_INSNS (8) /* roundint. */
1141 COSTS_N_INSNS (64), /* div. */
1142 COSTS_N_INSNS (16), /* mult. */
1143 COSTS_N_INSNS (25), /* mult_addsub. */
1144 COSTS_N_INSNS (30), /* fma. */
1145 COSTS_N_INSNS (9), /* addsub. */
1146 COSTS_N_INSNS (3), /* fpconst. */
1147 COSTS_N_INSNS (3), /* neg. */
1148 COSTS_N_INSNS (6), /* compare. */
1149 COSTS_N_INSNS (6), /* widen. */
1150 COSTS_N_INSNS (6), /* narrow. */
1151 COSTS_N_INSNS (8), /* toint. */
1152 COSTS_N_INSNS (8), /* fromint. */
1153 COSTS_N_INSNS (8) /* roundint. */
1158 COSTS_N_INSNS (1) /* alu. */
1162 const struct cpu_cost_table cortexa5_extra_costs
=
1168 COSTS_N_INSNS (1), /* shift. */
1169 COSTS_N_INSNS (1), /* shift_reg. */
1170 COSTS_N_INSNS (1), /* arith_shift. */
1171 COSTS_N_INSNS (1), /* arith_shift_reg. */
1172 COSTS_N_INSNS (1), /* log_shift. */
1173 COSTS_N_INSNS (1), /* log_shift_reg. */
1174 COSTS_N_INSNS (1), /* extend. */
1175 COSTS_N_INSNS (1), /* extend_arith. */
1176 COSTS_N_INSNS (1), /* bfi. */
1177 COSTS_N_INSNS (1), /* bfx. */
1178 COSTS_N_INSNS (1), /* clz. */
1179 COSTS_N_INSNS (1), /* rev. */
1181 true /* non_exec_costs_exec. */
1188 COSTS_N_INSNS (1), /* flag_setting. */
1189 COSTS_N_INSNS (1), /* extend. */
1190 COSTS_N_INSNS (1), /* add. */
1191 COSTS_N_INSNS (1), /* extend_add. */
1192 COSTS_N_INSNS (7) /* idiv. */
1196 0, /* simple (N/A). */
1197 0, /* flag_setting (N/A). */
1198 COSTS_N_INSNS (1), /* extend. */
1200 COSTS_N_INSNS (2), /* extend_add. */
1206 COSTS_N_INSNS (1), /* load. */
1207 COSTS_N_INSNS (1), /* load_sign_extend. */
1208 COSTS_N_INSNS (6), /* ldrd. */
1209 COSTS_N_INSNS (1), /* ldm_1st. */
1210 1, /* ldm_regs_per_insn_1st. */
1211 2, /* ldm_regs_per_insn_subsequent. */
1212 COSTS_N_INSNS (2), /* loadf. */
1213 COSTS_N_INSNS (4), /* loadd. */
1214 COSTS_N_INSNS (1), /* load_unaligned. */
1215 COSTS_N_INSNS (1), /* store. */
1216 COSTS_N_INSNS (3), /* strd. */
1217 COSTS_N_INSNS (1), /* stm_1st. */
1218 1, /* stm_regs_per_insn_1st. */
1219 2, /* stm_regs_per_insn_subsequent. */
1220 COSTS_N_INSNS (2), /* storef. */
1221 COSTS_N_INSNS (2), /* stored. */
1222 COSTS_N_INSNS (1), /* store_unaligned. */
1223 COSTS_N_INSNS (1), /* loadv. */
1224 COSTS_N_INSNS (1) /* storev. */
1229 COSTS_N_INSNS (15), /* div. */
1230 COSTS_N_INSNS (3), /* mult. */
1231 COSTS_N_INSNS (7), /* mult_addsub. */
1232 COSTS_N_INSNS (7), /* fma. */
1233 COSTS_N_INSNS (3), /* addsub. */
1234 COSTS_N_INSNS (3), /* fpconst. */
1235 COSTS_N_INSNS (3), /* neg. */
1236 COSTS_N_INSNS (3), /* compare. */
1237 COSTS_N_INSNS (3), /* widen. */
1238 COSTS_N_INSNS (3), /* narrow. */
1239 COSTS_N_INSNS (3), /* toint. */
1240 COSTS_N_INSNS (3), /* fromint. */
1241 COSTS_N_INSNS (3) /* roundint. */
1245 COSTS_N_INSNS (30), /* div. */
1246 COSTS_N_INSNS (6), /* mult. */
1247 COSTS_N_INSNS (10), /* mult_addsub. */
1248 COSTS_N_INSNS (7), /* fma. */
1249 COSTS_N_INSNS (3), /* addsub. */
1250 COSTS_N_INSNS (3), /* fpconst. */
1251 COSTS_N_INSNS (3), /* neg. */
1252 COSTS_N_INSNS (3), /* compare. */
1253 COSTS_N_INSNS (3), /* widen. */
1254 COSTS_N_INSNS (3), /* narrow. */
1255 COSTS_N_INSNS (3), /* toint. */
1256 COSTS_N_INSNS (3), /* fromint. */
1257 COSTS_N_INSNS (3) /* roundint. */
1262 COSTS_N_INSNS (1) /* alu. */
1267 const struct cpu_cost_table cortexa7_extra_costs
=
1273 COSTS_N_INSNS (1), /* shift. */
1274 COSTS_N_INSNS (1), /* shift_reg. */
1275 COSTS_N_INSNS (1), /* arith_shift. */
1276 COSTS_N_INSNS (1), /* arith_shift_reg. */
1277 COSTS_N_INSNS (1), /* log_shift. */
1278 COSTS_N_INSNS (1), /* log_shift_reg. */
1279 COSTS_N_INSNS (1), /* extend. */
1280 COSTS_N_INSNS (1), /* extend_arith. */
1281 COSTS_N_INSNS (1), /* bfi. */
1282 COSTS_N_INSNS (1), /* bfx. */
1283 COSTS_N_INSNS (1), /* clz. */
1284 COSTS_N_INSNS (1), /* rev. */
1286 true /* non_exec_costs_exec. */
1293 COSTS_N_INSNS (1), /* flag_setting. */
1294 COSTS_N_INSNS (1), /* extend. */
1295 COSTS_N_INSNS (1), /* add. */
1296 COSTS_N_INSNS (1), /* extend_add. */
1297 COSTS_N_INSNS (7) /* idiv. */
1301 0, /* simple (N/A). */
1302 0, /* flag_setting (N/A). */
1303 COSTS_N_INSNS (1), /* extend. */
1305 COSTS_N_INSNS (2), /* extend_add. */
1311 COSTS_N_INSNS (1), /* load. */
1312 COSTS_N_INSNS (1), /* load_sign_extend. */
1313 COSTS_N_INSNS (3), /* ldrd. */
1314 COSTS_N_INSNS (1), /* ldm_1st. */
1315 1, /* ldm_regs_per_insn_1st. */
1316 2, /* ldm_regs_per_insn_subsequent. */
1317 COSTS_N_INSNS (2), /* loadf. */
1318 COSTS_N_INSNS (2), /* loadd. */
1319 COSTS_N_INSNS (1), /* load_unaligned. */
1320 COSTS_N_INSNS (1), /* store. */
1321 COSTS_N_INSNS (3), /* strd. */
1322 COSTS_N_INSNS (1), /* stm_1st. */
1323 1, /* stm_regs_per_insn_1st. */
1324 2, /* stm_regs_per_insn_subsequent. */
1325 COSTS_N_INSNS (2), /* storef. */
1326 COSTS_N_INSNS (2), /* stored. */
1327 COSTS_N_INSNS (1), /* store_unaligned. */
1328 COSTS_N_INSNS (1), /* loadv. */
1329 COSTS_N_INSNS (1) /* storev. */
1334 COSTS_N_INSNS (15), /* div. */
1335 COSTS_N_INSNS (3), /* mult. */
1336 COSTS_N_INSNS (7), /* mult_addsub. */
1337 COSTS_N_INSNS (7), /* fma. */
1338 COSTS_N_INSNS (3), /* addsub. */
1339 COSTS_N_INSNS (3), /* fpconst. */
1340 COSTS_N_INSNS (3), /* neg. */
1341 COSTS_N_INSNS (3), /* compare. */
1342 COSTS_N_INSNS (3), /* widen. */
1343 COSTS_N_INSNS (3), /* narrow. */
1344 COSTS_N_INSNS (3), /* toint. */
1345 COSTS_N_INSNS (3), /* fromint. */
1346 COSTS_N_INSNS (3) /* roundint. */
1350 COSTS_N_INSNS (30), /* div. */
1351 COSTS_N_INSNS (6), /* mult. */
1352 COSTS_N_INSNS (10), /* mult_addsub. */
1353 COSTS_N_INSNS (7), /* fma. */
1354 COSTS_N_INSNS (3), /* addsub. */
1355 COSTS_N_INSNS (3), /* fpconst. */
1356 COSTS_N_INSNS (3), /* neg. */
1357 COSTS_N_INSNS (3), /* compare. */
1358 COSTS_N_INSNS (3), /* widen. */
1359 COSTS_N_INSNS (3), /* narrow. */
1360 COSTS_N_INSNS (3), /* toint. */
1361 COSTS_N_INSNS (3), /* fromint. */
1362 COSTS_N_INSNS (3) /* roundint. */
1367 COSTS_N_INSNS (1) /* alu. */
1371 const struct cpu_cost_table cortexa12_extra_costs
=
1378 COSTS_N_INSNS (1), /* shift_reg. */
1379 COSTS_N_INSNS (1), /* arith_shift. */
1380 COSTS_N_INSNS (1), /* arith_shift_reg. */
1381 COSTS_N_INSNS (1), /* log_shift. */
1382 COSTS_N_INSNS (1), /* log_shift_reg. */
1384 COSTS_N_INSNS (1), /* extend_arith. */
1386 COSTS_N_INSNS (1), /* bfx. */
1387 COSTS_N_INSNS (1), /* clz. */
1388 COSTS_N_INSNS (1), /* rev. */
1390 true /* non_exec_costs_exec. */
1395 COSTS_N_INSNS (2), /* simple. */
1396 COSTS_N_INSNS (3), /* flag_setting. */
1397 COSTS_N_INSNS (2), /* extend. */
1398 COSTS_N_INSNS (3), /* add. */
1399 COSTS_N_INSNS (2), /* extend_add. */
1400 COSTS_N_INSNS (18) /* idiv. */
1404 0, /* simple (N/A). */
1405 0, /* flag_setting (N/A). */
1406 COSTS_N_INSNS (3), /* extend. */
1408 COSTS_N_INSNS (3), /* extend_add. */
1414 COSTS_N_INSNS (3), /* load. */
1415 COSTS_N_INSNS (3), /* load_sign_extend. */
1416 COSTS_N_INSNS (3), /* ldrd. */
1417 COSTS_N_INSNS (3), /* ldm_1st. */
1418 1, /* ldm_regs_per_insn_1st. */
1419 2, /* ldm_regs_per_insn_subsequent. */
1420 COSTS_N_INSNS (3), /* loadf. */
1421 COSTS_N_INSNS (3), /* loadd. */
1422 0, /* load_unaligned. */
1426 1, /* stm_regs_per_insn_1st. */
1427 2, /* stm_regs_per_insn_subsequent. */
1428 COSTS_N_INSNS (2), /* storef. */
1429 COSTS_N_INSNS (2), /* stored. */
1430 0, /* store_unaligned. */
1431 COSTS_N_INSNS (1), /* loadv. */
1432 COSTS_N_INSNS (1) /* storev. */
1437 COSTS_N_INSNS (17), /* div. */
1438 COSTS_N_INSNS (4), /* mult. */
1439 COSTS_N_INSNS (8), /* mult_addsub. */
1440 COSTS_N_INSNS (8), /* fma. */
1441 COSTS_N_INSNS (4), /* addsub. */
1442 COSTS_N_INSNS (2), /* fpconst. */
1443 COSTS_N_INSNS (2), /* neg. */
1444 COSTS_N_INSNS (2), /* compare. */
1445 COSTS_N_INSNS (4), /* widen. */
1446 COSTS_N_INSNS (4), /* narrow. */
1447 COSTS_N_INSNS (4), /* toint. */
1448 COSTS_N_INSNS (4), /* fromint. */
1449 COSTS_N_INSNS (4) /* roundint. */
1453 COSTS_N_INSNS (31), /* div. */
1454 COSTS_N_INSNS (4), /* mult. */
1455 COSTS_N_INSNS (8), /* mult_addsub. */
1456 COSTS_N_INSNS (8), /* fma. */
1457 COSTS_N_INSNS (4), /* addsub. */
1458 COSTS_N_INSNS (2), /* fpconst. */
1459 COSTS_N_INSNS (2), /* neg. */
1460 COSTS_N_INSNS (2), /* compare. */
1461 COSTS_N_INSNS (4), /* widen. */
1462 COSTS_N_INSNS (4), /* narrow. */
1463 COSTS_N_INSNS (4), /* toint. */
1464 COSTS_N_INSNS (4), /* fromint. */
1465 COSTS_N_INSNS (4) /* roundint. */
1470 COSTS_N_INSNS (1) /* alu. */
1474 const struct cpu_cost_table cortexa15_extra_costs
=
1482 COSTS_N_INSNS (1), /* arith_shift. */
1483 COSTS_N_INSNS (1), /* arith_shift_reg. */
1484 COSTS_N_INSNS (1), /* log_shift. */
1485 COSTS_N_INSNS (1), /* log_shift_reg. */
1487 COSTS_N_INSNS (1), /* extend_arith. */
1488 COSTS_N_INSNS (1), /* bfi. */
1493 true /* non_exec_costs_exec. */
1498 COSTS_N_INSNS (2), /* simple. */
1499 COSTS_N_INSNS (3), /* flag_setting. */
1500 COSTS_N_INSNS (2), /* extend. */
1501 COSTS_N_INSNS (2), /* add. */
1502 COSTS_N_INSNS (2), /* extend_add. */
1503 COSTS_N_INSNS (18) /* idiv. */
1507 0, /* simple (N/A). */
1508 0, /* flag_setting (N/A). */
1509 COSTS_N_INSNS (3), /* extend. */
1511 COSTS_N_INSNS (3), /* extend_add. */
1517 COSTS_N_INSNS (3), /* load. */
1518 COSTS_N_INSNS (3), /* load_sign_extend. */
1519 COSTS_N_INSNS (3), /* ldrd. */
1520 COSTS_N_INSNS (4), /* ldm_1st. */
1521 1, /* ldm_regs_per_insn_1st. */
1522 2, /* ldm_regs_per_insn_subsequent. */
1523 COSTS_N_INSNS (4), /* loadf. */
1524 COSTS_N_INSNS (4), /* loadd. */
1525 0, /* load_unaligned. */
1528 COSTS_N_INSNS (1), /* stm_1st. */
1529 1, /* stm_regs_per_insn_1st. */
1530 2, /* stm_regs_per_insn_subsequent. */
1533 0, /* store_unaligned. */
1534 COSTS_N_INSNS (1), /* loadv. */
1535 COSTS_N_INSNS (1) /* storev. */
1540 COSTS_N_INSNS (17), /* div. */
1541 COSTS_N_INSNS (4), /* mult. */
1542 COSTS_N_INSNS (8), /* mult_addsub. */
1543 COSTS_N_INSNS (8), /* fma. */
1544 COSTS_N_INSNS (4), /* addsub. */
1545 COSTS_N_INSNS (2), /* fpconst. */
1546 COSTS_N_INSNS (2), /* neg. */
1547 COSTS_N_INSNS (5), /* compare. */
1548 COSTS_N_INSNS (4), /* widen. */
1549 COSTS_N_INSNS (4), /* narrow. */
1550 COSTS_N_INSNS (4), /* toint. */
1551 COSTS_N_INSNS (4), /* fromint. */
1552 COSTS_N_INSNS (4) /* roundint. */
1556 COSTS_N_INSNS (31), /* div. */
1557 COSTS_N_INSNS (4), /* mult. */
1558 COSTS_N_INSNS (8), /* mult_addsub. */
1559 COSTS_N_INSNS (8), /* fma. */
1560 COSTS_N_INSNS (4), /* addsub. */
1561 COSTS_N_INSNS (2), /* fpconst. */
1562 COSTS_N_INSNS (2), /* neg. */
1563 COSTS_N_INSNS (2), /* compare. */
1564 COSTS_N_INSNS (4), /* widen. */
1565 COSTS_N_INSNS (4), /* narrow. */
1566 COSTS_N_INSNS (4), /* toint. */
1567 COSTS_N_INSNS (4), /* fromint. */
1568 COSTS_N_INSNS (4) /* roundint. */
1573 COSTS_N_INSNS (1) /* alu. */
1577 const struct cpu_cost_table v7m_extra_costs
=
1585 0, /* arith_shift. */
1586 COSTS_N_INSNS (1), /* arith_shift_reg. */
1588 COSTS_N_INSNS (1), /* log_shift_reg. */
1590 COSTS_N_INSNS (1), /* extend_arith. */
1595 COSTS_N_INSNS (1), /* non_exec. */
1596 false /* non_exec_costs_exec. */
1601 COSTS_N_INSNS (1), /* simple. */
1602 COSTS_N_INSNS (1), /* flag_setting. */
1603 COSTS_N_INSNS (2), /* extend. */
1604 COSTS_N_INSNS (1), /* add. */
1605 COSTS_N_INSNS (3), /* extend_add. */
1606 COSTS_N_INSNS (8) /* idiv. */
1610 0, /* simple (N/A). */
1611 0, /* flag_setting (N/A). */
1612 COSTS_N_INSNS (2), /* extend. */
1614 COSTS_N_INSNS (3), /* extend_add. */
1620 COSTS_N_INSNS (2), /* load. */
1621 0, /* load_sign_extend. */
1622 COSTS_N_INSNS (3), /* ldrd. */
1623 COSTS_N_INSNS (2), /* ldm_1st. */
1624 1, /* ldm_regs_per_insn_1st. */
1625 1, /* ldm_regs_per_insn_subsequent. */
1626 COSTS_N_INSNS (2), /* loadf. */
1627 COSTS_N_INSNS (3), /* loadd. */
1628 COSTS_N_INSNS (1), /* load_unaligned. */
1629 COSTS_N_INSNS (2), /* store. */
1630 COSTS_N_INSNS (3), /* strd. */
1631 COSTS_N_INSNS (2), /* stm_1st. */
1632 1, /* stm_regs_per_insn_1st. */
1633 1, /* stm_regs_per_insn_subsequent. */
1634 COSTS_N_INSNS (2), /* storef. */
1635 COSTS_N_INSNS (3), /* stored. */
1636 COSTS_N_INSNS (1), /* store_unaligned. */
1637 COSTS_N_INSNS (1), /* loadv. */
1638 COSTS_N_INSNS (1) /* storev. */
1643 COSTS_N_INSNS (7), /* div. */
1644 COSTS_N_INSNS (2), /* mult. */
1645 COSTS_N_INSNS (5), /* mult_addsub. */
1646 COSTS_N_INSNS (3), /* fma. */
1647 COSTS_N_INSNS (1), /* addsub. */
1659 COSTS_N_INSNS (15), /* div. */
1660 COSTS_N_INSNS (5), /* mult. */
1661 COSTS_N_INSNS (7), /* mult_addsub. */
1662 COSTS_N_INSNS (7), /* fma. */
1663 COSTS_N_INSNS (3), /* addsub. */
1676 COSTS_N_INSNS (1) /* alu. */
1680 const struct tune_params arm_slowmul_tune
=
1682 arm_slowmul_rtx_costs
,
1683 NULL
, /* Insn extra costs. */
1684 NULL
, /* Sched adj cost. */
1685 arm_default_branch_cost
,
1686 &arm_default_vec_cost
,
1687 3, /* Constant limit. */
1688 5, /* Max cond insns. */
1689 8, /* Memset max inline. */
1690 1, /* Issue rate. */
1691 ARM_PREFETCH_NOT_BENEFICIAL
,
1692 tune_params::PREF_CONST_POOL_TRUE
,
1693 tune_params::PREF_LDRD_FALSE
,
1694 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1695 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1696 tune_params::DISPARAGE_FLAGS_NEITHER
,
1697 tune_params::PREF_NEON_64_FALSE
,
1698 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1699 tune_params::FUSE_NOTHING
,
1700 tune_params::SCHED_AUTOPREF_OFF
1703 const struct tune_params arm_fastmul_tune
=
1705 arm_fastmul_rtx_costs
,
1706 NULL
, /* Insn extra costs. */
1707 NULL
, /* Sched adj cost. */
1708 arm_default_branch_cost
,
1709 &arm_default_vec_cost
,
1710 1, /* Constant limit. */
1711 5, /* Max cond insns. */
1712 8, /* Memset max inline. */
1713 1, /* Issue rate. */
1714 ARM_PREFETCH_NOT_BENEFICIAL
,
1715 tune_params::PREF_CONST_POOL_TRUE
,
1716 tune_params::PREF_LDRD_FALSE
,
1717 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1718 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1719 tune_params::DISPARAGE_FLAGS_NEITHER
,
1720 tune_params::PREF_NEON_64_FALSE
,
1721 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1722 tune_params::FUSE_NOTHING
,
1723 tune_params::SCHED_AUTOPREF_OFF
1726 /* StrongARM has early execution of branches, so a sequence that is worth
1727 skipping is shorter. Set max_insns_skipped to a lower value. */
1729 const struct tune_params arm_strongarm_tune
=
1731 arm_fastmul_rtx_costs
,
1732 NULL
, /* Insn extra costs. */
1733 NULL
, /* Sched adj cost. */
1734 arm_default_branch_cost
,
1735 &arm_default_vec_cost
,
1736 1, /* Constant limit. */
1737 3, /* Max cond insns. */
1738 8, /* Memset max inline. */
1739 1, /* Issue rate. */
1740 ARM_PREFETCH_NOT_BENEFICIAL
,
1741 tune_params::PREF_CONST_POOL_TRUE
,
1742 tune_params::PREF_LDRD_FALSE
,
1743 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1744 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1745 tune_params::DISPARAGE_FLAGS_NEITHER
,
1746 tune_params::PREF_NEON_64_FALSE
,
1747 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1748 tune_params::FUSE_NOTHING
,
1749 tune_params::SCHED_AUTOPREF_OFF
1752 const struct tune_params arm_xscale_tune
=
1754 arm_xscale_rtx_costs
,
1755 NULL
, /* Insn extra costs. */
1756 xscale_sched_adjust_cost
,
1757 arm_default_branch_cost
,
1758 &arm_default_vec_cost
,
1759 2, /* Constant limit. */
1760 3, /* Max cond insns. */
1761 8, /* Memset max inline. */
1762 1, /* Issue rate. */
1763 ARM_PREFETCH_NOT_BENEFICIAL
,
1764 tune_params::PREF_CONST_POOL_TRUE
,
1765 tune_params::PREF_LDRD_FALSE
,
1766 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1767 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1768 tune_params::DISPARAGE_FLAGS_NEITHER
,
1769 tune_params::PREF_NEON_64_FALSE
,
1770 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1771 tune_params::FUSE_NOTHING
,
1772 tune_params::SCHED_AUTOPREF_OFF
1775 const struct tune_params arm_9e_tune
=
1778 NULL
, /* Insn extra costs. */
1779 NULL
, /* Sched adj cost. */
1780 arm_default_branch_cost
,
1781 &arm_default_vec_cost
,
1782 1, /* Constant limit. */
1783 5, /* Max cond insns. */
1784 8, /* Memset max inline. */
1785 1, /* Issue rate. */
1786 ARM_PREFETCH_NOT_BENEFICIAL
,
1787 tune_params::PREF_CONST_POOL_TRUE
,
1788 tune_params::PREF_LDRD_FALSE
,
1789 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1790 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1791 tune_params::DISPARAGE_FLAGS_NEITHER
,
1792 tune_params::PREF_NEON_64_FALSE
,
1793 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1794 tune_params::FUSE_NOTHING
,
1795 tune_params::SCHED_AUTOPREF_OFF
1798 const struct tune_params arm_marvell_pj4_tune
=
1801 NULL
, /* Insn extra costs. */
1802 NULL
, /* Sched adj cost. */
1803 arm_default_branch_cost
,
1804 &arm_default_vec_cost
,
1805 1, /* Constant limit. */
1806 5, /* Max cond insns. */
1807 8, /* Memset max inline. */
1808 2, /* Issue rate. */
1809 ARM_PREFETCH_NOT_BENEFICIAL
,
1810 tune_params::PREF_CONST_POOL_TRUE
,
1811 tune_params::PREF_LDRD_FALSE
,
1812 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1813 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1814 tune_params::DISPARAGE_FLAGS_NEITHER
,
1815 tune_params::PREF_NEON_64_FALSE
,
1816 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1817 tune_params::FUSE_NOTHING
,
1818 tune_params::SCHED_AUTOPREF_OFF
1821 const struct tune_params arm_v6t2_tune
=
1824 NULL
, /* Insn extra costs. */
1825 NULL
, /* Sched adj cost. */
1826 arm_default_branch_cost
,
1827 &arm_default_vec_cost
,
1828 1, /* Constant limit. */
1829 5, /* Max cond insns. */
1830 8, /* Memset max inline. */
1831 1, /* Issue rate. */
1832 ARM_PREFETCH_NOT_BENEFICIAL
,
1833 tune_params::PREF_CONST_POOL_FALSE
,
1834 tune_params::PREF_LDRD_FALSE
,
1835 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1836 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1837 tune_params::DISPARAGE_FLAGS_NEITHER
,
1838 tune_params::PREF_NEON_64_FALSE
,
1839 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1840 tune_params::FUSE_NOTHING
,
1841 tune_params::SCHED_AUTOPREF_OFF
1845 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1846 const struct tune_params arm_cortex_tune
=
1849 &generic_extra_costs
,
1850 NULL
, /* Sched adj cost. */
1851 arm_default_branch_cost
,
1852 &arm_default_vec_cost
,
1853 1, /* Constant limit. */
1854 5, /* Max cond insns. */
1855 8, /* Memset max inline. */
1856 2, /* Issue rate. */
1857 ARM_PREFETCH_NOT_BENEFICIAL
,
1858 tune_params::PREF_CONST_POOL_FALSE
,
1859 tune_params::PREF_LDRD_FALSE
,
1860 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1862 tune_params::DISPARAGE_FLAGS_NEITHER
,
1863 tune_params::PREF_NEON_64_FALSE
,
1864 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1865 tune_params::FUSE_NOTHING
,
1866 tune_params::SCHED_AUTOPREF_OFF
1869 const struct tune_params arm_cortex_a8_tune
=
1872 &cortexa8_extra_costs
,
1873 NULL
, /* Sched adj cost. */
1874 arm_default_branch_cost
,
1875 &arm_default_vec_cost
,
1876 1, /* Constant limit. */
1877 5, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 2, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL
,
1881 tune_params::PREF_CONST_POOL_FALSE
,
1882 tune_params::PREF_LDRD_FALSE
,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER
,
1886 tune_params::PREF_NEON_64_FALSE
,
1887 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1888 tune_params::FUSE_NOTHING
,
1889 tune_params::SCHED_AUTOPREF_OFF
1892 const struct tune_params arm_cortex_a7_tune
=
1895 &cortexa7_extra_costs
,
1896 NULL
, /* Sched adj cost. */
1897 arm_default_branch_cost
,
1898 &arm_default_vec_cost
,
1899 1, /* Constant limit. */
1900 5, /* Max cond insns. */
1901 8, /* Memset max inline. */
1902 2, /* Issue rate. */
1903 ARM_PREFETCH_NOT_BENEFICIAL
,
1904 tune_params::PREF_CONST_POOL_FALSE
,
1905 tune_params::PREF_LDRD_FALSE
,
1906 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1908 tune_params::DISPARAGE_FLAGS_NEITHER
,
1909 tune_params::PREF_NEON_64_FALSE
,
1910 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1911 tune_params::FUSE_NOTHING
,
1912 tune_params::SCHED_AUTOPREF_OFF
1915 const struct tune_params arm_cortex_a15_tune
=
1918 &cortexa15_extra_costs
,
1919 NULL
, /* Sched adj cost. */
1920 arm_default_branch_cost
,
1921 &arm_default_vec_cost
,
1922 1, /* Constant limit. */
1923 2, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 3, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL
,
1927 tune_params::PREF_CONST_POOL_FALSE
,
1928 tune_params::PREF_LDRD_TRUE
,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_ALL
,
1932 tune_params::PREF_NEON_64_FALSE
,
1933 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1934 tune_params::FUSE_NOTHING
,
1935 tune_params::SCHED_AUTOPREF_FULL
1938 const struct tune_params arm_cortex_a35_tune
=
1941 &cortexa53_extra_costs
,
1942 NULL
, /* Sched adj cost. */
1943 arm_default_branch_cost
,
1944 &arm_default_vec_cost
,
1945 1, /* Constant limit. */
1946 5, /* Max cond insns. */
1947 8, /* Memset max inline. */
1948 1, /* Issue rate. */
1949 ARM_PREFETCH_NOT_BENEFICIAL
,
1950 tune_params::PREF_CONST_POOL_FALSE
,
1951 tune_params::PREF_LDRD_FALSE
,
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1954 tune_params::DISPARAGE_FLAGS_NEITHER
,
1955 tune_params::PREF_NEON_64_FALSE
,
1956 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1957 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
1958 tune_params::SCHED_AUTOPREF_OFF
1961 const struct tune_params arm_cortex_a53_tune
=
1964 &cortexa53_extra_costs
,
1965 NULL
, /* Sched adj cost. */
1966 arm_default_branch_cost
,
1967 &arm_default_vec_cost
,
1968 1, /* Constant limit. */
1969 5, /* Max cond insns. */
1970 8, /* Memset max inline. */
1971 2, /* Issue rate. */
1972 ARM_PREFETCH_NOT_BENEFICIAL
,
1973 tune_params::PREF_CONST_POOL_FALSE
,
1974 tune_params::PREF_LDRD_FALSE
,
1975 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1976 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1977 tune_params::DISPARAGE_FLAGS_NEITHER
,
1978 tune_params::PREF_NEON_64_FALSE
,
1979 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1980 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
1981 tune_params::SCHED_AUTOPREF_OFF
1984 const struct tune_params arm_cortex_a57_tune
=
1987 &cortexa57_extra_costs
,
1988 NULL
, /* Sched adj cost. */
1989 arm_default_branch_cost
,
1990 &arm_default_vec_cost
,
1991 1, /* Constant limit. */
1992 2, /* Max cond insns. */
1993 8, /* Memset max inline. */
1994 3, /* Issue rate. */
1995 ARM_PREFETCH_NOT_BENEFICIAL
,
1996 tune_params::PREF_CONST_POOL_FALSE
,
1997 tune_params::PREF_LDRD_TRUE
,
1998 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1999 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2000 tune_params::DISPARAGE_FLAGS_ALL
,
2001 tune_params::PREF_NEON_64_FALSE
,
2002 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2003 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2004 tune_params::SCHED_AUTOPREF_FULL
2007 const struct tune_params arm_exynosm1_tune
=
2010 &exynosm1_extra_costs
,
2011 NULL
, /* Sched adj cost. */
2012 arm_default_branch_cost
,
2013 &arm_default_vec_cost
,
2014 1, /* Constant limit. */
2015 2, /* Max cond insns. */
2016 8, /* Memset max inline. */
2017 3, /* Issue rate. */
2018 ARM_PREFETCH_NOT_BENEFICIAL
,
2019 tune_params::PREF_CONST_POOL_FALSE
,
2020 tune_params::PREF_LDRD_TRUE
,
2021 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2022 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2023 tune_params::DISPARAGE_FLAGS_ALL
,
2024 tune_params::PREF_NEON_64_FALSE
,
2025 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2026 tune_params::FUSE_NOTHING
,
2027 tune_params::SCHED_AUTOPREF_OFF
2030 const struct tune_params arm_xgene1_tune
=
2033 &xgene1_extra_costs
,
2034 NULL
, /* Sched adj cost. */
2035 arm_default_branch_cost
,
2036 &arm_default_vec_cost
,
2037 1, /* Constant limit. */
2038 2, /* Max cond insns. */
2039 32, /* Memset max inline. */
2040 4, /* Issue rate. */
2041 ARM_PREFETCH_NOT_BENEFICIAL
,
2042 tune_params::PREF_CONST_POOL_FALSE
,
2043 tune_params::PREF_LDRD_TRUE
,
2044 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2046 tune_params::DISPARAGE_FLAGS_ALL
,
2047 tune_params::PREF_NEON_64_FALSE
,
2048 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2049 tune_params::FUSE_NOTHING
,
2050 tune_params::SCHED_AUTOPREF_OFF
2053 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2054 less appealing. Set max_insns_skipped to a low value. */
2056 const struct tune_params arm_cortex_a5_tune
=
2059 &cortexa5_extra_costs
,
2060 NULL
, /* Sched adj cost. */
2061 arm_cortex_a5_branch_cost
,
2062 &arm_default_vec_cost
,
2063 1, /* Constant limit. */
2064 1, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 2, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL
,
2068 tune_params::PREF_CONST_POOL_FALSE
,
2069 tune_params::PREF_LDRD_FALSE
,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_NEITHER
,
2073 tune_params::PREF_NEON_64_FALSE
,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2075 tune_params::FUSE_NOTHING
,
2076 tune_params::SCHED_AUTOPREF_OFF
2079 const struct tune_params arm_cortex_a9_tune
=
2082 &cortexa9_extra_costs
,
2083 cortex_a9_sched_adjust_cost
,
2084 arm_default_branch_cost
,
2085 &arm_default_vec_cost
,
2086 1, /* Constant limit. */
2087 5, /* Max cond insns. */
2088 8, /* Memset max inline. */
2089 2, /* Issue rate. */
2090 ARM_PREFETCH_BENEFICIAL(4,32,32),
2091 tune_params::PREF_CONST_POOL_FALSE
,
2092 tune_params::PREF_LDRD_FALSE
,
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2094 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2095 tune_params::DISPARAGE_FLAGS_NEITHER
,
2096 tune_params::PREF_NEON_64_FALSE
,
2097 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2098 tune_params::FUSE_NOTHING
,
2099 tune_params::SCHED_AUTOPREF_OFF
2102 const struct tune_params arm_cortex_a12_tune
=
2105 &cortexa12_extra_costs
,
2106 NULL
, /* Sched adj cost. */
2107 arm_default_branch_cost
,
2108 &arm_default_vec_cost
, /* Vectorizer costs. */
2109 1, /* Constant limit. */
2110 2, /* Max cond insns. */
2111 8, /* Memset max inline. */
2112 2, /* Issue rate. */
2113 ARM_PREFETCH_NOT_BENEFICIAL
,
2114 tune_params::PREF_CONST_POOL_FALSE
,
2115 tune_params::PREF_LDRD_TRUE
,
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2117 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2118 tune_params::DISPARAGE_FLAGS_ALL
,
2119 tune_params::PREF_NEON_64_FALSE
,
2120 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2121 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2122 tune_params::SCHED_AUTOPREF_OFF
2125 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2126 cycle to execute each. An LDR from the constant pool also takes two cycles
2127 to execute, but mildly increases pipelining opportunity (consecutive
2128 loads/stores can be pipelined together, saving one cycle), and may also
2129 improve icache utilisation. Hence we prefer the constant pool for such
2132 const struct tune_params arm_v7m_tune
=
2136 NULL
, /* Sched adj cost. */
2137 arm_cortex_m_branch_cost
,
2138 &arm_default_vec_cost
,
2139 1, /* Constant limit. */
2140 2, /* Max cond insns. */
2141 8, /* Memset max inline. */
2142 1, /* Issue rate. */
2143 ARM_PREFETCH_NOT_BENEFICIAL
,
2144 tune_params::PREF_CONST_POOL_TRUE
,
2145 tune_params::PREF_LDRD_FALSE
,
2146 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2147 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2148 tune_params::DISPARAGE_FLAGS_NEITHER
,
2149 tune_params::PREF_NEON_64_FALSE
,
2150 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2151 tune_params::FUSE_NOTHING
,
2152 tune_params::SCHED_AUTOPREF_OFF
2155 /* Cortex-M7 tuning. */
2157 const struct tune_params arm_cortex_m7_tune
=
2161 NULL
, /* Sched adj cost. */
2162 arm_cortex_m7_branch_cost
,
2163 &arm_default_vec_cost
,
2164 0, /* Constant limit. */
2165 1, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL
,
2169 tune_params::PREF_CONST_POOL_TRUE
,
2170 tune_params::PREF_LDRD_FALSE
,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_NEITHER
,
2174 tune_params::PREF_NEON_64_FALSE
,
2175 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2176 tune_params::FUSE_NOTHING
,
2177 tune_params::SCHED_AUTOPREF_OFF
2180 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2181 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2182 const struct tune_params arm_v6m_tune
=
2185 NULL
, /* Insn extra costs. */
2186 NULL
, /* Sched adj cost. */
2187 arm_default_branch_cost
,
2188 &arm_default_vec_cost
, /* Vectorizer costs. */
2189 1, /* Constant limit. */
2190 5, /* Max cond insns. */
2191 8, /* Memset max inline. */
2192 1, /* Issue rate. */
2193 ARM_PREFETCH_NOT_BENEFICIAL
,
2194 tune_params::PREF_CONST_POOL_FALSE
,
2195 tune_params::PREF_LDRD_FALSE
,
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2198 tune_params::DISPARAGE_FLAGS_NEITHER
,
2199 tune_params::PREF_NEON_64_FALSE
,
2200 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2201 tune_params::FUSE_NOTHING
,
2202 tune_params::SCHED_AUTOPREF_OFF
2205 const struct tune_params arm_fa726te_tune
=
2208 NULL
, /* Insn extra costs. */
2209 fa726te_sched_adjust_cost
,
2210 arm_default_branch_cost
,
2211 &arm_default_vec_cost
,
2212 1, /* Constant limit. */
2213 5, /* Max cond insns. */
2214 8, /* Memset max inline. */
2215 2, /* Issue rate. */
2216 ARM_PREFETCH_NOT_BENEFICIAL
,
2217 tune_params::PREF_CONST_POOL_TRUE
,
2218 tune_params::PREF_LDRD_FALSE
,
2219 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2220 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2221 tune_params::DISPARAGE_FLAGS_NEITHER
,
2222 tune_params::PREF_NEON_64_FALSE
,
2223 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2224 tune_params::FUSE_NOTHING
,
2225 tune_params::SCHED_AUTOPREF_OFF
2229 /* Not all of these give usefully different compilation alternatives,
2230 but there is no simple way of generalizing them. */
2231 static const struct processors all_cores
[] =
2234 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2235 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2236 FLAGS, &arm_##COSTS##_tune},
2237 #include "arm-cores.def"
2239 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, ARM_FSET_EMPTY
, NULL
}
2242 static const struct processors all_architectures
[] =
2244 /* ARM Architectures */
2245 /* We don't specify tuning costs here as it will be figured out
2248 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2249 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2250 #include "arm-arches.def"
2252 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, ARM_FSET_EMPTY
, NULL
}
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.
   Overwritten later with the real architecture name; the initializer
   fixes the buffer size.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2266 /* Available values for -mfpu=. */
2268 const struct arm_fpu_desc all_fpus
[] =
2270 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \
2271 { NAME, MODEL, REV, VFP_REGS, FEATURES },
2272 #include "arm-fpus.def"
2276 /* Supported TLS relocations. */
2284 TLS_DESCSEQ
/* GNU scheme */
2287 /* The maximum number of insns to be used when loading a constant. */
2289 arm_constant_limit (bool size_p
)
2291 return size_p
? 1 : current_tune
->constant_limit
;
2294 /* Emit an insn that's a simple single-set. Both the operands must be known
2296 inline static rtx_insn
*
2297 emit_set_insn (rtx x
, rtx y
)
2299 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE (Kernighan's popcount: each
   iteration clears the least-significant set bit, so the loop runs
   once per set bit).  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2317 /* Return the number of features in feature-set SET. */
2319 feature_count (const arm_feature_set
* set
)
2321 return (bit_count (ARM_FSET_CPU1 (*set
))
2322 + bit_count (ARM_FSET_CPU2 (*set
)));
2329 } arm_fixed_mode_set
;
2331 /* A small helper for setting fixed-point library libfuncs. */
2334 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2335 const char *funcname
, const char *modename
,
2340 if (num_suffix
== 0)
2341 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2343 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2345 set_optab_libfunc (optable
, mode
, buffer
);
2349 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2350 machine_mode from
, const char *funcname
,
2351 const char *toname
, const char *fromname
)
2354 const char *maybe_suffix_2
= "";
2356 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2357 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2358 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2359 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2360 maybe_suffix_2
= "2";
2362 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2365 set_conv_libfunc (optable
, to
, from
, buffer
);
2368 /* Set up library functions unique to ARM. */
2371 arm_init_libfuncs (void)
2373 /* For Linux, we have access to kernel support for atomic operations. */
2374 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2375 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2377 /* There are no special library functions unless we are using the
2382 /* The functions below are described in Section 4 of the "Run-Time
2383 ABI for the ARM architecture", Version 1.0. */
2385 /* Double-precision floating-point arithmetic. Table 2. */
2386 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2387 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2388 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2389 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2390 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2392 /* Double-precision comparisons. Table 3. */
2393 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2394 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2395 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2396 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2397 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2398 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2399 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2401 /* Single-precision floating-point arithmetic. Table 4. */
2402 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2403 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2404 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2405 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2406 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2408 /* Single-precision comparisons. Table 5. */
2409 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2410 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2411 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2412 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2413 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2414 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2415 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2417 /* Floating-point to integer conversions. Table 6. */
2418 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2419 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2420 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2421 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2422 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2423 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2424 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2425 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2427 /* Conversions between floating types. Table 7. */
2428 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2429 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2431 /* Integer to floating-point conversions. Table 8. */
2432 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2433 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2434 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2435 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2436 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2437 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2438 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2439 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2441 /* Long long. Table 9. */
2442 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2443 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2444 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2445 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2446 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2447 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2448 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2449 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2451 /* Integer (32/32->32) division. \S 4.3.1. */
2452 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2453 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2455 /* The divmod functions are designed so that they can be used for
2456 plain division, even though they return both the quotient and the
2457 remainder. The quotient is returned in the usual location (i.e.,
2458 r0 for SImode, {r0, r1} for DImode), just as would be expected
2459 for an ordinary division routine. Because the AAPCS calling
2460 conventions specify that all of { r0, r1, r2, r3 } are
2461 callee-saved registers, there is no need to tell the compiler
2462 explicitly that those registers are clobbered by these
2464 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2465 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2467 /* For SImode division the ABI provides div-without-mod routines,
2468 which are faster. */
2469 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2470 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2472 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2473 divmod libcalls instead. */
2474 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2475 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2476 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2477 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2479 /* Half-precision float operations. The compiler handles all operations
2480 with NULL libfuncs by converting the SFmode. */
2481 switch (arm_fp16_format
)
2483 case ARM_FP16_FORMAT_IEEE
:
2484 case ARM_FP16_FORMAT_ALTERNATIVE
:
2487 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2488 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2490 : "__gnu_f2h_alternative"));
2491 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2492 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2494 : "__gnu_h2f_alternative"));
2497 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2498 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2499 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2500 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2501 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2504 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2505 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2506 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2507 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2508 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2509 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2510 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2517 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2519 const arm_fixed_mode_set fixed_arith_modes
[] =
2540 const arm_fixed_mode_set fixed_conv_modes
[] =
2570 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2572 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2573 "add", fixed_arith_modes
[i
].name
, 3);
2574 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2575 "ssadd", fixed_arith_modes
[i
].name
, 3);
2576 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2577 "usadd", fixed_arith_modes
[i
].name
, 3);
2578 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2579 "sub", fixed_arith_modes
[i
].name
, 3);
2580 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2581 "sssub", fixed_arith_modes
[i
].name
, 3);
2582 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2583 "ussub", fixed_arith_modes
[i
].name
, 3);
2584 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2585 "mul", fixed_arith_modes
[i
].name
, 3);
2586 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2587 "ssmul", fixed_arith_modes
[i
].name
, 3);
2588 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2589 "usmul", fixed_arith_modes
[i
].name
, 3);
2590 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2591 "div", fixed_arith_modes
[i
].name
, 3);
2592 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2593 "udiv", fixed_arith_modes
[i
].name
, 3);
2594 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2595 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2596 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2597 "usdiv", fixed_arith_modes
[i
].name
, 3);
2598 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2599 "neg", fixed_arith_modes
[i
].name
, 2);
2600 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2601 "ssneg", fixed_arith_modes
[i
].name
, 2);
2602 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2603 "usneg", fixed_arith_modes
[i
].name
, 2);
2604 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2605 "ashl", fixed_arith_modes
[i
].name
, 3);
2606 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2607 "ashr", fixed_arith_modes
[i
].name
, 3);
2608 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2609 "lshr", fixed_arith_modes
[i
].name
, 3);
2610 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2611 "ssashl", fixed_arith_modes
[i
].name
, 3);
2612 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2613 "usashl", fixed_arith_modes
[i
].name
, 3);
2614 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2615 "cmp", fixed_arith_modes
[i
].name
, 2);
2618 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2619 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2622 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2623 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2626 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2627 fixed_conv_modes
[j
].mode
, "fract",
2628 fixed_conv_modes
[i
].name
,
2629 fixed_conv_modes
[j
].name
);
2630 arm_set_fixed_conv_libfunc (satfract_optab
,
2631 fixed_conv_modes
[i
].mode
,
2632 fixed_conv_modes
[j
].mode
, "satfract",
2633 fixed_conv_modes
[i
].name
,
2634 fixed_conv_modes
[j
].name
);
2635 arm_set_fixed_conv_libfunc (fractuns_optab
,
2636 fixed_conv_modes
[i
].mode
,
2637 fixed_conv_modes
[j
].mode
, "fractuns",
2638 fixed_conv_modes
[i
].name
,
2639 fixed_conv_modes
[j
].name
);
2640 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2641 fixed_conv_modes
[i
].mode
,
2642 fixed_conv_modes
[j
].mode
, "satfractuns",
2643 fixed_conv_modes
[i
].name
,
2644 fixed_conv_modes
[j
].name
);
2648 if (TARGET_AAPCS_BASED
)
2649 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2652 /* On AAPCS systems, this is the "struct __va_list". */
2653 static GTY(()) tree va_list_type
;
2655 /* Return the type to use as __builtin_va_list. */
2657 arm_build_builtin_va_list (void)
2662 if (!TARGET_AAPCS_BASED
)
2663 return std_build_builtin_va_list ();
2665 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2673 The C Library ABI further reinforces this definition in \S
2676 We must follow this definition exactly. The structure tag
2677 name is visible in C++ mangled names, and thus forms a part
2678 of the ABI. The field name may be used by people who
2679 #include <stdarg.h>. */
2680 /* Create the type. */
2681 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2682 /* Give it the required name. */
2683 va_list_name
= build_decl (BUILTINS_LOCATION
,
2685 get_identifier ("__va_list"),
2687 DECL_ARTIFICIAL (va_list_name
) = 1;
2688 TYPE_NAME (va_list_type
) = va_list_name
;
2689 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2690 /* Create the __ap field. */
2691 ap_field
= build_decl (BUILTINS_LOCATION
,
2693 get_identifier ("__ap"),
2695 DECL_ARTIFICIAL (ap_field
) = 1;
2696 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2697 TYPE_FIELDS (va_list_type
) = ap_field
;
2698 /* Compute its layout. */
2699 layout_type (va_list_type
);
2701 return va_list_type
;
2704 /* Return an expression of type "void *" pointing to the next
2705 available argument in a variable-argument list. VALIST is the
2706 user-level va_list object, of type __builtin_va_list. */
2708 arm_extract_valist_ptr (tree valist
)
2710 if (TREE_TYPE (valist
) == error_mark_node
)
2711 return error_mark_node
;
2713 /* On an AAPCS target, the pointer is stored within "struct
2715 if (TARGET_AAPCS_BASED
)
2717 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2718 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2719 valist
, ap_field
, NULL_TREE
);
2725 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2727 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2729 valist
= arm_extract_valist_ptr (valist
);
2730 std_expand_builtin_va_start (valist
, nextarg
);
2733 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2735 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2738 valist
= arm_extract_valist_ptr (valist
);
2739 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2742 /* Check any incompatible options that the user has specified. */
2744 arm_option_check_internal (struct gcc_options
*opts
)
2746 int flags
= opts
->x_target_flags
;
2747 const struct arm_fpu_desc
*fpu_desc
= &all_fpus
[opts
->x_arm_fpu_index
];
2749 /* iWMMXt and NEON are incompatible. */
2750 if (TARGET_IWMMXT
&& TARGET_VFP
2751 && ARM_FPU_FSET_HAS (fpu_desc
->features
, FPU_FL_NEON
))
2752 error ("iWMMXt and NEON are incompatible");
2754 /* Make sure that the processor choice does not conflict with any of the
2755 other command line choices. */
2756 if (TARGET_ARM_P (flags
) && !ARM_FSET_HAS_CPU1 (insn_flags
, FL_NOTM
))
2757 error ("target CPU does not support ARM mode");
2759 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2760 from here where no function is being compiled currently. */
2761 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2762 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2764 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2765 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2767 /* If this target is normally configured to use APCS frames, warn if they
2768 are turned off and debugging is turned on. */
2769 if (TARGET_ARM_P (flags
)
2770 && write_symbols
!= NO_DEBUG
2771 && !TARGET_APCS_FRAME
2772 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2773 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2775 /* iWMMXt unsupported under Thumb mode. */
2776 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2777 error ("iWMMXt unsupported under Thumb mode");
2779 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2780 error ("can not use -mtp=cp15 with 16-bit Thumb");
2782 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2784 error ("RTP PIC is incompatible with Thumb");
2788 /* We only support -mslow-flash-data on armv7-m targets. */
2789 if (target_slow_flash_data
2790 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2791 || (TARGET_THUMB1_P (flags
) || flag_pic
|| TARGET_NEON
)))
2792 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2795 /* Recompute the global settings depending on target attribute options. */
2798 arm_option_params_internal (void)
2800 /* If we are not using the default (ARM mode) section anchor offset
2801 ranges, then set the correct ranges now. */
2804 /* Thumb-1 LDR instructions cannot have negative offsets.
2805 Permissible positive offset ranges are 5-bit (for byte loads),
2806 6-bit (for halfword loads), or 7-bit (for word loads).
2807 Empirical results suggest a 7-bit anchor range gives the best
2808 overall code size. */
2809 targetm
.min_anchor_offset
= 0;
2810 targetm
.max_anchor_offset
= 127;
2812 else if (TARGET_THUMB2
)
2814 /* The minimum is set such that the total size of the block
2815 for a particular anchor is 248 + 1 + 4095 bytes, which is
2816 divisible by eight, ensuring natural spacing of anchors. */
2817 targetm
.min_anchor_offset
= -248;
2818 targetm
.max_anchor_offset
= 4095;
2822 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2823 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2828 /* If optimizing for size, bump the number of instructions that we
2829 are prepared to conditionally execute (even on a StrongARM). */
2830 max_insns_skipped
= 6;
2832 /* For THUMB2, we limit the conditional sequence to one IT block. */
2834 max_insns_skipped
= arm_restrict_it
? 1 : 4;
2837 /* When -mrestrict-it is in use tone down the if-conversion. */
2838 max_insns_skipped
= (TARGET_THUMB2
&& arm_restrict_it
)
2839 ? 1 : current_tune
->max_insns_skipped
;
2842 /* True if -mflip-thumb should next add an attribute for the default
2843 mode, false if it should next add an attribute for the opposite mode. */
2844 static GTY(()) bool thumb_flipper
;
2846 /* Options after initial target override. */
2847 static GTY(()) tree init_optimize
;
2850 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2852 if (opts
->x_align_functions
<= 0)
2853 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2854 && opts
->x_optimize_size
? 2 : 4;
2857 /* Implement targetm.override_options_after_change. */
2860 arm_override_options_after_change (void)
2862 arm_override_options_after_change_1 (&global_options
);
2865 /* Reset options between modes that the user has specified. */
2867 arm_option_override_internal (struct gcc_options
*opts
,
2868 struct gcc_options
*opts_set
)
2870 arm_override_options_after_change_1 (opts
);
2872 if (TARGET_INTERWORK
&& !ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
))
2874 /* The default is to enable interworking, so this warning message would
2875 be confusing to users who have just compiled with, eg, -march=armv3. */
2876 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2877 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2880 if (TARGET_THUMB_P (opts
->x_target_flags
)
2881 && !(ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
)))
2883 warning (0, "target CPU does not support THUMB instructions");
2884 opts
->x_target_flags
&= ~MASK_THUMB
;
2887 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2889 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2890 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2893 /* Callee super interworking implies thumb interworking. Adding
2894 this to the flags here simplifies the logic elsewhere. */
2895 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2896 opts
->x_target_flags
|= MASK_INTERWORK
;
2898 /* need to remember initial values so combinaisons of options like
2899 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2900 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2902 if (! opts_set
->x_arm_restrict_it
)
2903 opts
->x_arm_restrict_it
= arm_arch8
;
2905 if (!TARGET_THUMB2_P (opts
->x_target_flags
))
2906 opts
->x_arm_restrict_it
= 0;
2908 /* Enable -munaligned-access by default for
2909 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2910 i.e. Thumb2 and ARM state only.
2911 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2912 - ARMv8 architecture-base processors.
2914 Disable -munaligned-access by default for
2915 - all pre-ARMv6 architecture-based processors
2916 - ARMv6-M architecture-based processors. */
2918 if (! opts_set
->x_unaligned_access
)
2920 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
2921 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
2923 else if (opts
->x_unaligned_access
== 1
2924 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2926 warning (0, "target CPU does not support unaligned accesses");
2927 opts
->x_unaligned_access
= 0;
2930 /* Don't warn since it's on by default in -O2. */
2931 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2932 opts
->x_flag_schedule_insns
= 0;
2934 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
2936 /* Disable shrink-wrap when optimizing function for size, since it tends to
2937 generate additional returns. */
2938 if (optimize_function_for_size_p (cfun
)
2939 && TARGET_THUMB2_P (opts
->x_target_flags
))
2940 opts
->x_flag_shrink_wrap
= false;
2942 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
2944 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2945 - epilogue_insns - does not accurately model the corresponding insns
2946 emitted in the asm file. In particular, see the comment in thumb_exit
2947 'Find out how many of the (return) argument registers we can corrupt'.
2948 As a consequence, the epilogue may clobber registers without fipa-ra
2949 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2950 TODO: Accurately model clobbers for epilogue_insns and reenable
2952 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2953 opts
->x_flag_ipa_ra
= 0;
2955 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
2957 /* Thumb2 inline assembly code should always use unified syntax.
2958 This will apply to ARM and Thumb1 eventually. */
2959 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
2961 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
2962 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
2966 /* Fix up any incompatible options that the user has specified. */
2968 arm_option_override (void)
2970 arm_selected_arch
= NULL
;
2971 arm_selected_cpu
= NULL
;
2972 arm_selected_tune
= NULL
;
2974 if (global_options_set
.x_arm_arch_option
)
2975 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2977 if (global_options_set
.x_arm_cpu_option
)
2979 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2980 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2983 if (global_options_set
.x_arm_tune_option
)
2984 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2986 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2987 SUBTARGET_OVERRIDE_OPTIONS
;
2990 if (arm_selected_arch
)
2992 if (arm_selected_cpu
)
2994 const arm_feature_set tuning_flags
= ARM_FSET_MAKE_CPU1 (FL_TUNE
);
2995 arm_feature_set selected_flags
;
2996 ARM_FSET_XOR (selected_flags
, arm_selected_cpu
->flags
,
2997 arm_selected_arch
->flags
);
2998 ARM_FSET_EXCLUDE (selected_flags
, selected_flags
, tuning_flags
);
2999 /* Check for conflict between mcpu and march. */
3000 if (!ARM_FSET_IS_EMPTY (selected_flags
))
3002 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3003 arm_selected_cpu
->name
, arm_selected_arch
->name
);
3004 /* -march wins for code generation.
3005 -mcpu wins for default tuning. */
3006 if (!arm_selected_tune
)
3007 arm_selected_tune
= arm_selected_cpu
;
3009 arm_selected_cpu
= arm_selected_arch
;
3013 arm_selected_arch
= NULL
;
3016 /* Pick a CPU based on the architecture. */
3017 arm_selected_cpu
= arm_selected_arch
;
3020 /* If the user did not specify a processor, choose one for them. */
3021 if (!arm_selected_cpu
)
3023 const struct processors
* sel
;
3024 arm_feature_set sought
= ARM_FSET_EMPTY
;;
3026 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
3027 if (!arm_selected_cpu
->name
)
3029 #ifdef SUBTARGET_CPU_DEFAULT
3030 /* Use the subtarget default CPU if none was specified by
3032 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
3034 /* Default to ARM6. */
3035 if (!arm_selected_cpu
->name
)
3036 arm_selected_cpu
= &all_cores
[arm6
];
3039 sel
= arm_selected_cpu
;
3040 insn_flags
= sel
->flags
;
3042 /* Now check to see if the user has specified some command line
3043 switch that require certain abilities from the cpu. */
3045 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3047 ARM_FSET_ADD_CPU1 (sought
, FL_THUMB
);
3048 ARM_FSET_ADD_CPU1 (sought
, FL_MODE32
);
3050 /* There are no ARM processors that support both APCS-26 and
3051 interworking. Therefore we force FL_MODE26 to be removed
3052 from insn_flags here (if it was set), so that the search
3053 below will always be able to find a compatible processor. */
3054 ARM_FSET_DEL_CPU1 (insn_flags
, FL_MODE26
);
3057 if (!ARM_FSET_IS_EMPTY (sought
)
3058 && !(ARM_FSET_CPU_SUBSET (sought
, insn_flags
)))
3060 /* Try to locate a CPU type that supports all of the abilities
3061 of the default CPU, plus the extra abilities requested by
3063 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3064 if (ARM_FSET_CPU_SUBSET (sought
, sel
->flags
))
3067 if (sel
->name
== NULL
)
3069 unsigned current_bit_count
= 0;
3070 const struct processors
* best_fit
= NULL
;
3072 /* Ideally we would like to issue an error message here
3073 saying that it was not possible to find a CPU compatible
3074 with the default CPU, but which also supports the command
3075 line options specified by the programmer, and so they
3076 ought to use the -mcpu=<name> command line option to
3077 override the default CPU type.
3079 If we cannot find a cpu that has both the
3080 characteristics of the default cpu and the given
3081 command line options we scan the array again looking
3082 for a best match. */
3083 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3085 arm_feature_set required
= ARM_FSET_EMPTY
;
3086 ARM_FSET_UNION (required
, sought
, insn_flags
);
3087 if (ARM_FSET_CPU_SUBSET (required
, sel
->flags
))
3090 arm_feature_set flags
;
3091 ARM_FSET_INTER (flags
, sel
->flags
, insn_flags
);
3092 count
= feature_count (&flags
);
3094 if (count
>= current_bit_count
)
3097 current_bit_count
= count
;
3101 gcc_assert (best_fit
);
3105 arm_selected_cpu
= sel
;
3109 gcc_assert (arm_selected_cpu
);
3110 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3111 if (!arm_selected_tune
)
3112 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
3114 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
3115 insn_flags
= arm_selected_cpu
->flags
;
3116 arm_base_arch
= arm_selected_cpu
->base_arch
;
3118 arm_tune
= arm_selected_tune
->core
;
3119 tune_flags
= arm_selected_tune
->flags
;
3120 current_tune
= arm_selected_tune
->tune
;
3122 /* TBD: Dwarf info for apcs frame is not handled yet. */
3123 if (TARGET_APCS_FRAME
)
3124 flag_shrink_wrap
= false;
3126 /* BPABI targets use linker tricks to allow interworking on cores
3127 without thumb support. */
3128 if (TARGET_INTERWORK
3129 && !(ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
) || TARGET_BPABI
))
3131 warning (0, "target CPU does not support interworking" );
3132 target_flags
&= ~MASK_INTERWORK
;
3135 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3137 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3138 target_flags
|= MASK_APCS_FRAME
;
3141 if (TARGET_POKE_FUNCTION_NAME
)
3142 target_flags
|= MASK_APCS_FRAME
;
3144 if (TARGET_APCS_REENT
&& flag_pic
)
3145 error ("-fpic and -mapcs-reent are incompatible");
3147 if (TARGET_APCS_REENT
)
3148 warning (0, "APCS reentrant code not supported. Ignored");
3150 if (TARGET_APCS_FLOAT
)
3151 warning (0, "passing floating point arguments in fp regs not yet supported");
3153 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3154 arm_arch3m
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH3M
);
3155 arm_arch4
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH4
);
3156 arm_arch4t
= arm_arch4
&& (ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
));
3157 arm_arch5
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH5
);
3158 arm_arch5e
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH5E
);
3159 arm_arch6
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6
);
3160 arm_arch6k
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6K
);
3161 arm_arch6kz
= arm_arch6k
&& ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6KZ
);
3162 arm_arch_notm
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_NOTM
);
3163 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3164 arm_arch7
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH7
);
3165 arm_arch7em
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH7EM
);
3166 arm_arch8
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH8
);
3167 arm_arch8_1
= ARM_FSET_HAS_CPU2 (insn_flags
, FL2_ARCH8_1
);
3168 arm_arch_thumb2
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB2
);
3169 arm_arch_xscale
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_XSCALE
);
3171 arm_ld_sched
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_LDSCHED
);
3172 arm_tune_strongarm
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_STRONG
);
3173 arm_tune_wbuf
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_WBUF
);
3174 arm_tune_xscale
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_XSCALE
);
3175 arm_arch_iwmmxt
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_IWMMXT
);
3176 arm_arch_iwmmxt2
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_IWMMXT2
);
3177 arm_arch_thumb_hwdiv
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB_DIV
);
3178 arm_arch_arm_hwdiv
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARM_DIV
);
3179 arm_arch_no_volatile_ce
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_NO_VOLATILE_CE
);
3180 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
3181 arm_arch_crc
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_CRC32
);
3182 arm_m_profile_small_mul
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_SMALLMUL
);
3184 /* V5 code we generate is completely interworking capable, so we turn off
3185 TARGET_INTERWORK here to avoid many tests later on. */
3187 /* XXX However, we must pass the right pre-processor defines to CPP
3188 or GLD can get confused. This is a hack. */
3189 if (TARGET_INTERWORK
)
3190 arm_cpp_interwork
= 1;
3193 target_flags
&= ~MASK_INTERWORK
;
3195 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3196 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3198 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3199 error ("iwmmxt abi requires an iwmmxt capable cpu");
3201 if (!global_options_set
.x_arm_fpu_index
)
3203 const char *target_fpu_name
;
3206 #ifdef FPUTYPE_DEFAULT
3207 target_fpu_name
= FPUTYPE_DEFAULT
;
3209 target_fpu_name
= "vfp";
3212 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
3217 /* If soft-float is specified then don't use FPU. */
3218 if (TARGET_SOFT_FLOAT
)
3219 arm_fpu_attr
= FPU_NONE
;
3220 else if (TARGET_VFP
)
3221 arm_fpu_attr
= FPU_VFP
;
3225 if (TARGET_AAPCS_BASED
)
3227 if (TARGET_CALLER_INTERWORKING
)
3228 error ("AAPCS does not support -mcaller-super-interworking");
3230 if (TARGET_CALLEE_INTERWORKING
)
3231 error ("AAPCS does not support -mcallee-super-interworking");
3234 /* __fp16 support currently assumes the core has ldrh. */
3235 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3236 sorry ("__fp16 and no ldrh");
3238 if (TARGET_AAPCS_BASED
)
3240 if (arm_abi
== ARM_ABI_IWMMXT
)
3241 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3242 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
3243 && TARGET_HARD_FLOAT
3245 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3247 arm_pcs_default
= ARM_PCS_AAPCS
;
3251 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
3252 sorry ("-mfloat-abi=hard and VFP");
3254 if (arm_abi
== ARM_ABI_APCS
)
3255 arm_pcs_default
= ARM_PCS_APCS
;
3257 arm_pcs_default
= ARM_PCS_ATPCS
;
3260 /* For arm2/3 there is no need to do any scheduling if we are doing
3261 software floating-point. */
3262 if (TARGET_SOFT_FLOAT
&& !ARM_FSET_HAS_CPU1 (tune_flags
, FL_MODE32
))
3263 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3265 /* Use the cp15 method if it is available. */
3266 if (target_thread_pointer
== TP_AUTO
)
3268 if (arm_arch6k
&& !TARGET_THUMB1
)
3269 target_thread_pointer
= TP_CP15
;
3271 target_thread_pointer
= TP_SOFT
;
3274 /* Override the default structure alignment for AAPCS ABI. */
3275 if (!global_options_set
.x_arm_structure_size_boundary
)
3277 if (TARGET_AAPCS_BASED
)
3278 arm_structure_size_boundary
= 8;
3282 if (arm_structure_size_boundary
!= 8
3283 && arm_structure_size_boundary
!= 32
3284 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3286 if (ARM_DOUBLEWORD_ALIGN
)
3288 "structure size boundary can only be set to 8, 32 or 64");
3290 warning (0, "structure size boundary can only be set to 8 or 32");
3291 arm_structure_size_boundary
3292 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3296 /* If stack checking is disabled, we can use r10 as the PIC register,
3297 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3298 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3300 if (TARGET_VXWORKS_RTP
)
3301 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3302 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3305 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3306 arm_pic_register
= 9;
3308 if (arm_pic_register_string
!= NULL
)
3310 int pic_register
= decode_reg_name (arm_pic_register_string
);
3313 warning (0, "-mpic-register= is useless without -fpic");
3315 /* Prevent the user from choosing an obviously stupid PIC register. */
3316 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3317 || pic_register
== HARD_FRAME_POINTER_REGNUM
3318 || pic_register
== STACK_POINTER_REGNUM
3319 || pic_register
>= PC_REGNUM
3320 || (TARGET_VXWORKS_RTP
3321 && (unsigned int) pic_register
!= arm_pic_register
))
3322 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3324 arm_pic_register
= pic_register
;
3327 if (TARGET_VXWORKS_RTP
3328 && !global_options_set
.x_arm_pic_data_is_text_relative
)
3329 arm_pic_data_is_text_relative
= 0;
3331 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3332 if (fix_cm3_ldrd
== 2)
3334 if (arm_selected_cpu
->core
== cortexm3
)
3340 /* Hot/Cold partitioning is not currently supported, since we can't
3341 handle literal pool placement in that case. */
3342 if (flag_reorder_blocks_and_partition
)
3344 inform (input_location
,
3345 "-freorder-blocks-and-partition not supported on this architecture");
3346 flag_reorder_blocks_and_partition
= 0;
3347 flag_reorder_blocks
= 1;
3351 /* Hoisting PIC address calculations more aggressively provides a small,
3352 but measurable, size reduction for PIC code. Therefore, we decrease
3353 the bar for unrestricted expression hoisting to the cost of PIC address
3354 calculation, which is 2 instructions. */
3355 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3356 global_options
.x_param_values
,
3357 global_options_set
.x_param_values
);
3359 /* ARM EABI defaults to strict volatile bitfields. */
3360 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3361 && abi_version_at_least(2))
3362 flag_strict_volatile_bitfields
= 1;
3364 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3365 have deemed it beneficial (signified by setting
3366 prefetch.num_slots to 1 or more). */
3367 if (flag_prefetch_loop_arrays
< 0
3370 && current_tune
->prefetch
.num_slots
> 0)
3371 flag_prefetch_loop_arrays
= 1;
3373 /* Set up parameters to be used in prefetching algorithm. Do not
3374 override the defaults unless we are tuning for a core we have
3375 researched values for. */
3376 if (current_tune
->prefetch
.num_slots
> 0)
3377 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3378 current_tune
->prefetch
.num_slots
,
3379 global_options
.x_param_values
,
3380 global_options_set
.x_param_values
);
3381 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3382 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3383 current_tune
->prefetch
.l1_cache_line_size
,
3384 global_options
.x_param_values
,
3385 global_options_set
.x_param_values
);
3386 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3387 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3388 current_tune
->prefetch
.l1_cache_size
,
3389 global_options
.x_param_values
,
3390 global_options_set
.x_param_values
);
3392 /* Use Neon to perform 64-bits operations rather than core
3394 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3395 if (use_neon_for_64bits
== 1)
3396 prefer_neon_for_64bits
= true;
3398 /* Use the alternative scheduling-pressure algorithm by default. */
3399 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3400 global_options
.x_param_values
,
3401 global_options_set
.x_param_values
);
3403 /* Look through ready list and all of queue for instructions
3404 relevant for L2 auto-prefetcher. */
3405 int param_sched_autopref_queue_depth
;
3407 switch (current_tune
->sched_autopref
)
3409 case tune_params::SCHED_AUTOPREF_OFF
:
3410 param_sched_autopref_queue_depth
= -1;
3413 case tune_params::SCHED_AUTOPREF_RANK
:
3414 param_sched_autopref_queue_depth
= 0;
3417 case tune_params::SCHED_AUTOPREF_FULL
:
3418 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3425 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3426 param_sched_autopref_queue_depth
,
3427 global_options
.x_param_values
,
3428 global_options_set
.x_param_values
);
3430 /* Currently, for slow flash data, we just disable literal pools. */
3431 if (target_slow_flash_data
)
3432 arm_disable_literal_pool
= true;
3434 /* Disable scheduling fusion by default if it's not armv7 processor
3435 or doesn't prefer ldrd/strd. */
3436 if (flag_schedule_fusion
== 2
3437 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3438 flag_schedule_fusion
= 0;
3440 /* Need to remember initial options before they are overriden. */
3441 init_optimize
= build_optimization_node (&global_options
);
3443 arm_option_override_internal (&global_options
, &global_options_set
);
3444 arm_option_check_internal (&global_options
);
3445 arm_option_params_internal ();
3447 /* Register global variables with the garbage collector. */
3448 arm_add_gc_roots ();
3450 /* Save the initial options in case the user does function specific
3451 options or #pragma target. */
3452 target_option_default_node
= target_option_current_node
3453 = build_target_option_node (&global_options
);
3455 /* Init initial mode for testing. */
3456 thumb_flipper
= TARGET_THUMB
;
3460 arm_add_gc_roots (void)
3462 gcc_obstack_init(&minipool_obstack
);
3463 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3466 /* A table of known ARM exception types.
3467 For use with the interrupt function attribute. */
3471 const char *const arg
;
3472 const unsigned long return_value
;
3476 static const isr_attribute_arg isr_attribute_args
[] =
3478 { "IRQ", ARM_FT_ISR
},
3479 { "irq", ARM_FT_ISR
},
3480 { "FIQ", ARM_FT_FIQ
},
3481 { "fiq", ARM_FT_FIQ
},
3482 { "ABORT", ARM_FT_ISR
},
3483 { "abort", ARM_FT_ISR
},
3484 { "ABORT", ARM_FT_ISR
},
3485 { "abort", ARM_FT_ISR
},
3486 { "UNDEF", ARM_FT_EXCEPTION
},
3487 { "undef", ARM_FT_EXCEPTION
},
3488 { "SWI", ARM_FT_EXCEPTION
},
3489 { "swi", ARM_FT_EXCEPTION
},
3490 { NULL
, ARM_FT_NORMAL
}
3493 /* Returns the (interrupt) function type of the current
3494 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3496 static unsigned long
3497 arm_isr_value (tree argument
)
3499 const isr_attribute_arg
* ptr
;
3503 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3505 /* No argument - default to IRQ. */
3506 if (argument
== NULL_TREE
)
3509 /* Get the value of the argument. */
3510 if (TREE_VALUE (argument
) == NULL_TREE
3511 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3512 return ARM_FT_UNKNOWN
;
3514 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3516 /* Check it against the list of known arguments. */
3517 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3518 if (streq (arg
, ptr
->arg
))
3519 return ptr
->return_value
;
3521 /* An unrecognized interrupt type. */
3522 return ARM_FT_UNKNOWN
;
3525 /* Computes the type of the current function. */
3527 static unsigned long
3528 arm_compute_func_type (void)
3530 unsigned long type
= ARM_FT_UNKNOWN
;
3534 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3536 /* Decide if the current function is volatile. Such functions
3537 never return, and many memory cycles can be saved by not storing
3538 register values that will never be needed again. This optimization
3539 was added to speed up context switching in a kernel application. */
3541 && (TREE_NOTHROW (current_function_decl
)
3542 || !(flag_unwind_tables
3544 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3545 && TREE_THIS_VOLATILE (current_function_decl
))
3546 type
|= ARM_FT_VOLATILE
;
3548 if (cfun
->static_chain_decl
!= NULL
)
3549 type
|= ARM_FT_NESTED
;
3551 attr
= DECL_ATTRIBUTES (current_function_decl
);
3553 a
= lookup_attribute ("naked", attr
);
3555 type
|= ARM_FT_NAKED
;
3557 a
= lookup_attribute ("isr", attr
);
3559 a
= lookup_attribute ("interrupt", attr
);
3562 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3564 type
|= arm_isr_value (TREE_VALUE (a
));
3569 /* Returns the type of the current function. */
3572 arm_current_func_type (void)
3574 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3575 cfun
->machine
->func_type
= arm_compute_func_type ();
3577 return cfun
->machine
->func_type
;
3581 arm_allocate_stack_slots_for_args (void)
3583 /* Naked functions should not allocate stack slots for arguments. */
3584 return !IS_NAKED (arm_current_func_type ());
3588 arm_warn_func_return (tree decl
)
3590 /* Naked functions are implemented entirely in assembly, including the
3591 return sequence, so suppress warnings about this. */
3592 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3596 /* Output assembler code for a block containing the constant parts
3597 of a trampoline, leaving space for the variable parts.
3599 On the ARM, (if r8 is the static chain regnum, and remembering that
3600 referencing pc adds an offset of 8) the trampoline looks like:
3603 .word static chain value
3604 .word function's address
3605 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3608 arm_asm_trampoline_template (FILE *f
)
3610 fprintf (f
, "\t.syntax unified\n");
3614 fprintf (f
, "\t.arm\n");
3615 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3616 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3618 else if (TARGET_THUMB2
)
3620 fprintf (f
, "\t.thumb\n");
3621 /* The Thumb-2 trampoline is similar to the arm implementation.
3622 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3623 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3624 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3625 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3629 ASM_OUTPUT_ALIGN (f
, 2);
3630 fprintf (f
, "\t.code\t16\n");
3631 fprintf (f
, ".Ltrampoline_start:\n");
3632 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3633 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3634 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3635 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3636 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3637 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3639 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3640 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3643 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3646 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3648 rtx fnaddr
, mem
, a_tramp
;
3650 emit_block_move (m_tramp
, assemble_trampoline_template (),
3651 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3653 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3654 emit_move_insn (mem
, chain_value
);
3656 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3657 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3658 emit_move_insn (mem
, fnaddr
);
3660 a_tramp
= XEXP (m_tramp
, 0);
3661 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3662 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3663 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3666 /* Thumb trampolines should be entered in thumb mode, so set
3667 the bottom bit of the address. */
3670 arm_trampoline_adjust_address (rtx addr
)
3673 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3674 NULL
, 0, OPTAB_LIB_WIDEN
);
3678 /* Return 1 if it is possible to return using a single instruction.
3679 If SIBLING is non-null, this is a test for a return before a sibling
3680 call. SIBLING is the call insn, so we can examine its register usage. */
3683 use_return_insn (int iscond
, rtx sibling
)
3686 unsigned int func_type
;
3687 unsigned long saved_int_regs
;
3688 unsigned HOST_WIDE_INT stack_adjust
;
3689 arm_stack_offsets
*offsets
;
3691 /* Never use a return instruction before reload has run. */
3692 if (!reload_completed
)
3695 func_type
= arm_current_func_type ();
3697 /* Naked, volatile and stack alignment functions need special
3699 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3702 /* So do interrupt functions that use the frame pointer and Thumb
3703 interrupt functions. */
3704 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3707 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3708 && !optimize_function_for_size_p (cfun
))
3711 offsets
= arm_get_frame_offsets ();
3712 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3714 /* As do variadic functions. */
3715 if (crtl
->args
.pretend_args_size
3716 || cfun
->machine
->uses_anonymous_args
3717 /* Or if the function calls __builtin_eh_return () */
3718 || crtl
->calls_eh_return
3719 /* Or if the function calls alloca */
3720 || cfun
->calls_alloca
3721 /* Or if there is a stack adjustment. However, if the stack pointer
3722 is saved on the stack, we can use a pre-incrementing stack load. */
3723 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3724 && stack_adjust
== 4))
3725 /* Or if the static chain register was saved above the frame, under the
3726 assumption that the stack pointer isn't saved on the stack. */
3727 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
3728 && arm_compute_static_chain_stack_bytes() != 0))
3731 saved_int_regs
= offsets
->saved_regs_mask
;
3733 /* Unfortunately, the insn
3735 ldmib sp, {..., sp, ...}
3737 triggers a bug on most SA-110 based devices, such that the stack
3738 pointer won't be correctly restored if the instruction takes a
3739 page fault. We work around this problem by popping r3 along with
3740 the other registers, since that is never slower than executing
3741 another instruction.
3743 We test for !arm_arch5 here, because code for any architecture
3744 less than this could potentially be run on one of the buggy
3746 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3748 /* Validate that r3 is a call-clobbered register (always true in
3749 the default abi) ... */
3750 if (!call_used_regs
[3])
3753 /* ... that it isn't being used for a return value ... */
3754 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3757 /* ... or for a tail-call argument ... */
3760 gcc_assert (CALL_P (sibling
));
3762 if (find_regno_fusage (sibling
, USE
, 3))
3766 /* ... and that there are no call-saved registers in r0-r2
3767 (always true in the default ABI). */
3768 if (saved_int_regs
& 0x7)
3772 /* Can't be done if interworking with Thumb, and any registers have been
3774 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3777 /* On StrongARM, conditional returns are expensive if they aren't
3778 taken and multiple registers have been stacked. */
3779 if (iscond
&& arm_tune_strongarm
)
3781 /* Conditional return when just the LR is stored is a simple
3782 conditional-load instruction, that's not expensive. */
3783 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3787 && arm_pic_register
!= INVALID_REGNUM
3788 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3792 /* If there are saved registers but the LR isn't saved, then we need
3793 two instructions for the return. */
3794 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3797 /* Can't be done if any of the VFP regs are pushed,
3798 since this also requires an insn. */
3799 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3800 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3801 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3804 if (TARGET_REALLY_IWMMXT
)
3805 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3806 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3812 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3813 shrink-wrapping if possible. This is the case if we need to emit a
3814 prologue, which we can test by looking at the offsets. */
3816 use_simple_return_p (void)
3818 arm_stack_offsets
*offsets
;
3820 offsets
= arm_get_frame_offsets ();
3821 return offsets
->outgoing_args
!= 0;
3824 /* Return TRUE if int I is a valid immediate ARM constant. */
3827 const_ok_for_arm (HOST_WIDE_INT i
)
3831 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3832 be all zero, or all one. */
3833 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3834 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3835 != ((~(unsigned HOST_WIDE_INT
) 0)
3836 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3839 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3841 /* Fast return for 0 and small values. We must do this for zero, since
3842 the code below can't handle that one case. */
3843 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3846 /* Get the number of trailing zeros. */
3847 lowbit
= ffs((int) i
) - 1;
3849 /* Only even shifts are allowed in ARM mode so round down to the
3850 nearest even number. */
3854 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3859 /* Allow rotated constants in ARM mode. */
3861 && ((i
& ~0xc000003f) == 0
3862 || (i
& ~0xf000000f) == 0
3863 || (i
& ~0xfc000003) == 0))
3870 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3873 if (i
== v
|| i
== (v
| (v
<< 8)))
3876 /* Allow repeated pattern 0xXY00XY00. */
3886 /* Return true if I is a valid constant for the operation CODE. */
3888 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3890 if (const_ok_for_arm (i
))
3896 /* See if we can use movw. */
3897 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3900 /* Otherwise, try mvn. */
3901 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3904 /* See if we can use addw or subw. */
3906 && ((i
& 0xfffff000) == 0
3907 || ((-i
) & 0xfffff000) == 0))
3909 /* else fall through. */
3929 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3931 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3937 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3941 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3948 /* Return true if I is a valid di mode constant for the operation CODE. */
3950 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3952 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3953 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3954 rtx hi
= GEN_INT (hi_val
);
3955 rtx lo
= GEN_INT (lo_val
);
3965 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3966 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
3968 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3975 /* Emit a sequence of insns to handle a large constant.
3976 CODE is the code of the operation required, it can be any of SET, PLUS,
3977 IOR, AND, XOR, MINUS;
3978 MODE is the mode in which the operation is being performed;
3979 VAL is the integer to operate on;
3980 SOURCE is the other operand (a register, or a null-pointer for SET);
3981 SUBTARGETS means it is safe to create scratch registers if that will
3982 either produce a simpler sequence, or we will want to cse the values.
3983 Return value is the number of insns emitted. */
3985 /* ??? Tweak this for thumb2. */
3987 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
3988 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
3992 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3993 cond
= COND_EXEC_TEST (PATTERN (insn
));
3997 if (subtargets
|| code
== SET
3998 || (REG_P (target
) && REG_P (source
)
3999 && REGNO (target
) != REGNO (source
)))
4001 /* After arm_reorg has been called, we can't fix up expensive
4002 constants by pushing them into memory so we must synthesize
4003 them in-line, regardless of the cost. This is only likely to
4004 be more costly on chips that have load delay slots and we are
4005 compiling without running the scheduler (so no splitting
4006 occurred before the final instruction emission).
4008 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4010 if (!cfun
->machine
->after_arm_reorg
4012 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4014 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4019 /* Currently SET is the only monadic value for CODE, all
4020 the rest are diadic. */
4021 if (TARGET_USE_MOVT
)
4022 arm_emit_movpair (target
, GEN_INT (val
));
4024 emit_set_insn (target
, GEN_INT (val
));
4030 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4032 if (TARGET_USE_MOVT
)
4033 arm_emit_movpair (temp
, GEN_INT (val
));
4035 emit_set_insn (temp
, GEN_INT (val
));
4037 /* For MINUS, the value is subtracted from, since we never
4038 have subtraction of a constant. */
4040 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4042 emit_set_insn (target
,
4043 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4049 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4053 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4054 ARM/THUMB2 immediates, and add up to VAL.
4055 Thr function return value gives the number of insns required. */
4057 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4058 struct four_ints
*return_sequence
)
4060 int best_consecutive_zeros
= 0;
4064 struct four_ints tmp_sequence
;
4066 /* If we aren't targeting ARM, the best place to start is always at
4067 the bottom, otherwise look more closely. */
4070 for (i
= 0; i
< 32; i
+= 2)
4072 int consecutive_zeros
= 0;
4074 if (!(val
& (3 << i
)))
4076 while ((i
< 32) && !(val
& (3 << i
)))
4078 consecutive_zeros
+= 2;
4081 if (consecutive_zeros
> best_consecutive_zeros
)
4083 best_consecutive_zeros
= consecutive_zeros
;
4084 best_start
= i
- consecutive_zeros
;
4091 /* So long as it won't require any more insns to do so, it's
4092 desirable to emit a small constant (in bits 0...9) in the last
4093 insn. This way there is more chance that it can be combined with
4094 a later addressing insn to form a pre-indexed load or store
4095 operation. Consider:
4097 *((volatile int *)0xe0000100) = 1;
4098 *((volatile int *)0xe0000110) = 2;
4100 We want this to wind up as:
4104 str rB, [rA, #0x100]
4106 str rB, [rA, #0x110]
4108 rather than having to synthesize both large constants from scratch.
4110 Therefore, we calculate how many insns would be required to emit
4111 the constant starting from `best_start', and also starting from
4112 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4113 yield a shorter sequence, we may as well use zero. */
4114 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4116 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4118 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4119 if (insns2
<= insns1
)
4121 *return_sequence
= tmp_sequence
;
4129 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4131 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4132 struct four_ints
*return_sequence
, int i
)
4134 int remainder
= val
& 0xffffffff;
4137 /* Try and find a way of doing the job in either two or three
4140 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4141 location. We start at position I. This may be the MSB, or
4142 optimial_immediate_sequence may have positioned it at the largest block
4143 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4144 wrapping around to the top of the word when we drop off the bottom.
4145 In the worst case this code should produce no more than four insns.
4147 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4148 constants, shifted to any arbitrary location. We should always start
4153 unsigned int b1
, b2
, b3
, b4
;
4154 unsigned HOST_WIDE_INT result
;
4157 gcc_assert (insns
< 4);
4162 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4163 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4166 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4167 /* We can use addw/subw for the last 12 bits. */
4171 /* Use an 8-bit shifted/rotated immediate. */
4175 result
= remainder
& ((0x0ff << end
)
4176 | ((i
< end
) ? (0xff >> (32 - end
))
4183 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4184 arbitrary shifts. */
4185 i
-= TARGET_ARM
? 2 : 1;
4189 /* Next, see if we can do a better job with a thumb2 replicated
4192 We do it this way around to catch the cases like 0x01F001E0 where
4193 two 8-bit immediates would work, but a replicated constant would
4196 TODO: 16-bit constants that don't clear all the bits, but still win.
4197 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4200 b1
= (remainder
& 0xff000000) >> 24;
4201 b2
= (remainder
& 0x00ff0000) >> 16;
4202 b3
= (remainder
& 0x0000ff00) >> 8;
4203 b4
= remainder
& 0xff;
4207 /* The 8-bit immediate already found clears b1 (and maybe b2),
4208 but must leave b3 and b4 alone. */
4210 /* First try to find a 32-bit replicated constant that clears
4211 almost everything. We can assume that we can't do it in one,
4212 or else we wouldn't be here. */
4213 unsigned int tmp
= b1
& b2
& b3
& b4
;
4214 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4216 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4217 + (tmp
== b3
) + (tmp
== b4
);
4219 && (matching_bytes
>= 3
4220 || (matching_bytes
== 2
4221 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4223 /* At least 3 of the bytes match, and the fourth has at
4224 least as many bits set, or two of the bytes match
4225 and it will only require one more insn to finish. */
4233 /* Second, try to find a 16-bit replicated constant that can
4234 leave three of the bytes clear. If b2 or b4 is already
4235 zero, then we can. If the 8-bit from above would not
4236 clear b2 anyway, then we still win. */
4237 else if (b1
== b3
&& (!b2
|| !b4
4238 || (remainder
& 0x00ff0000 & ~result
)))
4240 result
= remainder
& 0xff00ff00;
4246 /* The 8-bit immediate already found clears b2 (and maybe b3)
4247 and we don't get here unless b1 is alredy clear, but it will
4248 leave b4 unchanged. */
4250 /* If we can clear b2 and b4 at once, then we win, since the
4251 8-bits couldn't possibly reach that far. */
4254 result
= remainder
& 0x00ff00ff;
4260 return_sequence
->i
[insns
++] = result
;
4261 remainder
&= ~result
;
4263 if (code
== SET
|| code
== MINUS
)
4271 /* Emit an instruction with the indicated PATTERN. If COND is
4272 non-NULL, conditionalize the execution of the instruction on COND
4276 emit_constant_insn (rtx cond
, rtx pattern
)
4279 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4280 emit_insn (pattern
);
4283 /* As above, but extra parameter GENERATE which, if clear, suppresses
4287 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4288 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4289 int subtargets
, int generate
)
4293 int final_invert
= 0;
4295 int set_sign_bit_copies
= 0;
4296 int clear_sign_bit_copies
= 0;
4297 int clear_zero_bit_copies
= 0;
4298 int set_zero_bit_copies
= 0;
4299 int insns
= 0, neg_insns
, inv_insns
;
4300 unsigned HOST_WIDE_INT temp1
, temp2
;
4301 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4302 struct four_ints
*immediates
;
4303 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4305 /* Find out which operations are safe for a given CODE. Also do a quick
4306 check for degenerate cases; these can occur when DImode operations
4319 if (remainder
== 0xffffffff)
4322 emit_constant_insn (cond
,
4323 gen_rtx_SET (target
,
4324 GEN_INT (ARM_SIGN_EXTEND (val
))));
4330 if (reload_completed
&& rtx_equal_p (target
, source
))
4334 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4343 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4346 if (remainder
== 0xffffffff)
4348 if (reload_completed
&& rtx_equal_p (target
, source
))
4351 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4360 if (reload_completed
&& rtx_equal_p (target
, source
))
4363 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4367 if (remainder
== 0xffffffff)
4370 emit_constant_insn (cond
,
4371 gen_rtx_SET (target
,
4372 gen_rtx_NOT (mode
, source
)));
4379 /* We treat MINUS as (val - source), since (source - val) is always
4380 passed as (source + (-val)). */
4384 emit_constant_insn (cond
,
4385 gen_rtx_SET (target
,
4386 gen_rtx_NEG (mode
, source
)));
4389 if (const_ok_for_arm (val
))
4392 emit_constant_insn (cond
,
4393 gen_rtx_SET (target
,
4394 gen_rtx_MINUS (mode
, GEN_INT (val
),
4405 /* If we can do it in one insn get out quickly. */
4406 if (const_ok_for_op (val
, code
))
4409 emit_constant_insn (cond
,
4410 gen_rtx_SET (target
,
4412 ? gen_rtx_fmt_ee (code
, mode
, source
,
4418 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4420 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4421 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4425 if (mode
== SImode
&& i
== 16)
4426 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4428 emit_constant_insn (cond
,
4429 gen_zero_extendhisi2
4430 (target
, gen_lowpart (HImode
, source
)));
4432 /* Extz only supports SImode, but we can coerce the operands
4434 emit_constant_insn (cond
,
4435 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4436 gen_lowpart (SImode
, source
),
4437 GEN_INT (i
), const0_rtx
));
4443 /* Calculate a few attributes that may be useful for specific
4445 /* Count number of leading zeros. */
4446 for (i
= 31; i
>= 0; i
--)
4448 if ((remainder
& (1 << i
)) == 0)
4449 clear_sign_bit_copies
++;
4454 /* Count number of leading 1's. */
4455 for (i
= 31; i
>= 0; i
--)
4457 if ((remainder
& (1 << i
)) != 0)
4458 set_sign_bit_copies
++;
4463 /* Count number of trailing zero's. */
4464 for (i
= 0; i
<= 31; i
++)
4466 if ((remainder
& (1 << i
)) == 0)
4467 clear_zero_bit_copies
++;
4472 /* Count number of trailing 1's. */
4473 for (i
= 0; i
<= 31; i
++)
4475 if ((remainder
& (1 << i
)) != 0)
4476 set_zero_bit_copies
++;
4484 /* See if we can do this by sign_extending a constant that is known
4485 to be negative. This is a good, way of doing it, since the shift
4486 may well merge into a subsequent insn. */
4487 if (set_sign_bit_copies
> 1)
4489 if (const_ok_for_arm
4490 (temp1
= ARM_SIGN_EXTEND (remainder
4491 << (set_sign_bit_copies
- 1))))
4495 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4496 emit_constant_insn (cond
,
4497 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4498 emit_constant_insn (cond
,
4499 gen_ashrsi3 (target
, new_src
,
4500 GEN_INT (set_sign_bit_copies
- 1)));
4504 /* For an inverted constant, we will need to set the low bits,
4505 these will be shifted out of harm's way. */
4506 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4507 if (const_ok_for_arm (~temp1
))
4511 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4512 emit_constant_insn (cond
,
4513 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4514 emit_constant_insn (cond
,
4515 gen_ashrsi3 (target
, new_src
,
4516 GEN_INT (set_sign_bit_copies
- 1)));
4522 /* See if we can calculate the value as the difference between two
4523 valid immediates. */
4524 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4526 int topshift
= clear_sign_bit_copies
& ~1;
4528 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4529 & (0xff000000 >> topshift
));
4531 /* If temp1 is zero, then that means the 9 most significant
4532 bits of remainder were 1 and we've caused it to overflow.
4533 When topshift is 0 we don't need to do anything since we
4534 can borrow from 'bit 32'. */
4535 if (temp1
== 0 && topshift
!= 0)
4536 temp1
= 0x80000000 >> (topshift
- 1);
4538 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4540 if (const_ok_for_arm (temp2
))
4544 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4545 emit_constant_insn (cond
,
4546 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4547 emit_constant_insn (cond
,
4548 gen_addsi3 (target
, new_src
,
4556 /* See if we can generate this by setting the bottom (or the top)
4557 16 bits, and then shifting these into the other half of the
4558 word. We only look for the simplest cases, to do more would cost
4559 too much. Be careful, however, not to generate this when the
4560 alternative would take fewer insns. */
4561 if (val
& 0xffff0000)
4563 temp1
= remainder
& 0xffff0000;
4564 temp2
= remainder
& 0x0000ffff;
4566 /* Overlaps outside this range are best done using other methods. */
4567 for (i
= 9; i
< 24; i
++)
4569 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4570 && !const_ok_for_arm (temp2
))
4572 rtx new_src
= (subtargets
4573 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4575 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4576 source
, subtargets
, generate
);
4584 gen_rtx_ASHIFT (mode
, source
,
4591 /* Don't duplicate cases already considered. */
4592 for (i
= 17; i
< 24; i
++)
4594 if (((temp1
| (temp1
>> i
)) == remainder
)
4595 && !const_ok_for_arm (temp1
))
4597 rtx new_src
= (subtargets
4598 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4600 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4601 source
, subtargets
, generate
);
4606 gen_rtx_SET (target
,
4609 gen_rtx_LSHIFTRT (mode
, source
,
4620 /* If we have IOR or XOR, and the constant can be loaded in a
4621 single instruction, and we can find a temporary to put it in,
4622 then this can be done in two instructions instead of 3-4. */
4624 /* TARGET can't be NULL if SUBTARGETS is 0 */
4625 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4627 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4631 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4633 emit_constant_insn (cond
,
4634 gen_rtx_SET (sub
, GEN_INT (val
)));
4635 emit_constant_insn (cond
,
4636 gen_rtx_SET (target
,
4637 gen_rtx_fmt_ee (code
, mode
,
4648 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4649 and the remainder 0s for e.g. 0xfff00000)
4650 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4652 This can be done in 2 instructions by using shifts with mov or mvn.
4657 mvn r0, r0, lsr #12 */
4658 if (set_sign_bit_copies
> 8
4659 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4663 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4664 rtx shift
= GEN_INT (set_sign_bit_copies
);
4670 gen_rtx_ASHIFT (mode
,
4675 gen_rtx_SET (target
,
4677 gen_rtx_LSHIFTRT (mode
, sub
,
4684 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4686 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4688 For eg. r0 = r0 | 0xfff
4693 if (set_zero_bit_copies
> 8
4694 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4698 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4699 rtx shift
= GEN_INT (set_zero_bit_copies
);
4705 gen_rtx_LSHIFTRT (mode
,
4710 gen_rtx_SET (target
,
4712 gen_rtx_ASHIFT (mode
, sub
,
4718 /* This will never be reached for Thumb2 because orn is a valid
4719 instruction. This is for Thumb1 and the ARM 32 bit cases.
4721 x = y | constant (such that ~constant is a valid constant)
4723 x = ~(~y & ~constant).
4725 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4729 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4730 emit_constant_insn (cond
,
4732 gen_rtx_NOT (mode
, source
)));
4735 sub
= gen_reg_rtx (mode
);
4736 emit_constant_insn (cond
,
4738 gen_rtx_AND (mode
, source
,
4740 emit_constant_insn (cond
,
4741 gen_rtx_SET (target
,
4742 gen_rtx_NOT (mode
, sub
)));
4749 /* See if two shifts will do 2 or more insn's worth of work. */
4750 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4752 HOST_WIDE_INT shift_mask
= ((0xffffffff
4753 << (32 - clear_sign_bit_copies
))
4756 if ((remainder
| shift_mask
) != 0xffffffff)
4758 HOST_WIDE_INT new_val
4759 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4763 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4764 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4765 new_src
, source
, subtargets
, 1);
4770 rtx targ
= subtargets
? NULL_RTX
: target
;
4771 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4772 targ
, source
, subtargets
, 0);
4778 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4779 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4781 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4782 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4788 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4790 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4792 if ((remainder
| shift_mask
) != 0xffffffff)
4794 HOST_WIDE_INT new_val
4795 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4798 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4800 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4801 new_src
, source
, subtargets
, 1);
4806 rtx targ
= subtargets
? NULL_RTX
: target
;
4808 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4809 targ
, source
, subtargets
, 0);
4815 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4816 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4818 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4819 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4831 /* Calculate what the instruction sequences would be if we generated it
4832 normally, negated, or inverted. */
4834 /* AND cannot be split into multiple insns, so invert and use BIC. */
4837 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4840 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4845 if (can_invert
|| final_invert
)
4846 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4851 immediates
= &pos_immediates
;
4853 /* Is the negated immediate sequence more efficient? */
4854 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4857 immediates
= &neg_immediates
;
4862 /* Is the inverted immediate sequence more efficient?
4863 We must allow for an extra NOT instruction for XOR operations, although
4864 there is some chance that the final 'mvn' will get optimized later. */
4865 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4868 immediates
= &inv_immediates
;
4876 /* Now output the chosen sequence as instructions. */
4879 for (i
= 0; i
< insns
; i
++)
4881 rtx new_src
, temp1_rtx
;
4883 temp1
= immediates
->i
[i
];
4885 if (code
== SET
|| code
== MINUS
)
4886 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4887 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4888 new_src
= gen_reg_rtx (mode
);
4894 else if (can_negate
)
4897 temp1
= trunc_int_for_mode (temp1
, mode
);
4898 temp1_rtx
= GEN_INT (temp1
);
4902 else if (code
== MINUS
)
4903 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4905 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4907 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
4912 can_negate
= can_invert
;
4916 else if (code
== MINUS
)
4924 emit_constant_insn (cond
, gen_rtx_SET (target
,
4925 gen_rtx_NOT (mode
, source
)));
4932 /* Canonicalize a comparison so that we are more likely to recognize it.
4933 This can be done for a few constant compares, where we can make the
4934 immediate value easier to load. */
4937 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4938 bool op0_preserve_value
)
4941 unsigned HOST_WIDE_INT i
, maxval
;
4943 mode
= GET_MODE (*op0
);
4944 if (mode
== VOIDmode
)
4945 mode
= GET_MODE (*op1
);
4947 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
4949 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4950 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4951 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4952 for GTU/LEU in Thumb mode. */
4956 if (*code
== GT
|| *code
== LE
4957 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4959 /* Missing comparison. First try to use an available
4961 if (CONST_INT_P (*op1
))
4969 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4971 *op1
= GEN_INT (i
+ 1);
4972 *code
= *code
== GT
? GE
: LT
;
4978 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4979 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4981 *op1
= GEN_INT (i
+ 1);
4982 *code
= *code
== GTU
? GEU
: LTU
;
4991 /* If that did not work, reverse the condition. */
4992 if (!op0_preserve_value
)
4994 std::swap (*op0
, *op1
);
4995 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5001 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5002 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5003 to facilitate possible combining with a cmp into 'ands'. */
5005 && GET_CODE (*op0
) == ZERO_EXTEND
5006 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5007 && GET_MODE (XEXP (*op0
, 0)) == QImode
5008 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5009 && subreg_lowpart_p (XEXP (*op0
, 0))
5010 && *op1
== const0_rtx
)
5011 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5014 /* Comparisons smaller than DImode. Only adjust comparisons against
5015 an out-of-range constant. */
5016 if (!CONST_INT_P (*op1
)
5017 || const_ok_for_arm (INTVAL (*op1
))
5018 || const_ok_for_arm (- INTVAL (*op1
)))
5032 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5034 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5035 *code
= *code
== GT
? GE
: LT
;
5043 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5045 *op1
= GEN_INT (i
- 1);
5046 *code
= *code
== GE
? GT
: LE
;
5053 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5054 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5056 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5057 *code
= *code
== GTU
? GEU
: LTU
;
5065 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5067 *op1
= GEN_INT (i
- 1);
5068 *code
= *code
== GEU
? GTU
: LEU
;
5079 /* Define how to find the value returned by a function. */
5082 arm_function_value(const_tree type
, const_tree func
,
5083 bool outgoing ATTRIBUTE_UNUSED
)
5086 int unsignedp ATTRIBUTE_UNUSED
;
5087 rtx r ATTRIBUTE_UNUSED
;
5089 mode
= TYPE_MODE (type
);
5091 if (TARGET_AAPCS_BASED
)
5092 return aapcs_allocate_return_reg (mode
, type
, func
);
5094 /* Promote integer types. */
5095 if (INTEGRAL_TYPE_P (type
))
5096 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5098 /* Promotes small structs returned in a register to full-word size
5099 for big-endian AAPCS. */
5100 if (arm_return_in_msb (type
))
5102 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5103 if (size
% UNITS_PER_WORD
!= 0)
5105 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5106 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5110 return arm_libcall_value_1 (mode
);
5113 /* libcall hashtable helpers. */
5115 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5117 static inline hashval_t
hash (const rtx_def
*);
5118 static inline bool equal (const rtx_def
*, const rtx_def
*);
5119 static inline void remove (rtx_def
*);
5123 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5125 return rtx_equal_p (p1
, p2
);
5129 libcall_hasher::hash (const rtx_def
*p1
)
5131 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5134 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5137 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5139 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5143 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5145 static bool init_done
= false;
5146 static libcall_table_type
*libcall_htab
= NULL
;
5152 libcall_htab
= new libcall_table_type (31);
5153 add_libcall (libcall_htab
,
5154 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5155 add_libcall (libcall_htab
,
5156 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5157 add_libcall (libcall_htab
,
5158 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5159 add_libcall (libcall_htab
,
5160 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5162 add_libcall (libcall_htab
,
5163 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5164 add_libcall (libcall_htab
,
5165 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5166 add_libcall (libcall_htab
,
5167 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5168 add_libcall (libcall_htab
,
5169 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5171 add_libcall (libcall_htab
,
5172 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5173 add_libcall (libcall_htab
,
5174 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5175 add_libcall (libcall_htab
,
5176 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5177 add_libcall (libcall_htab
,
5178 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5179 add_libcall (libcall_htab
,
5180 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5181 add_libcall (libcall_htab
,
5182 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5183 add_libcall (libcall_htab
,
5184 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5185 add_libcall (libcall_htab
,
5186 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5188 /* Values from double-precision helper functions are returned in core
5189 registers if the selected core only supports single-precision
5190 arithmetic, even if we are using the hard-float ABI. The same is
5191 true for single-precision helpers, but we will never be using the
5192 hard-float ABI on a CPU which doesn't support single-precision
5193 operations in hardware. */
5194 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5195 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5196 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5197 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5198 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5199 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5200 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5201 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5202 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5203 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5204 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5205 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5207 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5211 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5215 arm_libcall_value_1 (machine_mode mode
)
5217 if (TARGET_AAPCS_BASED
)
5218 return aapcs_libcall_value (mode
);
5219 else if (TARGET_IWMMXT_ABI
5220 && arm_vector_mode_supported_p (mode
))
5221 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5223 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5226 /* Define how to find the value returned by a library function
5227 assuming the value has mode MODE. */
5230 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5232 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5233 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5235 /* The following libcalls return their result in integer registers,
5236 even though they return a floating point value. */
5237 if (arm_libcall_uses_aapcs_base (libcall
))
5238 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5242 return arm_libcall_value_1 (mode
);
5245 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5248 arm_function_value_regno_p (const unsigned int regno
)
5250 if (regno
== ARG_REGISTER (1)
5252 && TARGET_AAPCS_BASED
5254 && TARGET_HARD_FLOAT
5255 && regno
== FIRST_VFP_REGNUM
)
5256 || (TARGET_IWMMXT_ABI
5257 && regno
== FIRST_IWMMXT_REGNUM
))
5263 /* Determine the amount of memory needed to store the possible return
5264 registers of an untyped call. */
5266 arm_apply_result_size (void)
5272 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
5274 if (TARGET_IWMMXT_ABI
)
5281 /* Decide whether TYPE should be returned in memory (true)
5282 or in a register (false). FNTYPE is the type of the function making
5285 arm_return_in_memory (const_tree type
, const_tree fntype
)
5289 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5291 if (TARGET_AAPCS_BASED
)
5293 /* Simple, non-aggregate types (ie not including vectors and
5294 complex) are always returned in a register (or registers).
5295 We don't care about which register here, so we can short-cut
5296 some of the detail. */
5297 if (!AGGREGATE_TYPE_P (type
)
5298 && TREE_CODE (type
) != VECTOR_TYPE
5299 && TREE_CODE (type
) != COMPLEX_TYPE
)
5302 /* Any return value that is no larger than one word can be
5304 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5307 /* Check any available co-processors to see if they accept the
5308 type as a register candidate (VFP, for example, can return
5309 some aggregates in consecutive registers). These aren't
5310 available if the call is variadic. */
5311 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5314 /* Vector values should be returned using ARM registers, not
5315 memory (unless they're over 16 bytes, which will break since
5316 we only have four call-clobbered registers to play with). */
5317 if (TREE_CODE (type
) == VECTOR_TYPE
)
5318 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5320 /* The rest go in memory. */
5324 if (TREE_CODE (type
) == VECTOR_TYPE
)
5325 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5327 if (!AGGREGATE_TYPE_P (type
) &&
5328 (TREE_CODE (type
) != VECTOR_TYPE
))
5329 /* All simple types are returned in registers. */
5332 if (arm_abi
!= ARM_ABI_APCS
)
5334 /* ATPCS and later return aggregate types in memory only if they are
5335 larger than a word (or are variable size). */
5336 return (size
< 0 || size
> UNITS_PER_WORD
);
5339 /* For the arm-wince targets we choose to be compatible with Microsoft's
5340 ARM and Thumb compilers, which always return aggregates in memory. */
5342 /* All structures/unions bigger than one word are returned in memory.
5343 Also catch the case where int_size_in_bytes returns -1. In this case
5344 the aggregate is either huge or of variable size, and in either case
5345 we will want to return it via memory and not in a register. */
5346 if (size
< 0 || size
> UNITS_PER_WORD
)
5349 if (TREE_CODE (type
) == RECORD_TYPE
)
5353 /* For a struct the APCS says that we only return in a register
5354 if the type is 'integer like' and every addressable element
5355 has an offset of zero. For practical purposes this means
5356 that the structure can have at most one non bit-field element
5357 and that this element must be the first one in the structure. */
5359 /* Find the first field, ignoring non FIELD_DECL things which will
5360 have been created by C++. */
5361 for (field
= TYPE_FIELDS (type
);
5362 field
&& TREE_CODE (field
) != FIELD_DECL
;
5363 field
= DECL_CHAIN (field
))
5367 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5369 /* Check that the first field is valid for returning in a register. */
5371 /* ... Floats are not allowed */
5372 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5375 /* ... Aggregates that are not themselves valid for returning in
5376 a register are not allowed. */
5377 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5380 /* Now check the remaining fields, if any. Only bitfields are allowed,
5381 since they are not addressable. */
5382 for (field
= DECL_CHAIN (field
);
5384 field
= DECL_CHAIN (field
))
5386 if (TREE_CODE (field
) != FIELD_DECL
)
5389 if (!DECL_BIT_FIELD_TYPE (field
))
5396 if (TREE_CODE (type
) == UNION_TYPE
)
5400 /* Unions can be returned in registers if every element is
5401 integral, or can be returned in an integer register. */
5402 for (field
= TYPE_FIELDS (type
);
5404 field
= DECL_CHAIN (field
))
5406 if (TREE_CODE (field
) != FIELD_DECL
)
5409 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5412 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5418 #endif /* not ARM_WINCE */
5420 /* Return all other types in memory. */
5424 const struct pcs_attribute_arg
5428 } pcs_attribute_args
[] =
5430 {"aapcs", ARM_PCS_AAPCS
},
5431 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5433 /* We could recognize these, but changes would be needed elsewhere
5434 * to implement them. */
5435 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5436 {"atpcs", ARM_PCS_ATPCS
},
5437 {"apcs", ARM_PCS_APCS
},
5439 {NULL
, ARM_PCS_UNKNOWN
}
5443 arm_pcs_from_attribute (tree attr
)
5445 const struct pcs_attribute_arg
*ptr
;
5448 /* Get the value of the argument. */
5449 if (TREE_VALUE (attr
) == NULL_TREE
5450 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5451 return ARM_PCS_UNKNOWN
;
5453 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5455 /* Check it against the list of known arguments. */
5456 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5457 if (streq (arg
, ptr
->arg
))
5460 /* An unrecognized interrupt type. */
5461 return ARM_PCS_UNKNOWN
;
5464 /* Get the PCS variant to use for this call. TYPE is the function's type
5465 specification, DECL is the specific declartion. DECL may be null if
5466 the call could be indirect or if this is a library call. */
5468 arm_get_pcs_model (const_tree type
, const_tree decl
)
5470 bool user_convention
= false;
5471 enum arm_pcs user_pcs
= arm_pcs_default
;
5476 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5479 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5480 user_convention
= true;
5483 if (TARGET_AAPCS_BASED
)
5485 /* Detect varargs functions. These always use the base rules
5486 (no argument is ever a candidate for a co-processor
5488 bool base_rules
= stdarg_p (type
);
5490 if (user_convention
)
5492 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5493 sorry ("non-AAPCS derived PCS variant");
5494 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5495 error ("variadic functions must use the base AAPCS variant");
5499 return ARM_PCS_AAPCS
;
5500 else if (user_convention
)
5502 else if (decl
&& flag_unit_at_a_time
)
5504 /* Local functions never leak outside this compilation unit,
5505 so we are free to use whatever conventions are
5507 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5508 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5510 return ARM_PCS_AAPCS_LOCAL
;
5513 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5514 sorry ("PCS variant");
5516 /* For everything else we use the target's default. */
5517 return arm_pcs_default
;
5522 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5523 const_tree fntype ATTRIBUTE_UNUSED
,
5524 rtx libcall ATTRIBUTE_UNUSED
,
5525 const_tree fndecl ATTRIBUTE_UNUSED
)
5527 /* Record the unallocated VFP registers. */
5528 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5529 pcum
->aapcs_vfp_reg_alloc
= 0;
5532 /* Walk down the type tree of TYPE counting consecutive base elements.
5533 If *MODEP is VOIDmode, then set it to the first valid floating point
5534 type. If a non-floating point type is found, or if a floating point
5535 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5536 otherwise return the count in the sub-tree. */
5538 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5543 switch (TREE_CODE (type
))
5546 mode
= TYPE_MODE (type
);
5547 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5550 if (*modep
== VOIDmode
)
5559 mode
= TYPE_MODE (TREE_TYPE (type
));
5560 if (mode
!= DFmode
&& mode
!= SFmode
)
5563 if (*modep
== VOIDmode
)
5572 /* Use V2SImode and V4SImode as representatives of all 64-bit
5573 and 128-bit vector types, whether or not those modes are
5574 supported with the present options. */
5575 size
= int_size_in_bytes (type
);
5588 if (*modep
== VOIDmode
)
5591 /* Vector modes are considered to be opaque: two vectors are
5592 equivalent for the purposes of being homogeneous aggregates
5593 if they are the same size. */
5602 tree index
= TYPE_DOMAIN (type
);
5604 /* Can't handle incomplete types nor sizes that are not
5606 if (!COMPLETE_TYPE_P (type
)
5607 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5610 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5613 || !TYPE_MAX_VALUE (index
)
5614 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5615 || !TYPE_MIN_VALUE (index
)
5616 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5620 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5621 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5623 /* There must be no padding. */
5624 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5636 /* Can't handle incomplete types nor sizes that are not
5638 if (!COMPLETE_TYPE_P (type
)
5639 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5642 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5644 if (TREE_CODE (field
) != FIELD_DECL
)
5647 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5653 /* There must be no padding. */
5654 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5661 case QUAL_UNION_TYPE
:
5663 /* These aren't very interesting except in a degenerate case. */
5668 /* Can't handle incomplete types nor sizes that are not
5670 if (!COMPLETE_TYPE_P (type
)
5671 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5674 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5676 if (TREE_CODE (field
) != FIELD_DECL
)
5679 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5682 count
= count
> sub_count
? count
: sub_count
;
5685 /* There must be no padding. */
5686 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5699 /* Return true if PCS_VARIANT should use VFP registers. */
5701 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5703 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5705 static bool seen_thumb1_vfp
= false;
5707 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5709 sorry ("Thumb-1 hard-float VFP ABI");
5710 /* sorry() is not immediately fatal, so only display this once. */
5711 seen_thumb1_vfp
= true;
5717 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5720 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5721 (TARGET_VFP_DOUBLE
|| !is_double
));
5724 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5725 suitable for passing or returning in VFP registers for the PCS
5726 variant selected. If it is, then *BASE_MODE is updated to contain
5727 a machine mode describing each element of the argument's type and
5728 *COUNT to hold the number of such elements. */
5730 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5731 machine_mode mode
, const_tree type
,
5732 machine_mode
*base_mode
, int *count
)
5734 machine_mode new_mode
= VOIDmode
;
5736 /* If we have the type information, prefer that to working things
5737 out from the mode. */
5740 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5742 if (ag_count
> 0 && ag_count
<= 4)
5747 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5748 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5749 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5754 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5757 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5763 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5766 *base_mode
= new_mode
;
5771 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5772 machine_mode mode
, const_tree type
)
5774 int count ATTRIBUTE_UNUSED
;
5775 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5777 if (!use_vfp_abi (pcs_variant
, false))
5779 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5784 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5787 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5790 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5791 &pcum
->aapcs_vfp_rmode
,
5792 &pcum
->aapcs_vfp_rcount
);
5795 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5796 for the behaviour of this function. */
5799 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5800 const_tree type ATTRIBUTE_UNUSED
)
5803 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
5804 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
5805 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5808 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5809 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5811 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5813 || (mode
== TImode
&& ! TARGET_NEON
)
5814 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5817 int rcount
= pcum
->aapcs_vfp_rcount
;
5819 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5823 /* Avoid using unsupported vector modes. */
5824 if (rmode
== V2SImode
)
5826 else if (rmode
== V4SImode
)
5833 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5834 for (i
= 0; i
< rcount
; i
++)
5836 rtx tmp
= gen_rtx_REG (rmode
,
5837 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5838 tmp
= gen_rtx_EXPR_LIST
5840 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5841 XVECEXP (par
, 0, i
) = tmp
;
5844 pcum
->aapcs_reg
= par
;
5847 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5853 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
5854 comment there for the behaviour of this function. */
5857 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5859 const_tree type ATTRIBUTE_UNUSED
)
5861 if (!use_vfp_abi (pcs_variant
, false))
5865 || (GET_MODE_CLASS (mode
) == MODE_INT
5866 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
5870 machine_mode ag_mode
;
5875 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5880 if (ag_mode
== V2SImode
)
5882 else if (ag_mode
== V4SImode
)
5888 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5889 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5890 for (i
= 0; i
< count
; i
++)
5892 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5893 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5894 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5895 XVECEXP (par
, 0, i
) = tmp
;
5901 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5905 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5906 machine_mode mode ATTRIBUTE_UNUSED
,
5907 const_tree type ATTRIBUTE_UNUSED
)
5909 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5910 pcum
->aapcs_vfp_reg_alloc
= 0;
5914 #define AAPCS_CP(X) \
5916 aapcs_ ## X ## _cum_init, \
5917 aapcs_ ## X ## _is_call_candidate, \
5918 aapcs_ ## X ## _allocate, \
5919 aapcs_ ## X ## _is_return_candidate, \
5920 aapcs_ ## X ## _allocate_return_reg, \
5921 aapcs_ ## X ## _advance \
5924 /* Table of co-processors that can be used to pass arguments in
5925 registers. Idealy no arugment should be a candidate for more than
5926 one co-processor table entry, but the table is processed in order
5927 and stops after the first match. If that entry then fails to put
5928 the argument into a co-processor register, the argument will go on
5932 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5933 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5935 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5936 BLKmode) is a candidate for this co-processor's registers; this
5937 function should ignore any position-dependent state in
5938 CUMULATIVE_ARGS and only use call-type dependent information. */
5939 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5941 /* Return true if the argument does get a co-processor register; it
5942 should set aapcs_reg to an RTX of the register allocated as is
5943 required for a return from FUNCTION_ARG. */
5944 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5946 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
5947 be returned in this co-processor's registers. */
5948 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
5950 /* Allocate and return an RTX element to hold the return type of a call. This
5951 routine must not fail and will only be called if is_return_candidate
5952 returned true with the same parameters. */
5953 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
5955 /* Finish processing this argument and prepare to start processing
5957 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5958 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5966 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5971 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5972 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5979 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5981 /* We aren't passed a decl, so we can't check that a call is local.
5982 However, it isn't clear that that would be a win anyway, since it
5983 might limit some tail-calling opportunities. */
5984 enum arm_pcs pcs_variant
;
5988 const_tree fndecl
= NULL_TREE
;
5990 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5993 fntype
= TREE_TYPE (fntype
);
5996 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5999 pcs_variant
= arm_pcs_default
;
6001 if (pcs_variant
!= ARM_PCS_AAPCS
)
6005 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6006 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6015 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6018 /* We aren't passed a decl, so we can't check that a call is local.
6019 However, it isn't clear that that would be a win anyway, since it
6020 might limit some tail-calling opportunities. */
6021 enum arm_pcs pcs_variant
;
6022 int unsignedp ATTRIBUTE_UNUSED
;
6026 const_tree fndecl
= NULL_TREE
;
6028 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6031 fntype
= TREE_TYPE (fntype
);
6034 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6037 pcs_variant
= arm_pcs_default
;
6039 /* Promote integer types. */
6040 if (type
&& INTEGRAL_TYPE_P (type
))
6041 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6043 if (pcs_variant
!= ARM_PCS_AAPCS
)
6047 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6048 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6050 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6054 /* Promotes small structs returned in a register to full-word size
6055 for big-endian AAPCS. */
6056 if (type
&& arm_return_in_msb (type
))
6058 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6059 if (size
% UNITS_PER_WORD
!= 0)
6061 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6062 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
6066 return gen_rtx_REG (mode
, R0_REGNUM
);
6070 aapcs_libcall_value (machine_mode mode
)
6072 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6073 && GET_MODE_SIZE (mode
) <= 4)
6076 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6079 /* Lay out a function argument using the AAPCS rules. The rule
6080 numbers referred to here are those in the AAPCS. */
6082 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6083 const_tree type
, bool named
)
6088 /* We only need to do this once per argument. */
6089 if (pcum
->aapcs_arg_processed
)
6092 pcum
->aapcs_arg_processed
= true;
6094 /* Special case: if named is false then we are handling an incoming
6095 anonymous argument which is on the stack. */
6099 /* Is this a potential co-processor register candidate? */
6100 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6102 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6103 pcum
->aapcs_cprc_slot
= slot
;
6105 /* We don't have to apply any of the rules from part B of the
6106 preparation phase, these are handled elsewhere in the
6111 /* A Co-processor register candidate goes either in its own
6112 class of registers or on the stack. */
6113 if (!pcum
->aapcs_cprc_failed
[slot
])
6115 /* C1.cp - Try to allocate the argument to co-processor
6117 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6120 /* C2.cp - Put the argument on the stack and note that we
6121 can't assign any more candidates in this slot. We also
6122 need to note that we have allocated stack space, so that
6123 we won't later try to split a non-cprc candidate between
6124 core registers and the stack. */
6125 pcum
->aapcs_cprc_failed
[slot
] = true;
6126 pcum
->can_split
= false;
6129 /* We didn't get a register, so this argument goes on the
6131 gcc_assert (pcum
->can_split
== false);
6136 /* C3 - For double-word aligned arguments, round the NCRN up to the
6137 next even number. */
6138 ncrn
= pcum
->aapcs_ncrn
;
6139 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
6142 nregs
= ARM_NUM_REGS2(mode
, type
);
6144 /* Sigh, this test should really assert that nregs > 0, but a GCC
6145 extension allows empty structs and then gives them empty size; it
6146 then allows such a structure to be passed by value. For some of
6147 the code below we have to pretend that such an argument has
6148 non-zero size so that we 'locate' it correctly either in
6149 registers or on the stack. */
6150 gcc_assert (nregs
>= 0);
6152 nregs2
= nregs
? nregs
: 1;
6154 /* C4 - Argument fits entirely in core registers. */
6155 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6157 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6158 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6162 /* C5 - Some core registers left and there are no arguments already
6163 on the stack: split this argument between the remaining core
6164 registers and the stack. */
6165 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6167 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6168 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6169 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6173 /* C6 - NCRN is set to 4. */
6174 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6176 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6180 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6181 for a call to a function whose data type is FNTYPE.
6182 For a library call, FNTYPE is NULL. */
6184 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6186 tree fndecl ATTRIBUTE_UNUSED
)
6188 /* Long call handling. */
6190 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6192 pcum
->pcs_variant
= arm_pcs_default
;
6194 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6196 if (arm_libcall_uses_aapcs_base (libname
))
6197 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6199 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6200 pcum
->aapcs_reg
= NULL_RTX
;
6201 pcum
->aapcs_partial
= 0;
6202 pcum
->aapcs_arg_processed
= false;
6203 pcum
->aapcs_cprc_slot
= -1;
6204 pcum
->can_split
= true;
6206 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6210 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6212 pcum
->aapcs_cprc_failed
[i
] = false;
6213 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6221 /* On the ARM, the offset starts at 0. */
6223 pcum
->iwmmxt_nregs
= 0;
6224 pcum
->can_split
= true;
6226 /* Varargs vectors are treated the same as long long.
6227 named_count avoids having to change the way arm handles 'named' */
6228 pcum
->named_count
= 0;
6231 if (TARGET_REALLY_IWMMXT
&& fntype
)
6235 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6237 fn_arg
= TREE_CHAIN (fn_arg
))
6238 pcum
->named_count
+= 1;
6240 if (! pcum
->named_count
)
6241 pcum
->named_count
= INT_MAX
;
6245 /* Return true if mode/type need doubleword alignment. */
6247 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6250 return PARM_BOUNDARY
< GET_MODE_ALIGNMENT (mode
);
6252 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6253 if (!AGGREGATE_TYPE_P (type
))
6254 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6256 /* Array types: Use member alignment of element type. */
6257 if (TREE_CODE (type
) == ARRAY_TYPE
)
6258 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6260 /* Record/aggregate types: Use greatest member alignment of any member. */
6261 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6262 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6269 /* Determine where to put an argument to a function.
6270 Value is zero to push the argument on the stack,
6271 or a hard register in which to store the argument.
6273 MODE is the argument's machine mode.
6274 TYPE is the data type of the argument (as a tree).
6275 This is null for libcalls where that information may
6277 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6278 the preceding args and about the function being called.
6279 NAMED is nonzero if this argument is a named parameter
6280 (otherwise it is an extra parameter matching an ellipsis).
6282 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6283 other arguments are passed on the stack. If (NAMED == 0) (which happens
6284 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6285 defined), say it is passed in the stack (function_prologue will
6286 indeed make it pass in the stack if necessary). */
6289 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6290 const_tree type
, bool named
)
6292 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6295 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6296 a call insn (op3 of a call_value insn). */
6297 if (mode
== VOIDmode
)
6300 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6302 aapcs_layout_arg (pcum
, mode
, type
, named
);
6303 return pcum
->aapcs_reg
;
6306 /* Varargs vectors are treated the same as long long.
6307 named_count avoids having to change the way arm handles 'named' */
6308 if (TARGET_IWMMXT_ABI
6309 && arm_vector_mode_supported_p (mode
)
6310 && pcum
->named_count
> pcum
->nargs
+ 1)
6312 if (pcum
->iwmmxt_nregs
<= 9)
6313 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6316 pcum
->can_split
= false;
6321 /* Put doubleword aligned quantities in even register pairs. */
6323 && ARM_DOUBLEWORD_ALIGN
6324 && arm_needs_doubleword_align (mode
, type
))
6327 /* Only allow splitting an arg between regs and memory if all preceding
6328 args were allocated to regs. For args passed by reference we only count
6329 the reference pointer. */
6330 if (pcum
->can_split
)
6333 nregs
= ARM_NUM_REGS2 (mode
, type
);
6335 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6338 return gen_rtx_REG (mode
, pcum
->nregs
);
6342 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6344 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
6345 ? DOUBLEWORD_ALIGNMENT
6350 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6351 tree type
, bool named
)
6353 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6354 int nregs
= pcum
->nregs
;
6356 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6358 aapcs_layout_arg (pcum
, mode
, type
, named
);
6359 return pcum
->aapcs_partial
;
6362 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6365 if (NUM_ARG_REGS
> nregs
6366 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6368 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6373 /* Update the data in PCUM to advance over an argument
6374 of mode MODE and data type TYPE.
6375 (TYPE is null for libcalls where that information may not be available.) */
6378 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6379 const_tree type
, bool named
)
6381 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6383 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6385 aapcs_layout_arg (pcum
, mode
, type
, named
);
6387 if (pcum
->aapcs_cprc_slot
>= 0)
6389 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6391 pcum
->aapcs_cprc_slot
= -1;
6394 /* Generic stuff. */
6395 pcum
->aapcs_arg_processed
= false;
6396 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6397 pcum
->aapcs_reg
= NULL_RTX
;
6398 pcum
->aapcs_partial
= 0;
6403 if (arm_vector_mode_supported_p (mode
)
6404 && pcum
->named_count
> pcum
->nargs
6405 && TARGET_IWMMXT_ABI
)
6406 pcum
->iwmmxt_nregs
+= 1;
6408 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6412 /* Variable sized types are passed by reference. This is a GCC
6413 extension to the ARM ABI. */
6416 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6417 machine_mode mode ATTRIBUTE_UNUSED
,
6418 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6420 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;
6434 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6436 arm_pragma_long_calls
= LONG
;
6440 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6442 arm_pragma_long_calls
= SHORT
;
6446 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6448 arm_pragma_long_calls
= OFF
;
6451 /* Handle an attribute requiring a FUNCTION_DECL;
6452 arguments as in struct attribute_spec.handler. */
6454 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6455 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6457 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6459 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6461 *no_add_attrs
= true;
6467 /* Handle an "interrupt" or "isr" attribute;
6468 arguments as in struct attribute_spec.handler. */
6470 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6475 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6477 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6479 *no_add_attrs
= true;
6481 /* FIXME: the argument if any is checked for type attributes;
6482 should it be checked for decl ones? */
6486 if (TREE_CODE (*node
) == FUNCTION_TYPE
6487 || TREE_CODE (*node
) == METHOD_TYPE
)
6489 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6491 warning (OPT_Wattributes
, "%qE attribute ignored",
6493 *no_add_attrs
= true;
6496 else if (TREE_CODE (*node
) == POINTER_TYPE
6497 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6498 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6499 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6501 *node
= build_variant_type_copy (*node
);
6502 TREE_TYPE (*node
) = build_type_attribute_variant
6504 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6505 *no_add_attrs
= true;
6509 /* Possibly pass this attribute on from the type to a decl. */
6510 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6511 | (int) ATTR_FLAG_FUNCTION_NEXT
6512 | (int) ATTR_FLAG_ARRAY_NEXT
))
6514 *no_add_attrs
= true;
6515 return tree_cons (name
, args
, NULL_TREE
);
6519 warning (OPT_Wattributes
, "%qE attribute ignored",
6528 /* Handle a "pcs" attribute; arguments as in struct
6529 attribute_spec.handler. */
6531 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6532 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6534 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6536 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6537 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
6567 /* Return 0 if the attributes for two types are incompatible, 1 if they
6568 are compatible, and 2 if they are nearly compatible (which causes a
6569 warning to be generated). */
6571 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6575 /* Check for mismatch of non-default calling convention. */
6576 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6579 /* Check for mismatched call attributes. */
6580 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6581 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6582 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6583 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6585 /* Only bother to check if an attribute is defined. */
6586 if (l1
| l2
| s1
| s2
)
6588 /* If one type has an attribute, the other must have the same attribute. */
6589 if ((l1
!= l2
) || (s1
!= s2
))
6592 /* Disallow mixed attributes. */
6593 if ((l1
& s2
) || (l2
& s1
))
6597 /* Check for mismatched ISR attribute. */
6598 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6600 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6601 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6603 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6610 /* Assigns default attributes to newly defined type. This is used to
6611 set short_call/long_call attributes for function types of
6612 functions defined inside corresponding #pragma scopes. */
6614 arm_set_default_type_attributes (tree type
)
6616 /* Add __attribute__ ((long_call)) to all functions, when
6617 inside #pragma long_calls or __attribute__ ((short_call)),
6618 when inside #pragma no_long_calls. */
6619 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6621 tree type_attr_list
, attr_name
;
6622 type_attr_list
= TYPE_ATTRIBUTES (type
);
6624 if (arm_pragma_long_calls
== LONG
)
6625 attr_name
= get_identifier ("long_call");
6626 else if (arm_pragma_long_calls
== SHORT
)
6627 attr_name
= get_identifier ("short_call");
6631 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6632 TYPE_ATTRIBUTES (type
) = type_attr_list
;
6636 /* Return true if DECL is known to be linked into section SECTION. */
6639 arm_function_in_section_p (tree decl
, section
*section
)
6641 /* We can only be certain about the prevailing symbol definition. */
6642 if (!decl_binds_to_current_def_p (decl
))
6645 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6646 if (!DECL_SECTION_NAME (decl
))
6648 /* Make sure that we will not create a unique section for DECL. */
6649 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
6653 return function_section (decl
) == section
;
6656 /* Return nonzero if a 32-bit "long_call" should be generated for
6657 a call from the current function to DECL. We generate a long_call
6660 a. has an __attribute__((long call))
6661 or b. is within the scope of a #pragma long_calls
6662 or c. the -mlong-calls command line switch has been specified
6664 However we do not generate a long call if the function:
6666 d. has an __attribute__ ((short_call))
6667 or e. is inside the scope of a #pragma no_long_calls
6668 or f. is defined in the same section as the current function. */
6671 arm_is_long_call_p (tree decl
)
6676 return TARGET_LONG_CALLS
;
6678 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6679 if (lookup_attribute ("short_call", attrs
))
6682 /* For "f", be conservative, and only cater for cases in which the
6683 whole of the current function is placed in the same section. */
6684 if (!flag_reorder_blocks_and_partition
6685 && TREE_CODE (decl
) == FUNCTION_DECL
6686 && arm_function_in_section_p (decl
, current_function_section ()))
6689 if (lookup_attribute ("long_call", attrs
))
6692 return TARGET_LONG_CALLS
;
6695 /* Return nonzero if it is ok to make a tail-call to DECL. */
6697 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6699 unsigned long func_type
;
6701 if (cfun
->machine
->sibcall_blocked
)
6704 /* Never tailcall something if we are generating code for Thumb-1. */
6708 /* The PIC register is live on entry to VxWorks PLT entries, so we
6709 must make the call before restoring the PIC register. */
6710 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6713 /* If we are interworking and the function is not declared static
6714 then we can't tail-call it unless we know that it exists in this
6715 compilation unit (since it might be a Thumb routine). */
6716 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6717 && !TREE_ASM_WRITTEN (decl
))
6720 func_type
= arm_current_func_type ();
6721 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6722 if (IS_INTERRUPT (func_type
))
6725 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6727 /* Check that the return value locations are the same. For
6728 example that we aren't returning a value from the sibling in
6729 a VFP register but then need to transfer it to a core
6732 tree decl_or_type
= decl
;
6734 /* If it is an indirect function pointer, get the function type. */
6736 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
6738 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
6739 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6741 if (!rtx_equal_p (a
, b
))
6745 /* Never tailcall if function may be called with a misaligned SP. */
6746 if (IS_STACKALIGN (func_type
))
6749 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6750 references should become a NOP. Don't convert such calls into
6752 if (TARGET_AAPCS_BASED
6753 && arm_abi
== ARM_ABI_AAPCS
6755 && DECL_WEAK (decl
))
6758 /* Everything else is ok. */
6763 /* Addressing mode support functions. */
6765 /* Return nonzero if X is a legitimate immediate operand when compiling
6766 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6768 legitimate_pic_operand_p (rtx x
)
6770 if (GET_CODE (x
) == SYMBOL_REF
6771 || (GET_CODE (x
) == CONST
6772 && GET_CODE (XEXP (x
, 0)) == PLUS
6773 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6779 /* Record that the current function needs a PIC register. Initialize
6780 cfun->machine->pic_reg if we have not already done so. */
6783 require_pic_register (void)
6785 /* A lot of the logic here is made obscure by the fact that this
6786 routine gets called as part of the rtx cost estimation process.
6787 We don't want those calls to affect any assumptions about the real
6788 function; and further, we can't call entry_of_function() until we
6789 start the real expansion process. */
6790 if (!crtl
->uses_pic_offset_table
)
6792 gcc_assert (can_create_pseudo_p ());
6793 if (arm_pic_register
!= INVALID_REGNUM
6794 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6796 if (!cfun
->machine
->pic_reg
)
6797 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6799 /* Play games to avoid marking the function as needing pic
6800 if we are being called as part of the cost-estimation
6802 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6803 crtl
->uses_pic_offset_table
= 1;
6807 rtx_insn
*seq
, *insn
;
6809 if (!cfun
->machine
->pic_reg
)
6810 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6812 /* Play games to avoid marking the function as needing pic
6813 if we are being called as part of the cost-estimation
6815 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6817 crtl
->uses_pic_offset_table
= 1;
6820 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6821 && arm_pic_register
> LAST_LO_REGNUM
)
6822 emit_move_insn (cfun
->machine
->pic_reg
,
6823 gen_rtx_REG (Pmode
, arm_pic_register
));
6825 arm_load_pic_register (0UL);
6830 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6832 INSN_LOCATION (insn
) = prologue_location
;
6834 /* We can be called during expansion of PHI nodes, where
6835 we can't yet emit instructions directly in the final
6836 insn stream. Queue the insns on the entry edge, they will
6837 be committed after everything else is expanded. */
6838 insert_insn_on_edge (seq
,
6839 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6846 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
6848 if (GET_CODE (orig
) == SYMBOL_REF
6849 || GET_CODE (orig
) == LABEL_REF
)
6855 gcc_assert (can_create_pseudo_p ());
6856 reg
= gen_reg_rtx (Pmode
);
6859 /* VxWorks does not impose a fixed gap between segments; the run-time
6860 gap can be different from the object-file gap. We therefore can't
6861 use GOTOFF unless we are absolutely sure that the symbol is in the
6862 same segment as the GOT. Unfortunately, the flexibility of linker
6863 scripts means that we can't be sure of that in general, so assume
6864 that GOTOFF is never valid on VxWorks. */
6865 if ((GET_CODE (orig
) == LABEL_REF
6866 || (GET_CODE (orig
) == SYMBOL_REF
&&
6867 SYMBOL_REF_LOCAL_P (orig
)))
6869 && arm_pic_data_is_text_relative
)
6870 insn
= arm_pic_static_addr (orig
, reg
);
6876 /* If this function doesn't have a pic register, create one now. */
6877 require_pic_register ();
6879 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6881 /* Make the MEM as close to a constant as possible. */
6882 mem
= SET_SRC (pat
);
6883 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6884 MEM_READONLY_P (mem
) = 1;
6885 MEM_NOTRAP_P (mem
) = 1;
6887 insn
= emit_insn (pat
);
6890 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6892 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6896 else if (GET_CODE (orig
) == CONST
)
6900 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6901 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6904 /* Handle the case where we have: const (UNSPEC_TLS). */
6905 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6906 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6909 /* Handle the case where we have:
6910 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6912 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6913 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6914 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6916 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6922 gcc_assert (can_create_pseudo_p ());
6923 reg
= gen_reg_rtx (Pmode
);
6926 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6928 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6929 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6930 base
== reg
? 0 : reg
);
6932 if (CONST_INT_P (offset
))
6934 /* The base register doesn't really matter, we only want to
6935 test the index for the appropriate mode. */
6936 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6938 gcc_assert (can_create_pseudo_p ());
6939 offset
= force_reg (Pmode
, offset
);
6942 if (CONST_INT_P (offset
))
6943 return plus_constant (Pmode
, base
, INTVAL (offset
));
6946 if (GET_MODE_SIZE (mode
) > 4
6947 && (GET_MODE_CLASS (mode
) == MODE_INT
6948 || TARGET_SOFT_FLOAT
))
6950 emit_insn (gen_addsi3 (reg
, base
, offset
));
6954 return gen_rtx_PLUS (Pmode
, base
, offset
);
6961 /* Find a spare register to use during the prolog of a function. */
6964 thumb_find_work_register (unsigned long pushed_regs_mask
)
6968 /* Check the argument registers first as these are call-used. The
6969 register allocation order means that sometimes r3 might be used
6970 but earlier argument registers might not, so check them all. */
6971 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6972 if (!df_regs_ever_live_p (reg
))
6975 /* Before going on to check the call-saved registers we can try a couple
6976 more ways of deducing that r3 is available. The first is when we are
6977 pushing anonymous arguments onto the stack and we have less than 4
6978 registers worth of fixed arguments(*). In this case r3 will be part of
6979 the variable argument list and so we can be sure that it will be
6980 pushed right at the start of the function. Hence it will be available
6981 for the rest of the prologue.
6982 (*): ie crtl->args.pretend_args_size is greater than 0. */
6983 if (cfun
->machine
->uses_anonymous_args
6984 && crtl
->args
.pretend_args_size
> 0)
6985 return LAST_ARG_REGNUM
;
6987 /* The other case is when we have fixed arguments but less than 4 registers
6988 worth. In this case r3 might be used in the body of the function, but
6989 it is not being used to convey an argument into the function. In theory
6990 we could just check crtl->args.size to see how many bytes are
6991 being passed in argument registers, but it seems that it is unreliable.
6992 Sometimes it will have the value 0 when in fact arguments are being
6993 passed. (See testcase execute/20021111-1.c for an example). So we also
6994 check the args_info.nregs field as well. The problem with this field is
6995 that it makes no allowances for arguments that are passed to the
6996 function but which are not used. Hence we could miss an opportunity
6997 when a function has an unused argument in r3. But it is better to be
6998 safe than to be sorry. */
6999 if (! cfun
->machine
->uses_anonymous_args
7000 && crtl
->args
.size
>= 0
7001 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7002 && (TARGET_AAPCS_BASED
7003 ? crtl
->args
.info
.aapcs_ncrn
< 4
7004 : crtl
->args
.info
.nregs
< 4))
7005 return LAST_ARG_REGNUM
;
7007 /* Otherwise look for a call-saved register that is going to be pushed. */
7008 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7009 if (pushed_regs_mask
& (1 << reg
))
7014 /* Thumb-2 can use high regs. */
7015 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7016 if (pushed_regs_mask
& (1 << reg
))
7019 /* Something went wrong - thumb_compute_save_reg_mask()
7020 should have arranged for a suitable register to be pushed. */
7024 static GTY(()) int pic_labelno
;
7026 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7030 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7032 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7034 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7037 gcc_assert (flag_pic
);
7039 pic_reg
= cfun
->machine
->pic_reg
;
7040 if (TARGET_VXWORKS_RTP
)
7042 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7043 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7044 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7046 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7048 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7049 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7053 /* We use an UNSPEC rather than a LABEL_REF because this label
7054 never appears in the code stream. */
7056 labelno
= GEN_INT (pic_labelno
++);
7057 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7058 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7060 /* On the ARM the PC register contains 'dot + 8' at the time of the
7061 addition, on the Thumb it is 'dot + 4'. */
7062 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7063 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7065 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7069 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7071 else /* TARGET_THUMB1 */
7073 if (arm_pic_register
!= INVALID_REGNUM
7074 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7076 /* We will have pushed the pic register, so we should always be
7077 able to find a work register. */
7078 pic_tmp
= gen_rtx_REG (SImode
,
7079 thumb_find_work_register (saved_regs
));
7080 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7081 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7082 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7084 else if (arm_pic_register
!= INVALID_REGNUM
7085 && arm_pic_register
> LAST_LO_REGNUM
7086 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7088 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7089 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7090 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7093 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7097 /* Need to emit this whether or not we obey regdecls,
7098 since setjmp/longjmp can cause life info to screw up. */
7102 /* Generate code to load the address of a static var when flag_pic is set. */
7104 arm_pic_static_addr (rtx orig
, rtx reg
)
7106 rtx l1
, labelno
, offset_rtx
, insn
;
7108 gcc_assert (flag_pic
);
7110 /* We use an UNSPEC rather than a LABEL_REF because this label
7111 never appears in the code stream. */
7112 labelno
= GEN_INT (pic_labelno
++);
7113 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7114 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7116 /* On the ARM the PC register contains 'dot + 8' at the time of the
7117 addition, on the Thumb it is 'dot + 4'. */
7118 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7119 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7120 UNSPEC_SYMBOL_OFFSET
);
7121 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7123 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7127 /* Return nonzero if X is valid as an ARM state addressing register. */
7129 arm_address_register_rtx_p (rtx x
, int strict_p
)
7139 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7141 return (regno
<= LAST_ARM_REGNUM
7142 || regno
>= FIRST_PSEUDO_REGISTER
7143 || regno
== FRAME_POINTER_REGNUM
7144 || regno
== ARG_POINTER_REGNUM
);
7147 /* Return TRUE if this rtx is the difference of a symbol and a label,
7148 and will reduce to a PC-relative relocation in the object file.
7149 Expressions like this can be left alone when generating PIC, rather
7150 than forced through the GOT. */
7152 pcrel_constant_p (rtx x
)
7154 if (GET_CODE (x
) == MINUS
)
7155 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7160 /* Return true if X will surely end up in an index register after next
7163 will_be_in_index_register (const_rtx x
)
7165 /* arm.md: calculate_pic_address will split this into a register. */
7166 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7169 /* Return nonzero if X is a valid ARM state address operand. */
7171 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7175 enum rtx_code code
= GET_CODE (x
);
7177 if (arm_address_register_rtx_p (x
, strict_p
))
7180 use_ldrd
= (TARGET_LDRD
7182 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
7184 if (code
== POST_INC
|| code
== PRE_DEC
7185 || ((code
== PRE_INC
|| code
== POST_DEC
)
7186 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7187 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7189 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7190 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7191 && GET_CODE (XEXP (x
, 1)) == PLUS
7192 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7194 rtx addend
= XEXP (XEXP (x
, 1), 1);
7196 /* Don't allow ldrd post increment by register because it's hard
7197 to fixup invalid register choices. */
7199 && GET_CODE (x
) == POST_MODIFY
7203 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7204 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7207 /* After reload constants split into minipools will have addresses
7208 from a LABEL_REF. */
7209 else if (reload_completed
7210 && (code
== LABEL_REF
7212 && GET_CODE (XEXP (x
, 0)) == PLUS
7213 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7214 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7217 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7220 else if (code
== PLUS
)
7222 rtx xop0
= XEXP (x
, 0);
7223 rtx xop1
= XEXP (x
, 1);
7225 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7226 && ((CONST_INT_P (xop1
)
7227 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7228 || (!strict_p
&& will_be_in_index_register (xop1
))))
7229 || (arm_address_register_rtx_p (xop1
, strict_p
)
7230 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7234 /* Reload currently can't handle MINUS, so disable this for now */
7235 else if (GET_CODE (x
) == MINUS
)
7237 rtx xop0
= XEXP (x
, 0);
7238 rtx xop1
= XEXP (x
, 1);
7240 return (arm_address_register_rtx_p (xop0
, strict_p
)
7241 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7245 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7246 && code
== SYMBOL_REF
7247 && CONSTANT_POOL_ADDRESS_P (x
)
7249 && symbol_mentioned_p (get_pool_constant (x
))
7250 && ! pcrel_constant_p (get_pool_constant (x
))))
7256 /* Return nonzero if X is a valid Thumb-2 address operand. */
7258 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7261 enum rtx_code code
= GET_CODE (x
);
7263 if (arm_address_register_rtx_p (x
, strict_p
))
7266 use_ldrd
= (TARGET_LDRD
7268 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
7270 if (code
== POST_INC
|| code
== PRE_DEC
7271 || ((code
== PRE_INC
|| code
== POST_DEC
)
7272 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7273 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7275 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7276 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7277 && GET_CODE (XEXP (x
, 1)) == PLUS
7278 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7280 /* Thumb-2 only has autoincrement by constant. */
7281 rtx addend
= XEXP (XEXP (x
, 1), 1);
7282 HOST_WIDE_INT offset
;
7284 if (!CONST_INT_P (addend
))
7287 offset
= INTVAL(addend
);
7288 if (GET_MODE_SIZE (mode
) <= 4)
7289 return (offset
> -256 && offset
< 256);
7291 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7292 && (offset
& 3) == 0);
7295 /* After reload constants split into minipools will have addresses
7296 from a LABEL_REF. */
7297 else if (reload_completed
7298 && (code
== LABEL_REF
7300 && GET_CODE (XEXP (x
, 0)) == PLUS
7301 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7302 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7305 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7308 else if (code
== PLUS
)
7310 rtx xop0
= XEXP (x
, 0);
7311 rtx xop1
= XEXP (x
, 1);
7313 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7314 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7315 || (!strict_p
&& will_be_in_index_register (xop1
))))
7316 || (arm_address_register_rtx_p (xop1
, strict_p
)
7317 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7320 /* Normally we can assign constant values to target registers without
7321 the help of constant pool. But there are cases we have to use constant
7323 1) assign a label to register.
7324 2) sign-extend a 8bit value to 32bit and then assign to register.
7326 Constant pool access in format:
7327 (set (reg r0) (mem (symbol_ref (".LC0"))))
7328 will cause the use of literal pool (later in function arm_reorg).
7329 So here we mark such format as an invalid format, then the compiler
7330 will adjust it into:
7331 (set (reg r0) (symbol_ref (".LC0")))
7332 (set (reg r0) (mem (reg r0))).
7333 No extra register is required, and (mem (reg r0)) won't cause the use
7334 of literal pools. */
7335 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
7336 && CONSTANT_POOL_ADDRESS_P (x
))
7339 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7340 && code
== SYMBOL_REF
7341 && CONSTANT_POOL_ADDRESS_P (x
)
7343 && symbol_mentioned_p (get_pool_constant (x
))
7344 && ! pcrel_constant_p (get_pool_constant (x
))))
7350 /* Return nonzero if INDEX is valid for an address index operand in
7353 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7356 HOST_WIDE_INT range
;
7357 enum rtx_code code
= GET_CODE (index
);
7359 /* Standard coprocessor addressing modes. */
7360 if (TARGET_HARD_FLOAT
7362 && (mode
== SFmode
|| mode
== DFmode
))
7363 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7364 && INTVAL (index
) > -1024
7365 && (INTVAL (index
) & 3) == 0);
7367 /* For quad modes, we restrict the constant offset to be slightly less
7368 than what the instruction format permits. We do this because for
7369 quad mode moves, we will actually decompose them into two separate
7370 double-mode reads or writes. INDEX must therefore be a valid
7371 (double-mode) offset and so should INDEX+8. */
7372 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7373 return (code
== CONST_INT
7374 && INTVAL (index
) < 1016
7375 && INTVAL (index
) > -1024
7376 && (INTVAL (index
) & 3) == 0);
7378 /* We have no such constraint on double mode offsets, so we permit the
7379 full range of the instruction format. */
7380 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7381 return (code
== CONST_INT
7382 && INTVAL (index
) < 1024
7383 && INTVAL (index
) > -1024
7384 && (INTVAL (index
) & 3) == 0);
7386 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7387 return (code
== CONST_INT
7388 && INTVAL (index
) < 1024
7389 && INTVAL (index
) > -1024
7390 && (INTVAL (index
) & 3) == 0);
7392 if (arm_address_register_rtx_p (index
, strict_p
)
7393 && (GET_MODE_SIZE (mode
) <= 4))
7396 if (mode
== DImode
|| mode
== DFmode
)
7398 if (code
== CONST_INT
)
7400 HOST_WIDE_INT val
= INTVAL (index
);
7403 return val
> -256 && val
< 256;
7405 return val
> -4096 && val
< 4092;
7408 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7411 if (GET_MODE_SIZE (mode
) <= 4
7415 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7419 rtx xiop0
= XEXP (index
, 0);
7420 rtx xiop1
= XEXP (index
, 1);
7422 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7423 && power_of_two_operand (xiop1
, SImode
))
7424 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7425 && power_of_two_operand (xiop0
, SImode
)));
7427 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7428 || code
== ASHIFT
|| code
== ROTATERT
)
7430 rtx op
= XEXP (index
, 1);
7432 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7435 && INTVAL (op
) <= 31);
7439 /* For ARM v4 we may be doing a sign-extend operation during the
7445 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7451 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7453 return (code
== CONST_INT
7454 && INTVAL (index
) < range
7455 && INTVAL (index
) > -range
);
7458 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7459 index operand. i.e. 1, 2, 4 or 8. */
7461 thumb2_index_mul_operand (rtx op
)
7465 if (!CONST_INT_P (op
))
7469 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7472 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7474 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7476 enum rtx_code code
= GET_CODE (index
);
7478 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7479 /* Standard coprocessor addressing modes. */
7480 if (TARGET_HARD_FLOAT
7482 && (mode
== SFmode
|| mode
== DFmode
))
7483 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7484 /* Thumb-2 allows only > -256 index range for it's core register
7485 load/stores. Since we allow SF/DF in core registers, we have
7486 to use the intersection between -256~4096 (core) and -1024~1024
7488 && INTVAL (index
) > -256
7489 && (INTVAL (index
) & 3) == 0);
7491 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7493 /* For DImode assume values will usually live in core regs
7494 and only allow LDRD addressing modes. */
7495 if (!TARGET_LDRD
|| mode
!= DImode
)
7496 return (code
== CONST_INT
7497 && INTVAL (index
) < 1024
7498 && INTVAL (index
) > -1024
7499 && (INTVAL (index
) & 3) == 0);
7502 /* For quad modes, we restrict the constant offset to be slightly less
7503 than what the instruction format permits. We do this because for
7504 quad mode moves, we will actually decompose them into two separate
7505 double-mode reads or writes. INDEX must therefore be a valid
7506 (double-mode) offset and so should INDEX+8. */
7507 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7508 return (code
== CONST_INT
7509 && INTVAL (index
) < 1016
7510 && INTVAL (index
) > -1024
7511 && (INTVAL (index
) & 3) == 0);
7513 /* We have no such constraint on double mode offsets, so we permit the
7514 full range of the instruction format. */
7515 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7516 return (code
== CONST_INT
7517 && INTVAL (index
) < 1024
7518 && INTVAL (index
) > -1024
7519 && (INTVAL (index
) & 3) == 0);
7521 if (arm_address_register_rtx_p (index
, strict_p
)
7522 && (GET_MODE_SIZE (mode
) <= 4))
7525 if (mode
== DImode
|| mode
== DFmode
)
7527 if (code
== CONST_INT
)
7529 HOST_WIDE_INT val
= INTVAL (index
);
7530 /* ??? Can we assume ldrd for thumb2? */
7531 /* Thumb-2 ldrd only has reg+const addressing modes. */
7532 /* ldrd supports offsets of +-1020.
7533 However the ldr fallback does not. */
7534 return val
> -256 && val
< 256 && (val
& 3) == 0;
7542 rtx xiop0
= XEXP (index
, 0);
7543 rtx xiop1
= XEXP (index
, 1);
7545 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7546 && thumb2_index_mul_operand (xiop1
))
7547 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7548 && thumb2_index_mul_operand (xiop0
)));
7550 else if (code
== ASHIFT
)
7552 rtx op
= XEXP (index
, 1);
7554 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7557 && INTVAL (op
) <= 3);
7560 return (code
== CONST_INT
7561 && INTVAL (index
) < 4096
7562 && INTVAL (index
) > -256);
7565 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7567 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
7577 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7579 return (regno
<= LAST_LO_REGNUM
7580 || regno
> LAST_VIRTUAL_REGISTER
7581 || regno
== FRAME_POINTER_REGNUM
7582 || (GET_MODE_SIZE (mode
) >= 4
7583 && (regno
== STACK_POINTER_REGNUM
7584 || regno
>= FIRST_PSEUDO_REGISTER
7585 || x
== hard_frame_pointer_rtx
7586 || x
== arg_pointer_rtx
)));
7589 /* Return nonzero if x is a legitimate index register. This is the case
7590 for any base register that can access a QImode object. */
7592 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
7594 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
7597 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7599 The AP may be eliminated to either the SP or the FP, so we use the
7600 least common denominator, e.g. SImode, and offsets from 0 to 64.
7602 ??? Verify whether the above is the right approach.
7604 ??? Also, the FP may be eliminated to the SP, so perhaps that
7605 needs special handling also.
7607 ??? Look at how the mips16 port solves this problem. It probably uses
7608 better ways to solve some of these problems.
7610 Although it is not incorrect, we don't accept QImode and HImode
7611 addresses based on the frame pointer or arg pointer until the
7612 reload pass starts. This is so that eliminating such addresses
7613 into stack based ones won't produce impossible code. */
7615 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7617 /* ??? Not clear if this is right. Experiment. */
7618 if (GET_MODE_SIZE (mode
) < 4
7619 && !(reload_in_progress
|| reload_completed
)
7620 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7621 || reg_mentioned_p (arg_pointer_rtx
, x
)
7622 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7623 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7624 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7625 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7628 /* Accept any base register. SP only in SImode or larger. */
7629 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7632 /* This is PC relative data before arm_reorg runs. */
7633 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7634 && GET_CODE (x
) == SYMBOL_REF
7635 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7638 /* This is PC relative data after arm_reorg runs. */
7639 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7641 && (GET_CODE (x
) == LABEL_REF
7642 || (GET_CODE (x
) == CONST
7643 && GET_CODE (XEXP (x
, 0)) == PLUS
7644 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7645 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7648 /* Post-inc indexing only supported for SImode and larger. */
7649 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7650 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7653 else if (GET_CODE (x
) == PLUS
)
7655 /* REG+REG address can be any two index registers. */
7656 /* We disallow FRAME+REG addressing since we know that FRAME
7657 will be replaced with STACK, and SP relative addressing only
7658 permits SP+OFFSET. */
7659 if (GET_MODE_SIZE (mode
) <= 4
7660 && XEXP (x
, 0) != frame_pointer_rtx
7661 && XEXP (x
, 1) != frame_pointer_rtx
7662 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7663 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7664 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7667 /* REG+const has 5-7 bit offset for non-SP registers. */
7668 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7669 || XEXP (x
, 0) == arg_pointer_rtx
)
7670 && CONST_INT_P (XEXP (x
, 1))
7671 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7674 /* REG+const has 10-bit offset for SP, but only SImode and
7675 larger is supported. */
7676 /* ??? Should probably check for DI/DFmode overflow here
7677 just like GO_IF_LEGITIMATE_OFFSET does. */
7678 else if (REG_P (XEXP (x
, 0))
7679 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7680 && GET_MODE_SIZE (mode
) >= 4
7681 && CONST_INT_P (XEXP (x
, 1))
7682 && INTVAL (XEXP (x
, 1)) >= 0
7683 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7684 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7687 else if (REG_P (XEXP (x
, 0))
7688 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7689 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7690 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7691 && REGNO (XEXP (x
, 0))
7692 <= LAST_VIRTUAL_POINTER_REGISTER
))
7693 && GET_MODE_SIZE (mode
) >= 4
7694 && CONST_INT_P (XEXP (x
, 1))
7695 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7699 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7700 && GET_MODE_SIZE (mode
) == 4
7701 && GET_CODE (x
) == SYMBOL_REF
7702 && CONSTANT_POOL_ADDRESS_P (x
)
7704 && symbol_mentioned_p (get_pool_constant (x
))
7705 && ! pcrel_constant_p (get_pool_constant (x
))))
7711 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7712 instruction of mode MODE. */
7714 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
7716 switch (GET_MODE_SIZE (mode
))
7719 return val
>= 0 && val
< 32;
7722 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7726 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7732 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
7735 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7736 else if (TARGET_THUMB2
)
7737 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7738 else /* if (TARGET_THUMB1) */
7739 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7742 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7744 Given an rtx X being reloaded into a reg required to be
7745 in class CLASS, return the class of reg to actually use.
7746 In general this is just CLASS, but for the Thumb core registers and
7747 immediate constants we prefer a LO_REGS class or a subset. */
7750 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7756 if (rclass
== GENERAL_REGS
)
7763 /* Build the SYMBOL_REF for __tls_get_addr. */
7765 static GTY(()) rtx tls_get_addr_libfunc
;
7768 get_tls_get_addr (void)
7770 if (!tls_get_addr_libfunc
)
7771 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7772 return tls_get_addr_libfunc
;
7776 arm_load_tp (rtx target
)
7779 target
= gen_reg_rtx (SImode
);
7783 /* Can return in any reg. */
7784 emit_insn (gen_load_tp_hard (target
));
7788 /* Always returned in r0. Immediately copy the result into a pseudo,
7789 otherwise other uses of r0 (e.g. setting up function arguments) may
7790 clobber the value. */
7794 emit_insn (gen_load_tp_soft ());
7796 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
7797 emit_move_insn (target
, tmp
);
7803 load_tls_operand (rtx x
, rtx reg
)
7807 if (reg
== NULL_RTX
)
7808 reg
= gen_reg_rtx (SImode
);
7810 tmp
= gen_rtx_CONST (SImode
, x
);
7812 emit_move_insn (reg
, tmp
);
7818 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7820 rtx insns
, label
, labelno
, sum
;
7822 gcc_assert (reloc
!= TLS_DESCSEQ
);
7825 labelno
= GEN_INT (pic_labelno
++);
7826 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7827 label
= gen_rtx_CONST (VOIDmode
, label
);
7829 sum
= gen_rtx_UNSPEC (Pmode
,
7830 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7831 GEN_INT (TARGET_ARM
? 8 : 4)),
7833 reg
= load_tls_operand (sum
, reg
);
7836 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7838 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7840 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7841 LCT_PURE
, /* LCT_CONST? */
7842 Pmode
, 1, reg
, Pmode
);
7844 insns
= get_insns ();
7851 arm_tls_descseq_addr (rtx x
, rtx reg
)
7853 rtx labelno
= GEN_INT (pic_labelno
++);
7854 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7855 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7856 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7857 gen_rtx_CONST (VOIDmode
, label
),
7858 GEN_INT (!TARGET_ARM
)),
7860 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
7862 emit_insn (gen_tlscall (x
, labelno
));
7864 reg
= gen_reg_rtx (SImode
);
7866 gcc_assert (REGNO (reg
) != R0_REGNUM
);
7868 emit_move_insn (reg
, reg0
);
7874 legitimize_tls_address (rtx x
, rtx reg
)
7876 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7877 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7881 case TLS_MODEL_GLOBAL_DYNAMIC
:
7882 if (TARGET_GNU2_TLS
)
7884 reg
= arm_tls_descseq_addr (x
, reg
);
7886 tp
= arm_load_tp (NULL_RTX
);
7888 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7892 /* Original scheme */
7893 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7894 dest
= gen_reg_rtx (Pmode
);
7895 emit_libcall_block (insns
, dest
, ret
, x
);
7899 case TLS_MODEL_LOCAL_DYNAMIC
:
7900 if (TARGET_GNU2_TLS
)
7902 reg
= arm_tls_descseq_addr (x
, reg
);
7904 tp
= arm_load_tp (NULL_RTX
);
7906 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7910 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7912 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7913 share the LDM result with other LD model accesses. */
7914 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7916 dest
= gen_reg_rtx (Pmode
);
7917 emit_libcall_block (insns
, dest
, ret
, eqv
);
7919 /* Load the addend. */
7920 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7921 GEN_INT (TLS_LDO32
)),
7923 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7924 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7928 case TLS_MODEL_INITIAL_EXEC
:
7929 labelno
= GEN_INT (pic_labelno
++);
7930 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7931 label
= gen_rtx_CONST (VOIDmode
, label
);
7932 sum
= gen_rtx_UNSPEC (Pmode
,
7933 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7934 GEN_INT (TARGET_ARM
? 8 : 4)),
7936 reg
= load_tls_operand (sum
, reg
);
7939 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7940 else if (TARGET_THUMB2
)
7941 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7944 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7945 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7948 tp
= arm_load_tp (NULL_RTX
);
7950 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7952 case TLS_MODEL_LOCAL_EXEC
:
7953 tp
= arm_load_tp (NULL_RTX
);
7955 reg
= gen_rtx_UNSPEC (Pmode
,
7956 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7958 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7960 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7967 /* Try machine-dependent ways of modifying an illegitimate address
7968 to be legitimate. If we find one, return the new, valid address. */
7970 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
7972 if (arm_tls_referenced_p (x
))
7976 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
7978 addend
= XEXP (XEXP (x
, 0), 1);
7979 x
= XEXP (XEXP (x
, 0), 0);
7982 if (GET_CODE (x
) != SYMBOL_REF
)
7985 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
7987 x
= legitimize_tls_address (x
, NULL_RTX
);
7991 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8000 /* TODO: legitimize_address for Thumb2. */
8003 return thumb_legitimize_address (x
, orig_x
, mode
);
8006 if (GET_CODE (x
) == PLUS
)
8008 rtx xop0
= XEXP (x
, 0);
8009 rtx xop1
= XEXP (x
, 1);
8011 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8012 xop0
= force_reg (SImode
, xop0
);
8014 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8015 && !symbol_mentioned_p (xop1
))
8016 xop1
= force_reg (SImode
, xop1
);
8018 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8019 && CONST_INT_P (xop1
))
8021 HOST_WIDE_INT n
, low_n
;
8025 /* VFP addressing modes actually allow greater offsets, but for
8026 now we just stick with the lowest common denominator. */
8028 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
8040 low_n
= ((mode
) == TImode
? 0
8041 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8045 base_reg
= gen_reg_rtx (SImode
);
8046 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8047 emit_move_insn (base_reg
, val
);
8048 x
= plus_constant (Pmode
, base_reg
, low_n
);
8050 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8051 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8054 /* XXX We don't allow MINUS any more -- see comment in
8055 arm_legitimate_address_outer_p (). */
8056 else if (GET_CODE (x
) == MINUS
)
8058 rtx xop0
= XEXP (x
, 0);
8059 rtx xop1
= XEXP (x
, 1);
8061 if (CONSTANT_P (xop0
))
8062 xop0
= force_reg (SImode
, xop0
);
8064 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8065 xop1
= force_reg (SImode
, xop1
);
8067 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8068 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8071 /* Make sure to take full advantage of the pre-indexed addressing mode
8072 with absolute addresses which often allows for the base register to
8073 be factorized for multiple adjacent memory references, and it might
8074 even allows for the mini pool to be avoided entirely. */
8075 else if (CONST_INT_P (x
) && optimize
> 0)
8078 HOST_WIDE_INT mask
, base
, index
;
8081 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8082 use a 8-bit index. So let's use a 12-bit index for SImode only and
8083 hope that arm_gen_constant will enable ldrb to use more bits. */
8084 bits
= (mode
== SImode
) ? 12 : 8;
8085 mask
= (1 << bits
) - 1;
8086 base
= INTVAL (x
) & ~mask
;
8087 index
= INTVAL (x
) & mask
;
8088 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8090 /* It'll most probably be more efficient to generate the base
8091 with more bits set and use a negative index instead. */
8095 base_reg
= force_reg (SImode
, GEN_INT (base
));
8096 x
= plus_constant (Pmode
, base_reg
, index
);
8101 /* We need to find and carefully transform any SYMBOL and LABEL
8102 references; so go back to the original address expression. */
8103 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8105 if (new_x
!= orig_x
)
8113 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8114 to be legitimate. If we find one, return the new, valid address. */
8116 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8118 if (GET_CODE (x
) == PLUS
8119 && CONST_INT_P (XEXP (x
, 1))
8120 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8121 || INTVAL (XEXP (x
, 1)) < 0))
8123 rtx xop0
= XEXP (x
, 0);
8124 rtx xop1
= XEXP (x
, 1);
8125 HOST_WIDE_INT offset
= INTVAL (xop1
);
8127 /* Try and fold the offset into a biasing of the base register and
8128 then offsetting that. Don't do this when optimizing for space
8129 since it can cause too many CSEs. */
8130 if (optimize_size
&& offset
>= 0
8131 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8133 HOST_WIDE_INT delta
;
8136 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8137 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8138 delta
= 31 * GET_MODE_SIZE (mode
);
8140 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8142 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8144 x
= plus_constant (Pmode
, xop0
, delta
);
8146 else if (offset
< 0 && offset
> -256)
8147 /* Small negative offsets are best done with a subtract before the
8148 dereference, forcing these into a register normally takes two
8150 x
= force_operand (x
, NULL_RTX
);
8153 /* For the remaining cases, force the constant into a register. */
8154 xop1
= force_reg (SImode
, xop1
);
8155 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8158 else if (GET_CODE (x
) == PLUS
8159 && s_register_operand (XEXP (x
, 1), SImode
)
8160 && !s_register_operand (XEXP (x
, 0), SImode
))
8162 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8164 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8169 /* We need to find and carefully transform any SYMBOL and LABEL
8170 references; so go back to the original address expression. */
8171 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8173 if (new_x
!= orig_x
)
8180 /* Return TRUE if X contains any TLS symbol references. */
8183 arm_tls_referenced_p (rtx x
)
8185 if (! TARGET_HAVE_TLS
)
8188 subrtx_iterator::array_type array
;
8189 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8191 const_rtx x
= *iter
;
8192 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8195 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8196 TLS offsets, not real symbol references. */
8197 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8198 iter
.skip_subrtxes ();
8203 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8205 On the ARM, allow any integer (invalid ones are removed later by insn
8206 patterns), nice doubles and symbol_refs which refer to the function's
8209 When generating pic allow anything. */
8212 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8214 return flag_pic
|| !label_mentioned_p (x
);
8218 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8220 return (CONST_INT_P (x
)
8221 || CONST_DOUBLE_P (x
)
8222 || CONSTANT_ADDRESS_P (x
)
8227 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8229 return (!arm_cannot_force_const_mem (mode
, x
)
8231 ? arm_legitimate_constant_p_1 (mode
, x
)
8232 : thumb_legitimate_constant_p (mode
, x
)));
8235 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8238 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8242 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8244 split_const (x
, &base
, &offset
);
8245 if (GET_CODE (base
) == SYMBOL_REF
8246 && !offset_within_block_p (base
, INTVAL (offset
)))
8249 return arm_tls_referenced_p (x
);
8252 #define REG_OR_SUBREG_REG(X) \
8254 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8256 #define REG_OR_SUBREG_RTX(X) \
8257 (REG_P (X) ? (X) : SUBREG_REG (X))
8260 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8262 machine_mode mode
= GET_MODE (x
);
8271 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8278 return COSTS_N_INSNS (1);
8281 if (CONST_INT_P (XEXP (x
, 1)))
8284 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8291 return COSTS_N_INSNS (2) + cycles
;
8293 return COSTS_N_INSNS (1) + 16;
8296 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8298 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8299 return (COSTS_N_INSNS (words
)
8300 + 4 * ((MEM_P (SET_SRC (x
)))
8301 + MEM_P (SET_DEST (x
))));
8306 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8308 if (thumb_shiftable_const (INTVAL (x
)))
8309 return COSTS_N_INSNS (2);
8310 return COSTS_N_INSNS (3);
8312 else if ((outer
== PLUS
|| outer
== COMPARE
)
8313 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8315 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8316 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8317 return COSTS_N_INSNS (1);
8318 else if (outer
== AND
)
8321 /* This duplicates the tests in the andsi3 expander. */
8322 for (i
= 9; i
<= 31; i
++)
8323 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8324 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8325 return COSTS_N_INSNS (2);
8327 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8328 || outer
== LSHIFTRT
)
8330 return COSTS_N_INSNS (2);
8336 return COSTS_N_INSNS (3);
8354 /* XXX another guess. */
8355 /* Memory costs quite a lot for the first word, but subsequent words
8356 load at the equivalent of a single insn each. */
8357 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8358 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8363 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8369 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8370 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8376 return total
+ COSTS_N_INSNS (1);
8378 /* Assume a two-shift sequence. Increase the cost slightly so
8379 we prefer actual shifts over an extend operation. */
8380 return total
+ 1 + COSTS_N_INSNS (2);
8388 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8390 machine_mode mode
= GET_MODE (x
);
8391 enum rtx_code subcode
;
8393 enum rtx_code code
= GET_CODE (x
);
8399 /* Memory costs quite a lot for the first word, but subsequent words
8400 load at the equivalent of a single insn each. */
8401 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8408 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8409 *total
= COSTS_N_INSNS (2);
8410 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8411 *total
= COSTS_N_INSNS (4);
8413 *total
= COSTS_N_INSNS (20);
8417 if (REG_P (XEXP (x
, 1)))
8418 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8419 else if (!CONST_INT_P (XEXP (x
, 1)))
8420 *total
= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8426 *total
+= COSTS_N_INSNS (4);
8431 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8432 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8435 *total
+= COSTS_N_INSNS (3);
8439 *total
+= COSTS_N_INSNS (1);
8440 /* Increase the cost of complex shifts because they aren't any faster,
8441 and reduce dual issue opportunities. */
8442 if (arm_tune_cortex_a9
8443 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8451 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8452 if (CONST_INT_P (XEXP (x
, 0))
8453 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8455 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8459 if (CONST_INT_P (XEXP (x
, 1))
8460 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8462 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8469 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8471 if (TARGET_HARD_FLOAT
8473 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8475 *total
= COSTS_N_INSNS (1);
8476 if (CONST_DOUBLE_P (XEXP (x
, 0))
8477 && arm_const_double_rtx (XEXP (x
, 0)))
8479 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8483 if (CONST_DOUBLE_P (XEXP (x
, 1))
8484 && arm_const_double_rtx (XEXP (x
, 1)))
8486 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8492 *total
= COSTS_N_INSNS (20);
8496 *total
= COSTS_N_INSNS (1);
8497 if (CONST_INT_P (XEXP (x
, 0))
8498 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8500 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8504 subcode
= GET_CODE (XEXP (x
, 1));
8505 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8506 || subcode
== LSHIFTRT
8507 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8509 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8510 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, subcode
, 0, speed
);
8514 /* A shift as a part of RSB costs no more than RSB itself. */
8515 if (GET_CODE (XEXP (x
, 0)) == MULT
8516 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8518 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
, 0, speed
);
8519 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8524 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8526 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8527 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, subcode
, 0, speed
);
8531 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8532 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8534 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
8536 if (REG_P (XEXP (XEXP (x
, 1), 0))
8537 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8538 *total
+= COSTS_N_INSNS (1);
8546 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8547 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8548 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8550 *total
= COSTS_N_INSNS (1);
8551 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
8552 GET_CODE (XEXP (x
, 0)), 0, speed
);
8553 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8557 /* MLA: All arguments must be registers. We filter out
8558 multiplication by a power of two, so that we fall down into
8560 if (GET_CODE (XEXP (x
, 0)) == MULT
8561 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8563 /* The cost comes from the cost of the multiply. */
8567 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8569 if (TARGET_HARD_FLOAT
8571 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8573 *total
= COSTS_N_INSNS (1);
8574 if (CONST_DOUBLE_P (XEXP (x
, 1))
8575 && arm_const_double_rtx (XEXP (x
, 1)))
8577 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8584 *total
= COSTS_N_INSNS (20);
8588 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8589 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8591 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), mode
, code
,
8593 if (REG_P (XEXP (XEXP (x
, 0), 0))
8594 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8595 *total
+= COSTS_N_INSNS (1);
8601 case AND
: case XOR
: case IOR
:
8603 /* Normally the frame registers will be spilt into reg+const during
8604 reload, so it is a bad idea to combine them with other instructions,
8605 since then they might not be moved outside of loops. As a compromise
8606 we allow integration with ops that have a constant as their second
8608 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8609 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8610 && !CONST_INT_P (XEXP (x
, 1)))
8611 *total
= COSTS_N_INSNS (1);
8615 *total
+= COSTS_N_INSNS (2);
8616 if (CONST_INT_P (XEXP (x
, 1))
8617 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8619 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8626 *total
+= COSTS_N_INSNS (1);
8627 if (CONST_INT_P (XEXP (x
, 1))
8628 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8630 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8633 subcode
= GET_CODE (XEXP (x
, 0));
8634 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8635 || subcode
== LSHIFTRT
8636 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8638 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8639 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8644 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8646 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8647 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8651 if (subcode
== UMIN
|| subcode
== UMAX
8652 || subcode
== SMIN
|| subcode
== SMAX
)
8654 *total
= COSTS_N_INSNS (3);
8661 /* This should have been handled by the CPU specific routines. */
8665 if (arm_arch3m
&& mode
== SImode
8666 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8667 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8668 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8669 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8670 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8671 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8673 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, LSHIFTRT
,
8677 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8681 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8683 if (TARGET_HARD_FLOAT
8685 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8687 *total
= COSTS_N_INSNS (1);
8690 *total
= COSTS_N_INSNS (2);
8696 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8697 if (mode
== SImode
&& code
== NOT
)
8699 subcode
= GET_CODE (XEXP (x
, 0));
8700 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8701 || subcode
== LSHIFTRT
8702 || subcode
== ROTATE
|| subcode
== ROTATERT
8704 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8706 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
,
8708 /* Register shifts cost an extra cycle. */
8709 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8710 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8720 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8722 *total
= COSTS_N_INSNS (4);
8726 operand
= XEXP (x
, 0);
8728 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8729 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8730 && REG_P (XEXP (operand
, 0))
8731 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8732 *total
+= COSTS_N_INSNS (1);
8733 *total
+= rtx_cost (XEXP (x
, 1), VOIDmode
, code
, 1, speed
);
8734 *total
+= rtx_cost (XEXP (x
, 2), VOIDmode
, code
, 2, speed
);
8738 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8740 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
8747 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8748 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8750 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
8757 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8758 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8760 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
8781 /* SCC insns. In the case where the comparison has already been
8782 performed, then they cost 2 instructions. Otherwise they need
8783 an additional comparison before them. */
8784 *total
= COSTS_N_INSNS (2);
8785 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8792 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8798 *total
+= COSTS_N_INSNS (1);
8799 if (CONST_INT_P (XEXP (x
, 1))
8800 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8802 *total
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed
);
8806 subcode
= GET_CODE (XEXP (x
, 0));
8807 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8808 || subcode
== LSHIFTRT
8809 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8811 mode
= GET_MODE (XEXP (x
, 0));
8812 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8813 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8818 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8820 mode
= GET_MODE (XEXP (x
, 0));
8821 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8822 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8832 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8833 if (!CONST_INT_P (XEXP (x
, 1))
8834 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8835 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8839 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8841 if (TARGET_HARD_FLOAT
8843 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8845 *total
= COSTS_N_INSNS (1);
8848 *total
= COSTS_N_INSNS (20);
8851 *total
= COSTS_N_INSNS (1);
8853 *total
+= COSTS_N_INSNS (3);
8859 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8861 rtx op
= XEXP (x
, 0);
8862 machine_mode opmode
= GET_MODE (op
);
8865 *total
+= COSTS_N_INSNS (1);
8867 if (opmode
!= SImode
)
8871 /* If !arm_arch4, we use one of the extendhisi2_mem
8872 or movhi_bytes patterns for HImode. For a QImode
8873 sign extension, we first zero-extend from memory
8874 and then perform a shift sequence. */
8875 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8876 *total
+= COSTS_N_INSNS (2);
8879 *total
+= COSTS_N_INSNS (1);
8881 /* We don't have the necessary insn, so we need to perform some
8883 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8884 /* An and with constant 255. */
8885 *total
+= COSTS_N_INSNS (1);
8887 /* A shift sequence. Increase costs slightly to avoid
8888 combining two shifts into an extend operation. */
8889 *total
+= COSTS_N_INSNS (2) + 1;
8895 switch (GET_MODE (XEXP (x
, 0)))
8902 *total
= COSTS_N_INSNS (1);
8912 mode
= GET_MODE (XEXP (x
, 0));
8913 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8917 if (const_ok_for_arm (INTVAL (x
))
8918 || const_ok_for_arm (~INTVAL (x
)))
8919 *total
= COSTS_N_INSNS (1);
8921 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8922 INTVAL (x
), NULL_RTX
,
8929 *total
= COSTS_N_INSNS (3);
8933 *total
= COSTS_N_INSNS (1);
8937 *total
= COSTS_N_INSNS (1);
8938 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8942 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8943 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8944 *total
= COSTS_N_INSNS (1);
8946 *total
= COSTS_N_INSNS (4);
8950 /* The vec_extract patterns accept memory operands that require an
8951 address reload. Account for the cost of that reload to give the
8952 auto-inc-dec pass an incentive to try to replace them. */
8953 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8954 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8956 mode
= GET_MODE (SET_DEST (x
));
8957 *total
= rtx_cost (SET_DEST (x
), mode
, code
, 0, speed
);
8958 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8959 *total
+= COSTS_N_INSNS (1);
8962 /* Likewise for the vec_set patterns. */
8963 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8964 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8965 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8967 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8968 mode
= GET_MODE (SET_DEST (x
));
8969 *total
= rtx_cost (mem
, mode
, code
, 0, speed
);
8970 if (!neon_vector_mem_operand (mem
, 2, true))
8971 *total
+= COSTS_N_INSNS (1);
8977 /* We cost this as high as our memory costs to allow this to
8978 be hoisted from loops. */
8979 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8981 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8987 && TARGET_HARD_FLOAT
8989 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8990 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8991 *total
= COSTS_N_INSNS (1);
8993 *total
= COSTS_N_INSNS (4);
8997 *total
= COSTS_N_INSNS (4);
9002 /* Estimates the size cost of thumb1 instructions.
9003 For now most of the code is copied from thumb1_rtx_costs. We need more
9004 fine grain tuning when we have more related test cases. */
9006 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9008 machine_mode mode
= GET_MODE (x
);
9017 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9021 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9022 defined by RTL expansion, especially for the expansion of
9024 if ((GET_CODE (XEXP (x
, 0)) == MULT
9025 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
9026 || (GET_CODE (XEXP (x
, 1)) == MULT
9027 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
9028 return COSTS_N_INSNS (2);
9029 /* On purpose fall through for normal RTX. */
9033 return COSTS_N_INSNS (1);
9036 if (CONST_INT_P (XEXP (x
, 1)))
9038 /* Thumb1 mul instruction can't operate on const. We must Load it
9039 into a register first. */
9040 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
9041 /* For the targets which have a very small and high-latency multiply
9042 unit, we prefer to synthesize the mult with up to 5 instructions,
9043 giving a good balance between size and performance. */
9044 if (arm_arch6m
&& arm_m_profile_small_mul
)
9045 return COSTS_N_INSNS (5);
9047 return COSTS_N_INSNS (1) + const_size
;
9049 return COSTS_N_INSNS (1);
9052 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9054 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9055 return COSTS_N_INSNS (words
)
9056 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x
))
9057 || satisfies_constraint_K (SET_SRC (x
))
9058 /* thumb1_movdi_insn. */
9059 || ((words
> 1) && MEM_P (SET_SRC (x
))));
9064 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
9065 return COSTS_N_INSNS (1);
9066 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9067 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9068 return COSTS_N_INSNS (2);
9069 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9070 if (thumb_shiftable_const (INTVAL (x
)))
9071 return COSTS_N_INSNS (2);
9072 return COSTS_N_INSNS (3);
9074 else if ((outer
== PLUS
|| outer
== COMPARE
)
9075 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9077 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9078 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9079 return COSTS_N_INSNS (1);
9080 else if (outer
== AND
)
9083 /* This duplicates the tests in the andsi3 expander. */
9084 for (i
= 9; i
<= 31; i
++)
9085 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9086 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9087 return COSTS_N_INSNS (2);
9089 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9090 || outer
== LSHIFTRT
)
9092 return COSTS_N_INSNS (2);
9098 return COSTS_N_INSNS (3);
9112 return COSTS_N_INSNS (1);
9115 return (COSTS_N_INSNS (1)
9117 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9118 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9119 ? COSTS_N_INSNS (1) : 0));
9123 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9128 /* XXX still guessing. */
9129 switch (GET_MODE (XEXP (x
, 0)))
9132 return (1 + (mode
== DImode
? 4 : 0)
9133 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9136 return (4 + (mode
== DImode
? 4 : 0)
9137 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9140 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9151 /* RTX costs when optimizing for size. */
9153 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9156 machine_mode mode
= GET_MODE (x
);
9159 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9163 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9167 /* A memory access costs 1 insn if the mode is small, or the address is
9168 a single register, otherwise it costs one insn per word. */
9169 if (REG_P (XEXP (x
, 0)))
9170 *total
= COSTS_N_INSNS (1);
9172 && GET_CODE (XEXP (x
, 0)) == PLUS
9173 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9174 /* This will be split into two instructions.
9175 See arm.md:calculate_pic_address. */
9176 *total
= COSTS_N_INSNS (2);
9178 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9185 /* Needs a libcall, so it costs about this. */
9186 *total
= COSTS_N_INSNS (2);
9190 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9192 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
9201 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9203 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), mode
, code
,
9207 else if (mode
== SImode
)
9209 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
9211 /* Slightly disparage register shifts, but not by much. */
9212 if (!CONST_INT_P (XEXP (x
, 1)))
9213 *total
+= 1 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, false);
9217 /* Needs a libcall. */
9218 *total
= COSTS_N_INSNS (2);
9222 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9223 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9225 *total
= COSTS_N_INSNS (1);
9231 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
9232 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
9234 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
9235 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
9236 || subcode1
== ROTATE
|| subcode1
== ROTATERT
9237 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
9238 || subcode1
== ASHIFTRT
)
9240 /* It's just the cost of the two operands. */
9245 *total
= COSTS_N_INSNS (1);
9249 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9253 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9254 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9256 *total
= COSTS_N_INSNS (1);
9260 /* A shift as a part of ADD costs nothing. */
9261 if (GET_CODE (XEXP (x
, 0)) == MULT
9262 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
9264 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
9265 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
, 0, false);
9266 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, false);
9271 case AND
: case XOR
: case IOR
:
9274 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9276 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9277 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9278 || (code
== AND
&& subcode
== NOT
))
9280 /* It's just the cost of the two operands. */
9286 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9290 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9294 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9295 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9297 *total
= COSTS_N_INSNS (1);
9303 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9312 if (cc_register (XEXP (x
, 0), VOIDmode
))
9315 *total
= COSTS_N_INSNS (1);
9319 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9320 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9321 *total
= COSTS_N_INSNS (1);
9323 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9328 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9331 if (const_ok_for_arm (INTVAL (x
)))
9332 /* A multiplication by a constant requires another instruction
9333 to load the constant to a register. */
9334 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9336 else if (const_ok_for_arm (~INTVAL (x
)))
9337 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9338 else if (const_ok_for_arm (-INTVAL (x
)))
9340 if (outer_code
== COMPARE
|| outer_code
== PLUS
9341 || outer_code
== MINUS
)
9344 *total
= COSTS_N_INSNS (1);
9347 *total
= COSTS_N_INSNS (2);
9353 *total
= COSTS_N_INSNS (2);
9357 *total
= COSTS_N_INSNS (4);
9362 && TARGET_HARD_FLOAT
9363 && outer_code
== SET
9364 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9365 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9366 *total
= COSTS_N_INSNS (1);
9368 *total
= COSTS_N_INSNS (4);
9373 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9374 cost of these slightly. */
9375 *total
= COSTS_N_INSNS (1) + 1;
9382 if (mode
!= VOIDmode
)
9383 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9385 *total
= COSTS_N_INSNS (4); /* How knows? */
9390 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9391 operand, then return the operand that is being shifted. If the shift
9392 is not by a constant, then set SHIFT_REG to point to the operand.
9393 Return NULL if OP is not a shifter operand. */
9395 shifter_op_p (rtx op
, rtx
*shift_reg
)
9397 enum rtx_code code
= GET_CODE (op
);
9399 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9400 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9401 return XEXP (op
, 0);
9402 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9403 return XEXP (op
, 0);
9404 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9405 || code
== ASHIFTRT
)
9407 if (!CONST_INT_P (XEXP (op
, 1)))
9408 *shift_reg
= XEXP (op
, 1);
9409 return XEXP (op
, 0);
9416 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9418 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9419 rtx_code code
= GET_CODE (x
);
9420 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9422 switch (XINT (x
, 1))
9424 case UNSPEC_UNALIGNED_LOAD
:
9425 /* We can only do unaligned loads into the integer unit, and we can't
9427 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9429 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9430 + extra_cost
->ldst
.load_unaligned
);
9433 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9434 ADDR_SPACE_GENERIC
, speed_p
);
9438 case UNSPEC_UNALIGNED_STORE
:
9439 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9441 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9442 + extra_cost
->ldst
.store_unaligned
);
9444 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9446 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9447 ADDR_SPACE_GENERIC
, speed_p
);
9458 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9462 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

/* Handle a narrow (QI/HImode) arithmetic operation OP whose operand at
   index IDX may be a shifter operand: if so, account for the combined
   arith+shift insn cost and return from the enclosing cost function.
   Expects SHIFT_OP, SHIFT_REG, X, COST and SPEED_P in scope at the
   expansion site.  */
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg != NULL)					\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained with an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
9513 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9514 const struct cpu_cost_table
*extra_cost
,
9515 int *cost
, bool speed_p
)
9517 machine_mode mode
= GET_MODE (x
);
9519 *cost
= COSTS_N_INSNS (1);
9524 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9526 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9534 /* SET RTXs don't have a mode so we get it from the destination. */
9535 mode
= GET_MODE (SET_DEST (x
));
9537 if (REG_P (SET_SRC (x
))
9538 && REG_P (SET_DEST (x
)))
9540 /* Assume that most copies can be done with a single insn,
9541 unless we don't have HW FP, in which case everything
9542 larger than word mode will require two insns. */
9543 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9544 && GET_MODE_SIZE (mode
) > 4)
9547 /* Conditional register moves can be encoded
9548 in 16 bits in Thumb mode. */
9549 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9555 if (CONST_INT_P (SET_SRC (x
)))
9557 /* Handle CONST_INT here, since the value doesn't have a mode
9558 and we would otherwise be unable to work out the true cost. */
9559 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9562 /* Slightly lower the cost of setting a core reg to a constant.
9563 This helps break up chains and allows for better scheduling. */
9564 if (REG_P (SET_DEST (x
))
9565 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9568 /* Immediate moves with an immediate in the range [0, 255] can be
9569 encoded in 16 bits in Thumb mode. */
9570 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9571 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9573 goto const_int_cost
;
9579 /* A memory access costs 1 insn if the mode is small, or the address is
9580 a single register, otherwise it costs one insn per word. */
9581 if (REG_P (XEXP (x
, 0)))
9582 *cost
= COSTS_N_INSNS (1);
9584 && GET_CODE (XEXP (x
, 0)) == PLUS
9585 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9586 /* This will be split into two instructions.
9587 See arm.md:calculate_pic_address. */
9588 *cost
= COSTS_N_INSNS (2);
9590 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9592 /* For speed optimizations, add the costs of the address and
9593 accessing memory. */
9596 *cost
+= (extra_cost
->ldst
.load
9597 + arm_address_cost (XEXP (x
, 0), mode
,
9598 ADDR_SPACE_GENERIC
, speed_p
));
9600 *cost
+= extra_cost
->ldst
.load
;
9606 /* Calculations of LDM costs are complex. We assume an initial cost
9607 (ldm_1st) which will load the number of registers mentioned in
9608 ldm_regs_per_insn_1st registers; then each additional
9609 ldm_regs_per_insn_subsequent registers cost one more insn. The
9610 formula for N regs is thus:
9612 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9613 + ldm_regs_per_insn_subsequent - 1)
9614 / ldm_regs_per_insn_subsequent).
9616 Additional costs may also be added for addressing. A similar
9617 formula is used for STM. */
9619 bool is_ldm
= load_multiple_operation (x
, SImode
);
9620 bool is_stm
= store_multiple_operation (x
, SImode
);
9622 if (is_ldm
|| is_stm
)
9626 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9627 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9628 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9629 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9630 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9631 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9632 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9634 *cost
+= regs_per_insn_1st
9635 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9636 + regs_per_insn_sub
- 1)
9637 / regs_per_insn_sub
);
9646 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9647 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9648 *cost
+= COSTS_N_INSNS (speed_p
9649 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9650 else if (mode
== SImode
&& TARGET_IDIV
)
9651 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9653 *cost
= LIBCALL_COST (2);
9654 return false; /* All arguments must be in registers. */
9657 /* MOD by a power of 2 can be expanded as:
9659 and r0, r0, #(n - 1)
9660 and r1, r1, #(n - 1)
9661 rsbpl r0, r1, #0. */
9662 if (CONST_INT_P (XEXP (x
, 1))
9663 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9666 *cost
+= COSTS_N_INSNS (3);
9669 *cost
+= 2 * extra_cost
->alu
.logical
9670 + extra_cost
->alu
.arith
;
9676 *cost
= LIBCALL_COST (2);
9677 return false; /* All arguments must be in registers. */
9680 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9682 *cost
+= (COSTS_N_INSNS (1)
9683 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9685 *cost
+= extra_cost
->alu
.shift_reg
;
9693 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9695 *cost
+= (COSTS_N_INSNS (2)
9696 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9698 *cost
+= 2 * extra_cost
->alu
.shift
;
9701 else if (mode
== SImode
)
9703 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9704 /* Slightly disparage register shifts at -Os, but not by much. */
9705 if (!CONST_INT_P (XEXP (x
, 1)))
9706 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9707 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9710 else if (GET_MODE_CLASS (mode
) == MODE_INT
9711 && GET_MODE_SIZE (mode
) < 4)
9715 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9716 /* Slightly disparage register shifts at -Os, but not by
9718 if (!CONST_INT_P (XEXP (x
, 1)))
9719 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9720 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9722 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9724 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9726 /* Can use SBFX/UBFX. */
9728 *cost
+= extra_cost
->alu
.bfx
;
9729 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9733 *cost
+= COSTS_N_INSNS (1);
9734 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9737 if (CONST_INT_P (XEXP (x
, 1)))
9738 *cost
+= 2 * extra_cost
->alu
.shift
;
9740 *cost
+= (extra_cost
->alu
.shift
9741 + extra_cost
->alu
.shift_reg
);
9744 /* Slightly disparage register shifts. */
9745 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9750 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9751 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9754 if (CONST_INT_P (XEXP (x
, 1)))
9755 *cost
+= (2 * extra_cost
->alu
.shift
9756 + extra_cost
->alu
.log_shift
);
9758 *cost
+= (extra_cost
->alu
.shift
9759 + extra_cost
->alu
.shift_reg
9760 + extra_cost
->alu
.log_shift_reg
);
9766 *cost
= LIBCALL_COST (2);
9775 *cost
+= extra_cost
->alu
.rev
;
9782 /* No rev instruction available. Look at arm_legacy_rev
9783 and thumb_legacy_rev for the form of RTL used then. */
9786 *cost
+= COSTS_N_INSNS (9);
9790 *cost
+= 6 * extra_cost
->alu
.shift
;
9791 *cost
+= 3 * extra_cost
->alu
.logical
;
9796 *cost
+= COSTS_N_INSNS (4);
9800 *cost
+= 2 * extra_cost
->alu
.shift
;
9801 *cost
+= extra_cost
->alu
.arith_shift
;
9802 *cost
+= 2 * extra_cost
->alu
.logical
;
9810 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9811 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9813 if (GET_CODE (XEXP (x
, 0)) == MULT
9814 || GET_CODE (XEXP (x
, 1)) == MULT
)
9816 rtx mul_op0
, mul_op1
, sub_op
;
9819 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9821 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9823 mul_op0
= XEXP (XEXP (x
, 0), 0);
9824 mul_op1
= XEXP (XEXP (x
, 0), 1);
9825 sub_op
= XEXP (x
, 1);
9829 mul_op0
= XEXP (XEXP (x
, 1), 0);
9830 mul_op1
= XEXP (XEXP (x
, 1), 1);
9831 sub_op
= XEXP (x
, 0);
9834 /* The first operand of the multiply may be optionally
9836 if (GET_CODE (mul_op0
) == NEG
)
9837 mul_op0
= XEXP (mul_op0
, 0);
9839 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9840 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9841 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9847 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9853 rtx shift_by_reg
= NULL
;
9857 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9858 if (shift_op
== NULL
)
9860 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9861 non_shift_op
= XEXP (x
, 0);
9864 non_shift_op
= XEXP (x
, 1);
9866 if (shift_op
!= NULL
)
9868 if (shift_by_reg
!= NULL
)
9871 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9872 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9875 *cost
+= extra_cost
->alu
.arith_shift
;
9877 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9878 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9883 && GET_CODE (XEXP (x
, 1)) == MULT
)
9887 *cost
+= extra_cost
->mult
[0].add
;
9888 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9889 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9890 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9894 if (CONST_INT_P (XEXP (x
, 0)))
9896 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9897 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9899 *cost
= COSTS_N_INSNS (insns
);
9901 *cost
+= insns
* extra_cost
->alu
.arith
;
9902 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9906 *cost
+= extra_cost
->alu
.arith
;
9911 if (GET_MODE_CLASS (mode
) == MODE_INT
9912 && GET_MODE_SIZE (mode
) < 4)
9914 rtx shift_op
, shift_reg
;
9917 /* We check both sides of the MINUS for shifter operands since,
9918 unlike PLUS, it's not commutative. */
9920 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9921 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9923 /* Slightly disparage, as we might need to widen the result. */
9926 *cost
+= extra_cost
->alu
.arith
;
9928 if (CONST_INT_P (XEXP (x
, 0)))
9930 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9939 *cost
+= COSTS_N_INSNS (1);
9941 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9943 rtx op1
= XEXP (x
, 1);
9946 *cost
+= 2 * extra_cost
->alu
.arith
;
9948 if (GET_CODE (op1
) == ZERO_EXTEND
)
9949 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9952 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9953 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9957 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9960 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9961 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9963 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9966 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9967 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9970 *cost
+= (extra_cost
->alu
.arith
9971 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9972 ? extra_cost
->alu
.arith
9973 : extra_cost
->alu
.arith_shift
));
9974 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9975 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9976 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9981 *cost
+= 2 * extra_cost
->alu
.arith
;
9987 *cost
= LIBCALL_COST (2);
9991 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9992 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9994 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9996 rtx mul_op0
, mul_op1
, add_op
;
9999 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10001 mul_op0
= XEXP (XEXP (x
, 0), 0);
10002 mul_op1
= XEXP (XEXP (x
, 0), 1);
10003 add_op
= XEXP (x
, 1);
10005 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10006 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10007 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
10013 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10016 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10018 *cost
= LIBCALL_COST (2);
10022 /* Narrow modes can be synthesized in SImode, but the range
10023 of useful sub-operations is limited. Check for shift operations
10024 on one of the operands. Only left shifts can be used in the
10026 if (GET_MODE_CLASS (mode
) == MODE_INT
10027 && GET_MODE_SIZE (mode
) < 4)
10029 rtx shift_op
, shift_reg
;
10032 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
10034 if (CONST_INT_P (XEXP (x
, 1)))
10036 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10037 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10039 *cost
= COSTS_N_INSNS (insns
);
10041 *cost
+= insns
* extra_cost
->alu
.arith
;
10042 /* Slightly penalize a narrow operation as the result may
10044 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10048 /* Slightly penalize a narrow operation as the result may
10052 *cost
+= extra_cost
->alu
.arith
;
10057 if (mode
== SImode
)
10059 rtx shift_op
, shift_reg
;
10061 if (TARGET_INT_SIMD
10062 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10063 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10065 /* UXTA[BH] or SXTA[BH]. */
10067 *cost
+= extra_cost
->alu
.extend_arith
;
10068 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10070 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
10075 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10076 if (shift_op
!= NULL
)
10081 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10082 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10085 *cost
+= extra_cost
->alu
.arith_shift
;
10087 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10088 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10091 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10093 rtx mul_op
= XEXP (x
, 0);
10095 if (TARGET_DSP_MULTIPLY
10096 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
10097 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10098 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10099 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10100 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
10101 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
10102 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
10103 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
10104 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10105 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10106 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10107 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
10110 /* SMLA[BT][BT]. */
10112 *cost
+= extra_cost
->mult
[0].extend_add
;
10113 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
10114 SIGN_EXTEND
, 0, speed_p
)
10115 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
10116 SIGN_EXTEND
, 0, speed_p
)
10117 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10122 *cost
+= extra_cost
->mult
[0].add
;
10123 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
10124 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
10125 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10128 if (CONST_INT_P (XEXP (x
, 1)))
10130 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10131 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10133 *cost
= COSTS_N_INSNS (insns
);
10135 *cost
+= insns
* extra_cost
->alu
.arith
;
10136 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10140 *cost
+= extra_cost
->alu
.arith
;
10145 if (mode
== DImode
)
10148 && GET_CODE (XEXP (x
, 0)) == MULT
10149 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10150 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10151 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10152 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10155 *cost
+= extra_cost
->mult
[1].extend_add
;
10156 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10157 ZERO_EXTEND
, 0, speed_p
)
10158 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
10159 ZERO_EXTEND
, 0, speed_p
)
10160 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10164 *cost
+= COSTS_N_INSNS (1);
10166 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10167 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10170 *cost
+= (extra_cost
->alu
.arith
10171 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10172 ? extra_cost
->alu
.arith
10173 : extra_cost
->alu
.arith_shift
));
10175 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10177 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10182 *cost
+= 2 * extra_cost
->alu
.arith
;
10187 *cost
= LIBCALL_COST (2);
10190 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10193 *cost
+= extra_cost
->alu
.rev
;
10197 /* Fall through. */
10198 case AND
: case XOR
:
10199 if (mode
== SImode
)
10201 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10202 rtx op0
= XEXP (x
, 0);
10203 rtx shift_op
, shift_reg
;
10207 || (code
== IOR
&& TARGET_THUMB2
)))
10208 op0
= XEXP (op0
, 0);
10211 shift_op
= shifter_op_p (op0
, &shift_reg
);
10212 if (shift_op
!= NULL
)
10217 *cost
+= extra_cost
->alu
.log_shift_reg
;
10218 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10221 *cost
+= extra_cost
->alu
.log_shift
;
10223 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10224 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10228 if (CONST_INT_P (XEXP (x
, 1)))
10230 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10231 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10234 *cost
= COSTS_N_INSNS (insns
);
10236 *cost
+= insns
* extra_cost
->alu
.logical
;
10237 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
10242 *cost
+= extra_cost
->alu
.logical
;
10243 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
10244 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10248 if (mode
== DImode
)
10250 rtx op0
= XEXP (x
, 0);
10251 enum rtx_code subcode
= GET_CODE (op0
);
10253 *cost
+= COSTS_N_INSNS (1);
10257 || (code
== IOR
&& TARGET_THUMB2
)))
10258 op0
= XEXP (op0
, 0);
10260 if (GET_CODE (op0
) == ZERO_EXTEND
)
10263 *cost
+= 2 * extra_cost
->alu
.logical
;
10265 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
10267 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10270 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10273 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10275 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
10277 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10282 *cost
+= 2 * extra_cost
->alu
.logical
;
10288 *cost
= LIBCALL_COST (2);
10292 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10293 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10295 rtx op0
= XEXP (x
, 0);
10297 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10298 op0
= XEXP (op0
, 0);
10301 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10303 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10304 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10307 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10309 *cost
= LIBCALL_COST (2);
10313 if (mode
== SImode
)
10315 if (TARGET_DSP_MULTIPLY
10316 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10317 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10318 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10319 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10320 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10321 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10322 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10323 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10324 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10325 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10326 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10327 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10330 /* SMUL[TB][TB]. */
10332 *cost
+= extra_cost
->mult
[0].extend
;
10333 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10334 SIGN_EXTEND
, 0, speed_p
);
10335 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10336 SIGN_EXTEND
, 1, speed_p
);
10340 *cost
+= extra_cost
->mult
[0].simple
;
10344 if (mode
== DImode
)
10347 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10348 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10349 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10350 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10353 *cost
+= extra_cost
->mult
[1].extend
;
10354 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10355 ZERO_EXTEND
, 0, speed_p
)
10356 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10357 ZERO_EXTEND
, 0, speed_p
));
10361 *cost
= LIBCALL_COST (2);
10366 *cost
= LIBCALL_COST (2);
10370 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10371 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10373 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10376 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10381 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10385 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10387 *cost
= LIBCALL_COST (1);
10391 if (mode
== SImode
)
10393 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10395 *cost
+= COSTS_N_INSNS (1);
10396 /* Assume the non-flag-changing variant. */
10398 *cost
+= (extra_cost
->alu
.log_shift
10399 + extra_cost
->alu
.arith_shift
);
10400 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10404 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10405 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10407 *cost
+= COSTS_N_INSNS (1);
10408 /* No extra cost for MOV imm and MVN imm. */
10409 /* If the comparison op is using the flags, there's no further
10410 cost, otherwise we need to add the cost of the comparison. */
10411 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10412 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10413 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10415 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10416 *cost
+= (COSTS_N_INSNS (1)
10417 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10419 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10422 *cost
+= extra_cost
->alu
.arith
;
10428 *cost
+= extra_cost
->alu
.arith
;
10432 if (GET_MODE_CLASS (mode
) == MODE_INT
10433 && GET_MODE_SIZE (mode
) < 4)
10435 /* Slightly disparage, as we might need an extend operation. */
10438 *cost
+= extra_cost
->alu
.arith
;
10442 if (mode
== DImode
)
10444 *cost
+= COSTS_N_INSNS (1);
10446 *cost
+= 2 * extra_cost
->alu
.arith
;
10451 *cost
= LIBCALL_COST (1);
10455 if (mode
== SImode
)
10458 rtx shift_reg
= NULL
;
10460 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10464 if (shift_reg
!= NULL
)
10467 *cost
+= extra_cost
->alu
.log_shift_reg
;
10468 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10471 *cost
+= extra_cost
->alu
.log_shift
;
10472 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10477 *cost
+= extra_cost
->alu
.logical
;
10480 if (mode
== DImode
)
10482 *cost
+= COSTS_N_INSNS (1);
10488 *cost
+= LIBCALL_COST (1);
10493 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10495 *cost
+= COSTS_N_INSNS (3);
10498 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10499 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10501 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10502 /* Assume that if one arm of the if_then_else is a register,
10503 that it will be tied with the result and eliminate the
10504 conditional insn. */
10505 if (REG_P (XEXP (x
, 1)))
10507 else if (REG_P (XEXP (x
, 2)))
10513 if (extra_cost
->alu
.non_exec_costs_exec
)
10514 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10516 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10519 *cost
+= op1cost
+ op2cost
;
10525 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10529 machine_mode op0mode
;
10530 /* We'll mostly assume that the cost of a compare is the cost of the
10531 LHS. However, there are some notable exceptions. */
10533 /* Floating point compares are never done as side-effects. */
10534 op0mode
= GET_MODE (XEXP (x
, 0));
10535 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10536 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10539 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10541 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10543 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10549 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10551 *cost
= LIBCALL_COST (2);
10555 /* DImode compares normally take two insns. */
10556 if (op0mode
== DImode
)
10558 *cost
+= COSTS_N_INSNS (1);
10560 *cost
+= 2 * extra_cost
->alu
.arith
;
10564 if (op0mode
== SImode
)
10569 if (XEXP (x
, 1) == const0_rtx
10570 && !(REG_P (XEXP (x
, 0))
10571 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10572 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10574 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10576 /* Multiply operations that set the flags are often
10577 significantly more expensive. */
10579 && GET_CODE (XEXP (x
, 0)) == MULT
10580 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10581 *cost
+= extra_cost
->mult
[0].flag_setting
;
10584 && GET_CODE (XEXP (x
, 0)) == PLUS
10585 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10586 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10588 *cost
+= extra_cost
->mult
[0].flag_setting
;
10593 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10594 if (shift_op
!= NULL
)
10596 if (shift_reg
!= NULL
)
10598 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10601 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10604 *cost
+= extra_cost
->alu
.arith_shift
;
10605 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10606 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10611 *cost
+= extra_cost
->alu
.arith
;
10612 if (CONST_INT_P (XEXP (x
, 1))
10613 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10615 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10623 *cost
= LIBCALL_COST (2);
10646 if (outer_code
== SET
)
10648 /* Is it a store-flag operation? */
10649 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10650 && XEXP (x
, 1) == const0_rtx
)
10652 /* Thumb also needs an IT insn. */
10653 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10656 if (XEXP (x
, 1) == const0_rtx
)
10661 /* LSR Rd, Rn, #31. */
10663 *cost
+= extra_cost
->alu
.shift
;
10673 *cost
+= COSTS_N_INSNS (1);
10677 /* RSBS T1, Rn, Rn, LSR #31
10679 *cost
+= COSTS_N_INSNS (1);
10681 *cost
+= extra_cost
->alu
.arith_shift
;
10685 /* RSB Rd, Rn, Rn, ASR #1
10686 LSR Rd, Rd, #31. */
10687 *cost
+= COSTS_N_INSNS (1);
10689 *cost
+= (extra_cost
->alu
.arith_shift
10690 + extra_cost
->alu
.shift
);
10696 *cost
+= COSTS_N_INSNS (1);
10698 *cost
+= extra_cost
->alu
.shift
;
10702 /* Remaining cases are either meaningless or would take
10703 three insns anyway. */
10704 *cost
= COSTS_N_INSNS (3);
10707 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10712 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10713 if (CONST_INT_P (XEXP (x
, 1))
10714 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10716 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10723 /* Not directly inside a set. If it involves the condition code
10724 register it must be the condition for a branch, cond_exec or
10725 I_T_E operation. Since the comparison is performed elsewhere
10726 this is just the control part which has no additional
10728 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10729 && XEXP (x
, 1) == const0_rtx
)
10737 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10738 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10741 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10745 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10747 *cost
= LIBCALL_COST (1);
10751 if (mode
== SImode
)
10754 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10758 *cost
= LIBCALL_COST (1);
10762 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10763 && MEM_P (XEXP (x
, 0)))
10765 if (mode
== DImode
)
10766 *cost
+= COSTS_N_INSNS (1);
10771 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10772 *cost
+= extra_cost
->ldst
.load
;
10774 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10776 if (mode
== DImode
)
10777 *cost
+= extra_cost
->alu
.shift
;
10782 /* Widening from less than 32-bits requires an extend operation. */
10783 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10785 /* We have SXTB/SXTH. */
10786 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10788 *cost
+= extra_cost
->alu
.extend
;
10790 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10792 /* Needs two shifts. */
10793 *cost
+= COSTS_N_INSNS (1);
10794 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10796 *cost
+= 2 * extra_cost
->alu
.shift
;
10799 /* Widening beyond 32-bits requires one more insn. */
10800 if (mode
== DImode
)
10802 *cost
+= COSTS_N_INSNS (1);
10804 *cost
+= extra_cost
->alu
.shift
;
10811 || GET_MODE (XEXP (x
, 0)) == SImode
10812 || GET_MODE (XEXP (x
, 0)) == QImode
)
10813 && MEM_P (XEXP (x
, 0)))
10815 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10817 if (mode
== DImode
)
10818 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10823 /* Widening from less than 32-bits requires an extend operation. */
10824 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10826 /* UXTB can be a shorter instruction in Thumb2, but it might
10827 be slower than the AND Rd, Rn, #255 alternative. When
10828 optimizing for speed it should never be slower to use
10829 AND, and we don't really model 16-bit vs 32-bit insns
10832 *cost
+= extra_cost
->alu
.logical
;
10834 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10836 /* We have UXTB/UXTH. */
10837 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10839 *cost
+= extra_cost
->alu
.extend
;
10841 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10843 /* Needs two shifts. It's marginally preferable to use
10844 shifts rather than two BIC instructions as the second
10845 shift may merge with a subsequent insn as a shifter
10847 *cost
= COSTS_N_INSNS (2);
10848 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10850 *cost
+= 2 * extra_cost
->alu
.shift
;
10853 /* Widening beyond 32-bits requires one more insn. */
10854 if (mode
== DImode
)
10856 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10863 /* CONST_INT has no mode, so we cannot tell for sure how many
10864 insns are really going to be needed. The best we can do is
10865 look at the value passed. If it fits in SImode, then assume
10866 that's the mode it will be used for. Otherwise assume it
10867 will be used in DImode. */
10868 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10873 /* Avoid blowing up in arm_gen_constant (). */
10874 if (!(outer_code
== PLUS
10875 || outer_code
== AND
10876 || outer_code
== IOR
10877 || outer_code
== XOR
10878 || outer_code
== MINUS
))
10882 if (mode
== SImode
)
10884 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10885 INTVAL (x
), NULL
, NULL
,
10891 *cost
+= COSTS_N_INSNS (arm_gen_constant
10892 (outer_code
, SImode
, NULL
,
10893 trunc_int_for_mode (INTVAL (x
), SImode
),
10895 + arm_gen_constant (outer_code
, SImode
, NULL
,
10896 INTVAL (x
) >> 32, NULL
,
10908 if (arm_arch_thumb2
&& !flag_pic
)
10909 *cost
+= COSTS_N_INSNS (1);
10911 *cost
+= extra_cost
->ldst
.load
;
10914 *cost
+= COSTS_N_INSNS (1);
10918 *cost
+= COSTS_N_INSNS (1);
10920 *cost
+= extra_cost
->alu
.arith
;
10926 *cost
= COSTS_N_INSNS (4);
10931 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10932 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10934 if (vfp3_const_double_rtx (x
))
10937 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10943 if (mode
== DFmode
)
10944 *cost
+= extra_cost
->ldst
.loadd
;
10946 *cost
+= extra_cost
->ldst
.loadf
;
10949 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10953 *cost
= COSTS_N_INSNS (4);
10959 && TARGET_HARD_FLOAT
10960 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10961 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10962 *cost
= COSTS_N_INSNS (1);
10964 *cost
= COSTS_N_INSNS (4);
10969 /* When optimizing for size, we prefer constant pool entries to
10970 MOVW/MOVT pairs, so bump the cost of these slightly. */
10977 *cost
+= extra_cost
->alu
.clz
;
10981 if (XEXP (x
, 1) == const0_rtx
)
10984 *cost
+= extra_cost
->alu
.log_shift
;
10985 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10988 /* Fall through. */
10992 *cost
+= COSTS_N_INSNS (1);
10996 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10997 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10998 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10999 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11000 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
11001 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
11002 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
11003 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11007 *cost
+= extra_cost
->mult
[1].extend
;
11008 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
11009 ZERO_EXTEND
, 0, speed_p
)
11010 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
11011 ZERO_EXTEND
, 0, speed_p
));
11014 *cost
= LIBCALL_COST (1);
11017 case UNSPEC_VOLATILE
:
11019 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
11022 /* Reading the PC is like reading any other register. Writing it
11023 is more expensive, but we take that into account elsewhere. */
11028 /* TODO: Simple zero_extract of bottom bits using AND. */
11029 /* Fall through. */
11033 && CONST_INT_P (XEXP (x
, 1))
11034 && CONST_INT_P (XEXP (x
, 2)))
11037 *cost
+= extra_cost
->alu
.bfx
;
11038 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11041 /* Without UBFX/SBFX, need to resort to shift operations. */
11042 *cost
+= COSTS_N_INSNS (1);
11044 *cost
+= 2 * extra_cost
->alu
.shift
;
11045 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
11049 if (TARGET_HARD_FLOAT
)
11052 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
11053 if (!TARGET_FPU_ARMV8
11054 && GET_MODE (XEXP (x
, 0)) == HFmode
)
11056 /* Pre v8, widening HF->DF is a two-step process, first
11057 widening to SFmode. */
11058 *cost
+= COSTS_N_INSNS (1);
11060 *cost
+= extra_cost
->fp
[0].widen
;
11062 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11066 *cost
= LIBCALL_COST (1);
11069 case FLOAT_TRUNCATE
:
11070 if (TARGET_HARD_FLOAT
)
11073 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
11074 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11076 /* Vector modes? */
11078 *cost
= LIBCALL_COST (1);
11082 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
11084 rtx op0
= XEXP (x
, 0);
11085 rtx op1
= XEXP (x
, 1);
11086 rtx op2
= XEXP (x
, 2);
11089 /* vfms or vfnma. */
11090 if (GET_CODE (op0
) == NEG
)
11091 op0
= XEXP (op0
, 0);
11093 /* vfnms or vfnma. */
11094 if (GET_CODE (op2
) == NEG
)
11095 op2
= XEXP (op2
, 0);
11097 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
11098 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
11099 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
11102 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
11107 *cost
= LIBCALL_COST (3);
11112 if (TARGET_HARD_FLOAT
)
11114 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11115 a vcvt fixed-point conversion. */
11116 if (code
== FIX
&& mode
== SImode
11117 && GET_CODE (XEXP (x
, 0)) == FIX
11118 && GET_MODE (XEXP (x
, 0)) == SFmode
11119 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11120 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11124 *cost
+= extra_cost
->fp
[0].toint
;
11126 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
11131 if (GET_MODE_CLASS (mode
) == MODE_INT
)
11133 mode
= GET_MODE (XEXP (x
, 0));
11135 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
11136 /* Strip of the 'cost' of rounding towards zero. */
11137 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11138 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
11141 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11142 /* ??? Increase the cost to deal with transferring from
11143 FP -> CORE registers? */
11146 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11147 && TARGET_FPU_ARMV8
)
11150 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11153 /* Vector costs? */
11155 *cost
= LIBCALL_COST (1);
11159 case UNSIGNED_FLOAT
:
11160 if (TARGET_HARD_FLOAT
)
11162 /* ??? Increase the cost to deal with transferring from CORE
11163 -> FP registers? */
11165 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11168 *cost
= LIBCALL_COST (1);
11176 /* Just a guess. Guess number of instructions in the asm
11177 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11178 though (see PR60663). */
11179 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11180 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11182 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11186 if (mode
!= VOIDmode
)
11187 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11189 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11194 #undef HANDLE_NARROW_SHIFT_ARITH
11196 /* RTX costs when optimizing for size. */
11198 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
11199 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
11202 int code
= GET_CODE (x
);
11204 if (TARGET_OLD_RTX_COSTS
11205 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
11207 /* Old way. (Deprecated.) */
11209 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
11210 (enum rtx_code
) outer_code
, total
);
11212 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
11213 (enum rtx_code
) outer_code
, total
,
11219 if (current_tune
->insn_extra_cost
)
11220 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11221 (enum rtx_code
) outer_code
,
11222 current_tune
->insn_extra_cost
,
11224 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11225 && current_tune->insn_extra_cost != NULL */
11227 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11228 (enum rtx_code
) outer_code
,
11229 &generic_extra_costs
, total
, speed
);
11232 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
11234 print_rtl_single (dump_file
, x
);
11235 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11236 *total
, result
? "final" : "partial");
11241 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11242 supported on any "slowmul" cores, so it can be ignored. */
11245 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11246 int *total
, bool speed
)
11248 machine_mode mode
= GET_MODE (x
);
11252 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11259 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11262 *total
= COSTS_N_INSNS (20);
11266 if (CONST_INT_P (XEXP (x
, 1)))
11268 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11269 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11270 int cost
, const_ok
= const_ok_for_arm (i
);
11271 int j
, booth_unit_size
;
11273 /* Tune as appropriate. */
11274 cost
= const_ok
? 4 : 8;
11275 booth_unit_size
= 2;
11276 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11278 i
>>= booth_unit_size
;
11282 *total
= COSTS_N_INSNS (cost
);
11283 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
11287 *total
= COSTS_N_INSNS (20);
11291 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
11296 /* RTX cost for cores with a fast multiply unit (M variants). */
11299 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11300 int *total
, bool speed
)
11302 machine_mode mode
= GET_MODE (x
);
11306 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11310 /* ??? should thumb2 use different costs? */
11314 /* There is no point basing this on the tuning, since it is always the
11315 fast variant if it exists at all. */
11317 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11318 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11319 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11321 *total
= COSTS_N_INSNS(2);
11326 if (mode
== DImode
)
11328 *total
= COSTS_N_INSNS (5);
11332 if (CONST_INT_P (XEXP (x
, 1)))
11334 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11335 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11336 int cost
, const_ok
= const_ok_for_arm (i
);
11337 int j
, booth_unit_size
;
11339 /* Tune as appropriate. */
11340 cost
= const_ok
? 4 : 8;
11341 booth_unit_size
= 8;
11342 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11344 i
>>= booth_unit_size
;
11348 *total
= COSTS_N_INSNS(cost
);
11352 if (mode
== SImode
)
11354 *total
= COSTS_N_INSNS (4);
11358 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11360 if (TARGET_HARD_FLOAT
11362 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11364 *total
= COSTS_N_INSNS (1);
11369 /* Requires a lib call */
11370 *total
= COSTS_N_INSNS (20);
11374 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11379 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11380 so it can be ignored. */
11383 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11384 int *total
, bool speed
)
11386 machine_mode mode
= GET_MODE (x
);
11390 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11397 if (GET_CODE (XEXP (x
, 0)) != MULT
)
11398 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11400 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11401 will stall until the multiplication is complete. */
11402 *total
= COSTS_N_INSNS (3);
11406 /* There is no point basing this on the tuning, since it is always the
11407 fast variant if it exists at all. */
11409 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11410 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11411 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11413 *total
= COSTS_N_INSNS (2);
11418 if (mode
== DImode
)
11420 *total
= COSTS_N_INSNS (5);
11424 if (CONST_INT_P (XEXP (x
, 1)))
11426 /* If operand 1 is a constant we can more accurately
11427 calculate the cost of the multiply. The multiplier can
11428 retire 15 bits on the first cycle and a further 12 on the
11429 second. We do, of course, have to load the constant into
11430 a register first. */
11431 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
11432 /* There's a general overhead of one cycle. */
11434 unsigned HOST_WIDE_INT masked_const
;
11436 if (i
& 0x80000000)
11439 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
11441 masked_const
= i
& 0xffff8000;
11442 if (masked_const
!= 0)
11445 masked_const
= i
& 0xf8000000;
11446 if (masked_const
!= 0)
11449 *total
= COSTS_N_INSNS (cost
);
11453 if (mode
== SImode
)
11455 *total
= COSTS_N_INSNS (3);
11459 /* Requires a lib call */
11460 *total
= COSTS_N_INSNS (20);
11464 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11469 /* RTX costs for 9e (and later) cores. */
11472 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11473 int *total
, bool speed
)
11475 machine_mode mode
= GET_MODE (x
);
11482 /* Small multiply: 32 cycles for an integer multiply inst. */
11483 if (arm_arch6m
&& arm_m_profile_small_mul
)
11484 *total
= COSTS_N_INSNS (32);
11486 *total
= COSTS_N_INSNS (3);
11490 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11498 /* There is no point basing this on the tuning, since it is always the
11499 fast variant if it exists at all. */
11501 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11502 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11503 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11505 *total
= COSTS_N_INSNS (2);
11510 if (mode
== DImode
)
11512 *total
= COSTS_N_INSNS (5);
11516 if (mode
== SImode
)
11518 *total
= COSTS_N_INSNS (2);
11522 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11524 if (TARGET_HARD_FLOAT
11526 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11528 *total
= COSTS_N_INSNS (1);
11533 *total
= COSTS_N_INSNS (20);
11537 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11540 /* All address computations that can be done are free, but rtx cost returns
11541 the same for practically all of them. So we weight the different types
11542 of address here in the order (most pref first):
11543 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11545 arm_arm_address_cost (rtx x
)
11547 enum rtx_code c
= GET_CODE (x
);
11549 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11551 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11556 if (CONST_INT_P (XEXP (x
, 1)))
11559 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11569 arm_thumb_address_cost (rtx x
)
11571 enum rtx_code c
= GET_CODE (x
);
11576 && REG_P (XEXP (x
, 0))
11577 && CONST_INT_P (XEXP (x
, 1)))
11584 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11585 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11587 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11590 /* Adjust cost hook for XScale. */
11592 xscale_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11594 /* Some true dependencies can have a higher cost depending
11595 on precisely how certain input operands are used. */
11596 if (REG_NOTE_KIND(link
) == 0
11597 && recog_memoized (insn
) >= 0
11598 && recog_memoized (dep
) >= 0)
11600 int shift_opnum
= get_attr_shift (insn
);
11601 enum attr_type attr_type
= get_attr_type (dep
);
11603 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11604 operand for INSN. If we have a shifted input operand and the
11605 instruction we depend on is another ALU instruction, then we may
11606 have to account for an additional stall. */
11607 if (shift_opnum
!= 0
11608 && (attr_type
== TYPE_ALU_SHIFT_IMM
11609 || attr_type
== TYPE_ALUS_SHIFT_IMM
11610 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11611 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11612 || attr_type
== TYPE_ALU_SHIFT_REG
11613 || attr_type
== TYPE_ALUS_SHIFT_REG
11614 || attr_type
== TYPE_LOGIC_SHIFT_REG
11615 || attr_type
== TYPE_LOGICS_SHIFT_REG
11616 || attr_type
== TYPE_MOV_SHIFT
11617 || attr_type
== TYPE_MVN_SHIFT
11618 || attr_type
== TYPE_MOV_SHIFT_REG
11619 || attr_type
== TYPE_MVN_SHIFT_REG
))
11621 rtx shifted_operand
;
11624 /* Get the shifted operand. */
11625 extract_insn (insn
);
11626 shifted_operand
= recog_data
.operand
[shift_opnum
];
11628 /* Iterate over all the operands in DEP. If we write an operand
11629 that overlaps with SHIFTED_OPERAND, then we have increase the
11630 cost of this dependency. */
11631 extract_insn (dep
);
11632 preprocess_constraints (dep
);
11633 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11635 /* We can ignore strict inputs. */
11636 if (recog_data
.operand_type
[opno
] == OP_IN
)
11639 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11651 /* Adjust cost hook for Cortex A9. */
11653 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11655 switch (REG_NOTE_KIND (link
))
11662 case REG_DEP_OUTPUT
:
11663 if (recog_memoized (insn
) >= 0
11664 && recog_memoized (dep
) >= 0)
11666 if (GET_CODE (PATTERN (insn
)) == SET
)
11669 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11671 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11673 enum attr_type attr_type_insn
= get_attr_type (insn
);
11674 enum attr_type attr_type_dep
= get_attr_type (dep
);
11676 /* By default all dependencies of the form
11679 have an extra latency of 1 cycle because
11680 of the input and output dependency in this
11681 case. However this gets modeled as an true
11682 dependency and hence all these checks. */
11683 if (REG_P (SET_DEST (PATTERN (insn
)))
11684 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
11686 /* FMACS is a special case where the dependent
11687 instruction can be issued 3 cycles before
11688 the normal latency in case of an output
11690 if ((attr_type_insn
== TYPE_FMACS
11691 || attr_type_insn
== TYPE_FMACD
)
11692 && (attr_type_dep
== TYPE_FMACS
11693 || attr_type_dep
== TYPE_FMACD
))
11695 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11696 *cost
= insn_default_latency (dep
) - 3;
11698 *cost
= insn_default_latency (dep
);
11703 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11704 *cost
= insn_default_latency (dep
) + 1;
11706 *cost
= insn_default_latency (dep
);
11716 gcc_unreachable ();
11722 /* Adjust cost hook for FA726TE. */
11724 fa726te_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11726 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11727 have penalty of 3. */
11728 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11729 && recog_memoized (insn
) >= 0
11730 && recog_memoized (dep
) >= 0
11731 && get_attr_conds (dep
) == CONDS_SET
)
11733 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11734 if (get_attr_conds (insn
) == CONDS_USE
11735 && get_attr_type (insn
) != TYPE_BRANCH
)
11741 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11742 || get_attr_conds (insn
) == CONDS_USE
)
11752 /* Implement TARGET_REGISTER_MOVE_COST.
11754 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11755 it is typically more expensive than a single memory access. We set
11756 the cost to less than two memory accesses so that floating
11757 point to integer conversion does not go through memory. */
11760 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11761 reg_class_t from
, reg_class_t to
)
11765 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11766 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11768 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11769 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11771 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11778 if (from
== HI_REGS
|| to
== HI_REGS
)
11785 /* Implement TARGET_MEMORY_MOVE_COST. */
11788 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11789 bool in ATTRIBUTE_UNUSED
)
11795 if (GET_MODE_SIZE (mode
) < 4)
11798 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11802 /* Vectorizer cost model implementation. */
11804 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11806 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11808 int misalign ATTRIBUTE_UNUSED
)
11812 switch (type_of_cost
)
11815 return current_tune
->vec_costs
->scalar_stmt_cost
;
11818 return current_tune
->vec_costs
->scalar_load_cost
;
11821 return current_tune
->vec_costs
->scalar_store_cost
;
11824 return current_tune
->vec_costs
->vec_stmt_cost
;
11827 return current_tune
->vec_costs
->vec_align_load_cost
;
11830 return current_tune
->vec_costs
->vec_store_cost
;
11832 case vec_to_scalar
:
11833 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11835 case scalar_to_vec
:
11836 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11838 case unaligned_load
:
11839 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11841 case unaligned_store
:
11842 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11844 case cond_branch_taken
:
11845 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11847 case cond_branch_not_taken
:
11848 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11851 case vec_promote_demote
:
11852 return current_tune
->vec_costs
->vec_stmt_cost
;
11854 case vec_construct
:
11855 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11856 return elements
/ 2 + 1;
11859 gcc_unreachable ();
11863 /* Implement targetm.vectorize.add_stmt_cost. */
11866 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11867 struct _stmt_vec_info
*stmt_info
, int misalign
,
11868 enum vect_cost_model_location where
)
11870 unsigned *cost
= (unsigned *) data
;
11871 unsigned retval
= 0;
11873 if (flag_vect_cost_model
)
11875 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11876 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11878 /* Statements in an inner loop relative to the loop being
11879 vectorized are weighted more heavily. The value here is
11880 arbitrary and could potentially be improved with analysis. */
11881 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11882 count
*= 50; /* FIXME. */
11884 retval
= (unsigned) (count
* stmt_cost
);
11885 cost
[where
] += retval
;
11891 /* Return true if and only if this insn can dual-issue only as older. */
11893 cortexa7_older_only (rtx_insn
*insn
)
11895 if (recog_memoized (insn
) < 0)
11898 switch (get_attr_type (insn
))
11900 case TYPE_ALU_DSP_REG
:
11901 case TYPE_ALU_SREG
:
11902 case TYPE_ALUS_SREG
:
11903 case TYPE_LOGIC_REG
:
11904 case TYPE_LOGICS_REG
:
11906 case TYPE_ADCS_REG
:
11911 case TYPE_SHIFT_IMM
:
11912 case TYPE_SHIFT_REG
:
11913 case TYPE_LOAD_BYTE
:
11916 case TYPE_FFARITHS
:
11918 case TYPE_FFARITHD
:
11936 case TYPE_F_STORES
:
11943 /* Return true if and only if this insn can dual-issue as younger. */
11945 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11947 if (recog_memoized (insn
) < 0)
11950 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11954 switch (get_attr_type (insn
))
11957 case TYPE_ALUS_IMM
:
11958 case TYPE_LOGIC_IMM
:
11959 case TYPE_LOGICS_IMM
:
11964 case TYPE_MOV_SHIFT
:
11965 case TYPE_MOV_SHIFT_REG
:
11975 /* Look for an instruction that can dual issue only as an older
11976 instruction, and move it in front of any instructions that can
11977 dual-issue as younger, while preserving the relative order of all
11978 other instructions in the ready list. This is a hueuristic to help
11979 dual-issue in later cycles, by postponing issue of more flexible
11980 instructions. This heuristic may affect dual issue opportunities
11981 in the current cycle. */
11983 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11984 int *n_readyp
, int clock
)
11987 int first_older_only
= -1, first_younger
= -1;
11991 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11995 /* Traverse the ready list from the head (the instruction to issue
11996 first), and looking for the first instruction that can issue as
11997 younger and the first instruction that can dual-issue only as
11999 for (i
= *n_readyp
- 1; i
>= 0; i
--)
12001 rtx_insn
*insn
= ready
[i
];
12002 if (cortexa7_older_only (insn
))
12004 first_older_only
= i
;
12006 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
12009 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
12013 /* Nothing to reorder because either no younger insn found or insn
12014 that can dual-issue only as older appears before any insn that
12015 can dual-issue as younger. */
12016 if (first_younger
== -1)
12019 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
12023 /* Nothing to reorder because no older-only insn in the ready list. */
12024 if (first_older_only
== -1)
12027 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
12031 /* Move first_older_only insn before first_younger. */
12033 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
12034 INSN_UID(ready
[first_older_only
]),
12035 INSN_UID(ready
[first_younger
]));
12036 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
12037 for (i
= first_older_only
; i
< first_younger
; i
++)
12039 ready
[i
] = ready
[i
+1];
12042 ready
[i
] = first_older_only_insn
;
12046 /* Implement TARGET_SCHED_REORDER. */
12048 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
12054 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
12057 /* Do nothing for other cores. */
12061 return arm_issue_rate ();
12064 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12065 It corrects the value of COST based on the relationship between
12066 INSN and DEP through the dependence LINK. It returns the new
12067 value. There is a per-core adjust_cost hook to adjust scheduler costs
12068 and the per-core hook can choose to completely override the generic
12069 adjust_cost function. Only put bits of code into arm_adjust_cost that
12070 are common across all cores. */
12072 arm_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int cost
)
12076 /* When generating Thumb-1 code, we want to place flag-setting operations
12077 close to a conditional branch which depends on them, so that we can
12078 omit the comparison. */
12080 && REG_NOTE_KIND (link
) == 0
12081 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
12082 && recog_memoized (dep
) >= 0
12083 && get_attr_conds (dep
) == CONDS_SET
)
12086 if (current_tune
->sched_adjust_cost
!= NULL
)
12088 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
12092 /* XXX Is this strictly true? */
12093 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
12094 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
12097 /* Call insns don't incur a stall, even if they follow a load. */
12098 if (REG_NOTE_KIND (link
) == 0
12102 if ((i_pat
= single_set (insn
)) != NULL
12103 && MEM_P (SET_SRC (i_pat
))
12104 && (d_pat
= single_set (dep
)) != NULL
12105 && MEM_P (SET_DEST (d_pat
)))
12107 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
12108 /* This is a load after a store, there is no conflict if the load reads
12109 from a cached area. Assume that loads from the stack, and from the
12110 constant pool are cached, and that others will miss. This is a
12113 if ((GET_CODE (src_mem
) == SYMBOL_REF
12114 && CONSTANT_POOL_ADDRESS_P (src_mem
))
12115 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
12116 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
12117 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
12125 arm_max_conditional_execute (void)
12127 return max_insns_skipped
;
12131 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12134 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12136 return (optimize
> 0) ? 2 : 0;
12140 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12142 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12145 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12146 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12147 sequences of non-executed instructions in IT blocks probably take the same
12148 amount of time as executed instructions (and the IT instruction itself takes
12149 space in icache). This function was experimentally determined to give good
12150 results on a popular embedded benchmark. */
12153 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
12155 return (TARGET_32BIT
&& speed_p
) ? 1
12156 : arm_default_branch_cost (speed_p
, predictable_p
);
12160 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
12162 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12165 static bool fp_consts_inited
= false;
12167 static REAL_VALUE_TYPE value_fp0
;
12170 init_fp_table (void)
12174 r
= REAL_VALUE_ATOF ("0", DFmode
);
12176 fp_consts_inited
= true;
12179 /* Return TRUE if rtx X is a valid immediate FP constant. */
12181 arm_const_double_rtx (rtx x
)
12183 const REAL_VALUE_TYPE
*r
;
12185 if (!fp_consts_inited
)
12188 r
= CONST_DOUBLE_REAL_VALUE (x
);
12189 if (REAL_VALUE_MINUS_ZERO (*r
))
12192 if (real_equal (r
, &value_fp0
))
12198 /* VFPv3 has a fairly wide range of representable immediates, formed from
12199 "quarter-precision" floating-point values. These can be evaluated using this
12200 formula (with ^ for exponentiation):
12204 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12205 16 <= n <= 31 and 0 <= r <= 7.
12207 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12209 - A (most-significant) is the sign bit.
12210 - BCD are the exponent (encoded as r XOR 3).
12211 - EFGH are the mantissa (encoded as n - 16).
12214 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12215 fconst[sd] instruction, or -1 if X isn't suitable. */
12217 vfp3_const_double_index (rtx x
)
12219 REAL_VALUE_TYPE r
, m
;
12220 int sign
, exponent
;
12221 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12222 unsigned HOST_WIDE_INT mask
;
12223 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12226 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12229 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12231 /* We can't represent these things, so detect them first. */
12232 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12235 /* Extract sign, exponent and mantissa. */
12236 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12237 r
= real_value_abs (&r
);
12238 exponent
= REAL_EXP (&r
);
12239 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12240 highest (sign) bit, with a fixed binary point at bit point_pos.
12241 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12242 bits for the mantissa, this may fail (low bits would be lost). */
12243 real_ldexp (&m
, &r
, point_pos
- exponent
);
12244 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12245 mantissa
= w
.elt (0);
12246 mant_hi
= w
.elt (1);
12248 /* If there are bits set in the low part of the mantissa, we can't
12249 represent this value. */
12253 /* Now make it so that mantissa contains the most-significant bits, and move
12254 the point_pos to indicate that the least-significant bits have been
12256 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12257 mantissa
= mant_hi
;
12259 /* We can permit four significant bits of mantissa only, plus a high bit
12260 which is always 1. */
12261 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
12262 if ((mantissa
& mask
) != 0)
12265 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12266 mantissa
>>= point_pos
- 5;
12268 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12269 floating-point immediate zero with Neon using an integer-zero load, but
12270 that case is handled elsewhere.) */
12274 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12276 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12277 normalized significands are in the range [1, 2). (Our mantissa is shifted
12278 left 4 places at this point relative to normalized IEEE754 values). GCC
12279 internally uses [0.5, 1) (see real.c), so the exponent returned from
12280 REAL_EXP must be altered. */
12281 exponent
= 5 - exponent
;
12283 if (exponent
< 0 || exponent
> 7)
12286 /* Sign, mantissa and exponent are now in the correct form to plug into the
12287 formula described in the comment above. */
12288 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12291 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12293 vfp3_const_double_rtx (rtx x
)
12298 return vfp3_const_double_index (x
) != -1;
12301 /* Recognize immediates which can be used in various Neon instructions. Legal
12302 immediates are described by the following table (for VMVN variants, the
12303 bitwise inverse of the constant shown is recognized. In either case, VMOV
12304 is output and the correct instruction to use for a given constant is chosen
12305 by the assembler). The constant shown is replicated across all elements of
12306 the destination vector.
12308 insn elems variant constant (binary)
12309 ---- ----- ------- -----------------
12310 vmov i32 0 00000000 00000000 00000000 abcdefgh
12311 vmov i32 1 00000000 00000000 abcdefgh 00000000
12312 vmov i32 2 00000000 abcdefgh 00000000 00000000
12313 vmov i32 3 abcdefgh 00000000 00000000 00000000
12314 vmov i16 4 00000000 abcdefgh
12315 vmov i16 5 abcdefgh 00000000
12316 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12317 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12318 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12319 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12320 vmvn i16 10 00000000 abcdefgh
12321 vmvn i16 11 abcdefgh 00000000
12322 vmov i32 12 00000000 00000000 abcdefgh 11111111
12323 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12324 vmov i32 14 00000000 abcdefgh 11111111 11111111
12325 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12326 vmov i8 16 abcdefgh
12327 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12328 eeeeeeee ffffffff gggggggg hhhhhhhh
12329 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12330 vmov f32 19 00000000 00000000 00000000 00000000
12332 For case 18, B = !b. Representable values are exactly those accepted by
12333 vfp3_const_double_index, but are output as floating-point numbers rather
12336 For case 19, we will change it to vmov.i32 when assembling.
12338 Variants 0-5 (inclusive) may also be used as immediates for the second
12339 operand of VORR/VBIC instructions.
12341 The INVERSE argument causes the bitwise inverse of the given operand to be
12342 recognized instead (used for recognizing legal immediates for the VAND/VORN
12343 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12344 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12345 output, rather than the real insns vbic/vorr).
12347 INVERSE makes no difference to the recognition of float vectors.
12349 The return value is the variant of immediate as shown in the above table, or
12350 -1 if the given value doesn't match any of the listed patterns.
12353 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12354 rtx
*modconst
, int *elementwidth
)
12356 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12358 for (i = 0; i < idx; i += (STRIDE)) \
12363 immtype = (CLASS); \
12364 elsize = (ELSIZE); \
12368 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12369 unsigned int innersize
;
12370 unsigned char bytes
[16];
12371 int immtype
= -1, matches
;
12372 unsigned int invmask
= inverse
? 0xff : 0;
12373 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12376 n_elts
= CONST_VECTOR_NUNITS (op
);
12380 if (mode
== VOIDmode
)
12384 innersize
= GET_MODE_UNIT_SIZE (mode
);
12386 /* Vectors of float constants. */
12387 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12389 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12390 const REAL_VALUE_TYPE
*r0
;
12392 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12395 /* FP16 vectors cannot be represented. */
12396 if (GET_MODE_INNER (mode
) == HFmode
)
12399 r0
= CONST_DOUBLE_REAL_VALUE (el0
);
12401 for (i
= 1; i
< n_elts
; i
++)
12403 rtx elt
= CONST_VECTOR_ELT (op
, i
);
12404 if (!real_equal (r0
, CONST_DOUBLE_REAL_VALUE (elt
)))
12409 *modconst
= CONST_VECTOR_ELT (op
, 0);
12414 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12420 /* Splat vector constant out into a byte vector. */
12421 for (i
= 0; i
< n_elts
; i
++)
12423 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12424 unsigned HOST_WIDE_INT elpart
;
12426 gcc_assert (CONST_INT_P (el
));
12427 elpart
= INTVAL (el
);
12429 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
12431 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12432 elpart
>>= BITS_PER_UNIT
;
12436 /* Sanity check. */
12437 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12441 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12442 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12444 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12445 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12447 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12448 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12450 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12451 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12453 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12455 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12457 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12458 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12460 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12461 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12463 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12464 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12466 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12467 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12469 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12471 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12473 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12474 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12476 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12477 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12479 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12480 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12482 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12483 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12485 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12487 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12488 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12496 *elementwidth
= elsize
;
12500 unsigned HOST_WIDE_INT imm
= 0;
12502 /* Un-invert bytes of recognized vector, if necessary. */
12504 for (i
= 0; i
< idx
; i
++)
12505 bytes
[i
] ^= invmask
;
12509 /* FIXME: Broken on 32-bit H_W_I hosts. */
12510 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12512 for (i
= 0; i
< 8; i
++)
12513 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12514 << (i
* BITS_PER_UNIT
);
12516 *modconst
= GEN_INT (imm
);
12520 unsigned HOST_WIDE_INT imm
= 0;
12522 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12523 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12525 *modconst
= GEN_INT (imm
);
12533 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12534 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12535 float elements), and a modified constant (whatever should be output for a
12536 VMOV) in *MODCONST. */
12539 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
12540 rtx
*modconst
, int *elementwidth
)
12544 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12550 *modconst
= tmpconst
;
12553 *elementwidth
= tmpwidth
;
12558 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12559 the immediate is valid, write a constant suitable for using as an operand
12560 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12561 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12564 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
12565 rtx
*modconst
, int *elementwidth
)
12569 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12571 if (retval
< 0 || retval
> 5)
12575 *modconst
= tmpconst
;
12578 *elementwidth
= tmpwidth
;
12583 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12584 the immediate is valid, write a constant suitable for using as an operand
12585 to VSHR/VSHL to *MODCONST and the corresponding element width to
12586 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12587 because they have different limitations. */
12590 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12591 rtx
*modconst
, int *elementwidth
,
12594 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
12595 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12596 unsigned HOST_WIDE_INT last_elt
= 0;
12597 unsigned HOST_WIDE_INT maxshift
;
12599 /* Split vector constant out into a byte vector. */
12600 for (i
= 0; i
< n_elts
; i
++)
12602 rtx el
= CONST_VECTOR_ELT (op
, i
);
12603 unsigned HOST_WIDE_INT elpart
;
12605 if (CONST_INT_P (el
))
12606 elpart
= INTVAL (el
);
12607 else if (CONST_DOUBLE_P (el
))
12610 gcc_unreachable ();
12612 if (i
!= 0 && elpart
!= last_elt
)
12618 /* Shift less than element size. */
12619 maxshift
= innersize
* 8;
12623 /* Left shift immediate value can be from 0 to <size>-1. */
12624 if (last_elt
>= maxshift
)
12629 /* Right shift immediate value can be from 1 to <size>. */
12630 if (last_elt
== 0 || last_elt
> maxshift
)
12635 *elementwidth
= innersize
* 8;
12638 *modconst
= CONST_VECTOR_ELT (op
, 0);
12643 /* Return a string suitable for output of Neon immediate logic operation
12647 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12648 int inverse
, int quad
)
12650 int width
, is_valid
;
12651 static char templ
[40];
12653 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12655 gcc_assert (is_valid
!= 0);
12658 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12660 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12665 /* Return a string suitable for output of Neon immediate shift operation
12666 (VSHR or VSHL) MNEM. */
12669 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12670 machine_mode mode
, int quad
,
12673 int width
, is_valid
;
12674 static char templ
[40];
12676 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12677 gcc_assert (is_valid
!= 0);
12680 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12682 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12687 /* Output a sequence of pairwise operations to implement a reduction.
12688 NOTE: We do "too much work" here, because pairwise operations work on two
12689 registers-worth of operands in one go. Unfortunately we can't exploit those
12690 extra calculations to do the full operation in fewer steps, I don't think.
12691 Although all vector elements of the result but the first are ignored, we
12692 actually calculate the same result in each of the elements. An alternative
12693 such as initially loading a vector with zero to use as each of the second
12694 operands would use up an additional register and take an extra instruction,
12695 for no particular gain. */
12698 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12699 rtx (*reduc
) (rtx
, rtx
, rtx
))
12701 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12704 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12706 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12707 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12712 /* If VALS is a vector constant that can be loaded into a register
12713 using VDUP, generate instructions to do so and return an RTX to
12714 assign to the register. Otherwise return NULL_RTX. */
12717 neon_vdup_constant (rtx vals
)
12719 machine_mode mode
= GET_MODE (vals
);
12720 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12723 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12726 if (!const_vec_duplicate_p (vals
, &x
))
12727 /* The elements are not all the same. We could handle repeating
12728 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12729 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12733 /* We can load this constant by using VDUP and a constant in a
12734 single ARM register. This will be cheaper than a vector
12737 x
= copy_to_mode_reg (inner_mode
, x
);
12738 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12741 /* Generate code to load VALS, which is a PARALLEL containing only
12742 constants (for vec_init) or CONST_VECTOR, efficiently into a
12743 register. Returns an RTX to copy into the register, or NULL_RTX
12744 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12747 neon_make_constant (rtx vals
)
12749 machine_mode mode
= GET_MODE (vals
);
12751 rtx const_vec
= NULL_RTX
;
12752 int n_elts
= GET_MODE_NUNITS (mode
);
12756 if (GET_CODE (vals
) == CONST_VECTOR
)
12758 else if (GET_CODE (vals
) == PARALLEL
)
12760 /* A CONST_VECTOR must contain only CONST_INTs and
12761 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12762 Only store valid constants in a CONST_VECTOR. */
12763 for (i
= 0; i
< n_elts
; ++i
)
12765 rtx x
= XVECEXP (vals
, 0, i
);
12766 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12769 if (n_const
== n_elts
)
12770 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12773 gcc_unreachable ();
12775 if (const_vec
!= NULL
12776 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12777 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12779 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12780 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12781 pipeline cycle; creating the constant takes one or two ARM
12782 pipeline cycles. */
12784 else if (const_vec
!= NULL_RTX
)
12785 /* Load from constant pool. On Cortex-A8 this takes two cycles
12786 (for either double or quad vectors). We can not take advantage
12787 of single-cycle VLD1 because we need a PC-relative addressing
12791 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12792 We can not construct an initializer. */
12796 /* Initialize vector TARGET to VALS. */
12799 neon_expand_vector_init (rtx target
, rtx vals
)
12801 machine_mode mode
= GET_MODE (target
);
12802 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12803 int n_elts
= GET_MODE_NUNITS (mode
);
12804 int n_var
= 0, one_var
= -1;
12805 bool all_same
= true;
12809 for (i
= 0; i
< n_elts
; ++i
)
12811 x
= XVECEXP (vals
, 0, i
);
12812 if (!CONSTANT_P (x
))
12813 ++n_var
, one_var
= i
;
12815 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12821 rtx constant
= neon_make_constant (vals
);
12822 if (constant
!= NULL_RTX
)
12824 emit_move_insn (target
, constant
);
12829 /* Splat a single non-constant element if we can. */
12830 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12832 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12833 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
12837 /* One field is non-constant. Load constant then overwrite varying
12838 field. This is more efficient than using the stack. */
12841 rtx copy
= copy_rtx (vals
);
12842 rtx index
= GEN_INT (one_var
);
12844 /* Load constant part of vector, substitute neighboring value for
12845 varying element. */
12846 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12847 neon_expand_vector_init (target
, copy
);
12849 /* Insert variable. */
12850 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12854 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12857 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12860 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12863 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12866 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12869 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12872 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12875 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12878 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12881 gcc_unreachable ();
12886 /* Construct the vector in memory one field at a time
12887 and load the whole vector. */
12888 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12889 for (i
= 0; i
< n_elts
; i
++)
12890 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12891 i
* GET_MODE_SIZE (inner_mode
)),
12892 XVECEXP (vals
, 0, i
));
12893 emit_move_insn (target
, mem
);
12896 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12897 ERR if it doesn't. EXP indicates the source location, which includes the
12898 inlining history for intrinsics. */
12901 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12902 const_tree exp
, const char *desc
)
12904 HOST_WIDE_INT lane
;
12906 gcc_assert (CONST_INT_P (operand
));
12908 lane
= INTVAL (operand
);
12910 if (lane
< low
|| lane
>= high
)
12913 error ("%K%s %wd out of range %wd - %wd",
12914 exp
, desc
, lane
, low
, high
- 1);
12916 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12920 /* Bounds-check lanes. */
12923 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12926 bounds_check (operand
, low
, high
, exp
, "lane");
12929 /* Bounds-check constants. */
12932 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12934 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12938 neon_element_bits (machine_mode mode
)
12940 return GET_MODE_UNIT_BITSIZE (mode
);
12944 /* Predicates for `match_operand' and `match_operator'. */
12946 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12947 WB is true if full writeback address modes are allowed and is false
12948 if limited writeback address modes (POST_INC and PRE_DEC) are
12952 arm_coproc_mem_operand (rtx op
, bool wb
)
12956 /* Reject eliminable registers. */
12957 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12958 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12959 || reg_mentioned_p (arg_pointer_rtx
, op
)
12960 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12961 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12962 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12963 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12966 /* Constants are converted into offsets from labels. */
12970 ind
= XEXP (op
, 0);
12972 if (reload_completed
12973 && (GET_CODE (ind
) == LABEL_REF
12974 || (GET_CODE (ind
) == CONST
12975 && GET_CODE (XEXP (ind
, 0)) == PLUS
12976 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12977 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12980 /* Match: (mem (reg)). */
12982 return arm_address_register_rtx_p (ind
, 0);
12984 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12985 acceptable in any case (subject to verification by
12986 arm_address_register_rtx_p). We need WB to be true to accept
12987 PRE_INC and POST_DEC. */
12988 if (GET_CODE (ind
) == POST_INC
12989 || GET_CODE (ind
) == PRE_DEC
12991 && (GET_CODE (ind
) == PRE_INC
12992 || GET_CODE (ind
) == POST_DEC
)))
12993 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12996 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12997 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12998 && GET_CODE (XEXP (ind
, 1)) == PLUS
12999 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
13000 ind
= XEXP (ind
, 1);
13005 if (GET_CODE (ind
) == PLUS
13006 && REG_P (XEXP (ind
, 0))
13007 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13008 && CONST_INT_P (XEXP (ind
, 1))
13009 && INTVAL (XEXP (ind
, 1)) > -1024
13010 && INTVAL (XEXP (ind
, 1)) < 1024
13011 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13017 /* Return TRUE if OP is a memory operand which we can load or store a vector
13018 to/from. TYPE is one of the following values:
13019 0 - Vector load/stor (vldr)
13020 1 - Core registers (ldm)
13021 2 - Element/structure loads (vld1)
13024 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
13028 /* Reject eliminable registers. */
13029 if (strict
&& ! (reload_in_progress
|| reload_completed
)
13030 && (reg_mentioned_p (frame_pointer_rtx
, op
)
13031 || reg_mentioned_p (arg_pointer_rtx
, op
)
13032 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13033 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13034 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13035 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13038 /* Constants are converted into offsets from labels. */
13042 ind
= XEXP (op
, 0);
13044 if (reload_completed
13045 && (GET_CODE (ind
) == LABEL_REF
13046 || (GET_CODE (ind
) == CONST
13047 && GET_CODE (XEXP (ind
, 0)) == PLUS
13048 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13049 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13052 /* Match: (mem (reg)). */
13054 return arm_address_register_rtx_p (ind
, 0);
13056 /* Allow post-increment with Neon registers. */
13057 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
13058 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
13059 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13061 /* Allow post-increment by register for VLDn */
13062 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
13063 && GET_CODE (XEXP (ind
, 1)) == PLUS
13064 && REG_P (XEXP (XEXP (ind
, 1), 1)))
13071 && GET_CODE (ind
) == PLUS
13072 && REG_P (XEXP (ind
, 0))
13073 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13074 && CONST_INT_P (XEXP (ind
, 1))
13075 && INTVAL (XEXP (ind
, 1)) > -1024
13076 /* For quad modes, we restrict the constant offset to be slightly less
13077 than what the instruction format permits. We have no such constraint
13078 on double mode offsets. (This must match arm_legitimate_index_p.) */
13079 && (INTVAL (XEXP (ind
, 1))
13080 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
13081 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13087 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13090 neon_struct_mem_operand (rtx op
)
13094 /* Reject eliminable registers. */
13095 if (! (reload_in_progress
|| reload_completed
)
13096 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13097 || reg_mentioned_p (arg_pointer_rtx
, op
)
13098 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13099 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13100 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13101 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13104 /* Constants are converted into offsets from labels. */
13108 ind
= XEXP (op
, 0);
13110 if (reload_completed
13111 && (GET_CODE (ind
) == LABEL_REF
13112 || (GET_CODE (ind
) == CONST
13113 && GET_CODE (XEXP (ind
, 0)) == PLUS
13114 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13115 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13118 /* Match: (mem (reg)). */
13120 return arm_address_register_rtx_p (ind
, 0);
13122 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13123 if (GET_CODE (ind
) == POST_INC
13124 || GET_CODE (ind
) == PRE_DEC
)
13125 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13130 /* Return true if X is a register that will be eliminated later on. */
13132 arm_eliminable_register (rtx x
)
13134 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
13135 || REGNO (x
) == ARG_POINTER_REGNUM
13136 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
13137 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
13140 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13141 coprocessor registers. Otherwise return NO_REGS. */
13144 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13146 if (mode
== HFmode
)
13148 if (!TARGET_NEON_FP16
)
13149 return GENERAL_REGS
;
13150 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13152 return GENERAL_REGS
;
13155 /* The neon move patterns handle all legitimate vector and struct
13158 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13159 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13160 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13161 || VALID_NEON_STRUCT_MODE (mode
)))
13164 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13167 return GENERAL_REGS
;
13170 /* Values which must be returned in the most-significant end of the return
13174 arm_return_in_msb (const_tree valtype
)
13176 return (TARGET_AAPCS_BASED
13177 && BYTES_BIG_ENDIAN
13178 && (AGGREGATE_TYPE_P (valtype
)
13179 || TREE_CODE (valtype
) == COMPLEX_TYPE
13180 || FIXED_POINT_TYPE_P (valtype
)));
13183 /* Return TRUE if X references a SYMBOL_REF. */
13185 symbol_mentioned_p (rtx x
)
13190 if (GET_CODE (x
) == SYMBOL_REF
)
13193 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13194 are constant offsets, not symbols. */
13195 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13198 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13200 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13206 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13207 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
13210 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
13217 /* Return TRUE if X references a LABEL_REF. */
13219 label_mentioned_p (rtx x
)
13224 if (GET_CODE (x
) == LABEL_REF
)
13227 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13228 instruction, but they are constant offsets, not symbols. */
13229 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13232 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13233 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13239 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13240 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
13243 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
13251 tls_mentioned_p (rtx x
)
13253 switch (GET_CODE (x
))
13256 return tls_mentioned_p (XEXP (x
, 0));
13259 if (XINT (x
, 1) == UNSPEC_TLS
)
13267 /* Must not copy any rtx that uses a pc-relative address.
13268 Also, disallow copying of load-exclusive instructions that
13269 may appear after splitting of compare-and-swap-style operations
13270 so as to prevent those loops from being transformed away from their
13271 canonical forms (see PR 69904). */
13274 arm_cannot_copy_insn_p (rtx_insn
*insn
)
13276 /* The tls call insn cannot be copied, as it is paired with a data
13278 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13281 subrtx_iterator::array_type array
;
13282 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
13284 const_rtx x
= *iter
;
13285 if (GET_CODE (x
) == UNSPEC
13286 && (XINT (x
, 1) == UNSPEC_PIC_BASE
13287 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
13291 rtx set
= single_set (insn
);
13294 rtx src
= SET_SRC (set
);
13295 if (GET_CODE (src
) == ZERO_EXTEND
)
13296 src
= XEXP (src
, 0);
13298 /* Catch the load-exclusive and load-acquire operations. */
13299 if (GET_CODE (src
) == UNSPEC_VOLATILE
13300 && (XINT (src
, 1) == VUNSPEC_LL
13301 || XINT (src
, 1) == VUNSPEC_LAX
))
13308 minmax_code (rtx x
)
13310 enum rtx_code code
= GET_CODE (x
);
13323 gcc_unreachable ();
13327 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13330 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13331 int *mask
, bool *signed_sat
)
13333 /* The high bound must be a power of two minus one. */
13334 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13338 /* The low bound is either zero (for usat) or one less than the
13339 negation of the high bound (for ssat). */
13340 if (INTVAL (lo_bound
) == 0)
13345 *signed_sat
= false;
13350 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13355 *signed_sat
= true;
13363 /* Return 1 if memory locations are adjacent. */
13365 adjacent_mem_locations (rtx a
, rtx b
)
13367 /* We don't guarantee to preserve the order of these memory refs. */
13368 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13371 if ((REG_P (XEXP (a
, 0))
13372 || (GET_CODE (XEXP (a
, 0)) == PLUS
13373 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13374 && (REG_P (XEXP (b
, 0))
13375 || (GET_CODE (XEXP (b
, 0)) == PLUS
13376 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13378 HOST_WIDE_INT val0
= 0, val1
= 0;
13382 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13384 reg0
= XEXP (XEXP (a
, 0), 0);
13385 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13388 reg0
= XEXP (a
, 0);
13390 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13392 reg1
= XEXP (XEXP (b
, 0), 0);
13393 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13396 reg1
= XEXP (b
, 0);
13398 /* Don't accept any offset that will require multiple
13399 instructions to handle, since this would cause the
13400 arith_adjacentmem pattern to output an overlong sequence. */
13401 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13404 /* Don't allow an eliminable register: register elimination can make
13405 the offset too large. */
13406 if (arm_eliminable_register (reg0
))
13409 val_diff
= val1
- val0
;
13413 /* If the target has load delay slots, then there's no benefit
13414 to using an ldm instruction unless the offset is zero and
13415 we are optimizing for size. */
13416 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13417 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13418 && (val_diff
== 4 || val_diff
== -4));
13421 return ((REGNO (reg0
) == REGNO (reg1
))
13422 && (val_diff
== 4 || val_diff
== -4));
13428 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13429 for load operations, false for store operations. CONSECUTIVE is true
13430 if the register numbers in the operation must be consecutive in the register
13431 bank. RETURN_PC is true if value is to be loaded in PC.
13432 The pattern we are trying to match for load is:
13433 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13434 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13437 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13440 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13441 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13442 3. If consecutive is TRUE, then for kth register being loaded,
13443 REGNO (R_dk) = REGNO (R_d0) + k.
13444 The pattern for store is similar. */
13446 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
13447 bool consecutive
, bool return_pc
)
13449 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13450 rtx reg
, mem
, addr
;
13452 unsigned first_regno
;
13453 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13455 bool addr_reg_in_reglist
= false;
13456 bool update
= false;
13461 /* If not in SImode, then registers must be consecutive
13462 (e.g., VLDM instructions for DFmode). */
13463 gcc_assert ((mode
== SImode
) || consecutive
);
13464 /* Setting return_pc for stores is illegal. */
13465 gcc_assert (!return_pc
|| load
);
13467 /* Set up the increments and the regs per val based on the mode. */
13468 reg_increment
= GET_MODE_SIZE (mode
);
13469 regs_per_val
= reg_increment
/ 4;
13470 offset_adj
= return_pc
? 1 : 0;
13473 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13474 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13477 /* Check if this is a write-back. */
13478 elt
= XVECEXP (op
, 0, offset_adj
);
13479 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13485 /* The offset adjustment must be the number of registers being
13486 popped times the size of a single register. */
13487 if (!REG_P (SET_DEST (elt
))
13488 || !REG_P (XEXP (SET_SRC (elt
), 0))
13489 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13490 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13491 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13492 ((count
- 1 - offset_adj
) * reg_increment
))
13496 i
= i
+ offset_adj
;
13497 base
= base
+ offset_adj
;
13498 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13499 success depends on the type: VLDM can do just one reg,
13500 LDM must do at least two. */
13501 if ((count
<= i
) && (mode
== SImode
))
13504 elt
= XVECEXP (op
, 0, i
- 1);
13505 if (GET_CODE (elt
) != SET
)
13510 reg
= SET_DEST (elt
);
13511 mem
= SET_SRC (elt
);
13515 reg
= SET_SRC (elt
);
13516 mem
= SET_DEST (elt
);
13519 if (!REG_P (reg
) || !MEM_P (mem
))
13522 regno
= REGNO (reg
);
13523 first_regno
= regno
;
13524 addr
= XEXP (mem
, 0);
13525 if (GET_CODE (addr
) == PLUS
)
13527 if (!CONST_INT_P (XEXP (addr
, 1)))
13530 offset
= INTVAL (XEXP (addr
, 1));
13531 addr
= XEXP (addr
, 0);
13537 /* Don't allow SP to be loaded unless it is also the base register. It
13538 guarantees that SP is reset correctly when an LDM instruction
13539 is interrupted. Otherwise, we might end up with a corrupt stack. */
13540 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13543 for (; i
< count
; i
++)
13545 elt
= XVECEXP (op
, 0, i
);
13546 if (GET_CODE (elt
) != SET
)
13551 reg
= SET_DEST (elt
);
13552 mem
= SET_SRC (elt
);
13556 reg
= SET_SRC (elt
);
13557 mem
= SET_DEST (elt
);
13561 || GET_MODE (reg
) != mode
13562 || REGNO (reg
) <= regno
13565 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13566 /* Don't allow SP to be loaded unless it is also the base register. It
13567 guarantees that SP is reset correctly when an LDM instruction
13568 is interrupted. Otherwise, we might end up with a corrupt stack. */
13569 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13571 || GET_MODE (mem
) != mode
13572 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13573 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13574 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13575 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13576 offset
+ (i
- base
) * reg_increment
))
13577 && (!REG_P (XEXP (mem
, 0))
13578 || offset
+ (i
- base
) * reg_increment
!= 0)))
13581 regno
= REGNO (reg
);
13582 if (regno
== REGNO (addr
))
13583 addr_reg_in_reglist
= true;
13588 if (update
&& addr_reg_in_reglist
)
13591 /* For Thumb-1, address register is always modified - either by write-back
13592 or by explicit load. If the pattern does not describe an update,
13593 then the address register must be in the list of loaded registers. */
13595 return update
|| addr_reg_in_reglist
;
13601 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13602 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13603 instruction. ADD_OFFSET is nonzero if the base address register needs
13604 to be modified with an add instruction before we can use it. */
13607 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13608 int nops
, HOST_WIDE_INT add_offset
)
13610 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13611 if the offset isn't small enough. The reason 2 ldrs are faster
13612 is because these ARMs are able to do more than one cache access
13613 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13614 whilst the ARM8 has a double bandwidth cache. This means that
13615 these cores can do both an instruction fetch and a data fetch in
13616 a single cycle, so the trick of calculating the address into a
13617 scratch register (one of the result regs) and then doing a load
13618 multiple actually becomes slower (and no smaller in code size).
13619 That is the transformation
13621 ldr rd1, [rbase + offset]
13622 ldr rd2, [rbase + offset + 4]
13626 add rd1, rbase, offset
13627 ldmia rd1, {rd1, rd2}
13629 produces worse code -- '3 cycles + any stalls on rd2' instead of
13630 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13631 access per cycle, the first sequence could never complete in less
13632 than 6 cycles, whereas the ldm sequence would only take 5 and
13633 would make better use of sequential accesses if not hitting the
13636 We cheat here and test 'arm_ld_sched' which we currently know to
13637 only be true for the ARM8, ARM9 and StrongARM. If this ever
13638 changes, then the test below needs to be reworked. */
13639 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13642 /* XScale has load-store double instructions, but they have stricter
13643 alignment requirements than load-store multiple, so we cannot
13646 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13647 the pipeline until completion.
13655 An ldr instruction takes 1-3 cycles, but does not block the
13664 Best case ldr will always win. However, the more ldr instructions
13665 we issue, the less likely we are to be able to schedule them well.
13666 Using ldr instructions also increases code size.
13668 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13669 for counts of 3 or 4 regs. */
13670 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13675 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13676 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13677 an array ORDER which describes the sequence to use when accessing the
13678 offsets that produces an ascending order. In this sequence, each
13679 offset must be larger by exactly 4 than the previous one. ORDER[0]
13680 must have been filled in with the lowest offset by the caller.
13681 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13682 we use to verify that ORDER produces an ascending order of registers.
13683 Return true if it was possible to construct such an order, false if
13687 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13688 int *unsorted_regs
)
13691 for (i
= 1; i
< nops
; i
++)
13695 order
[i
] = order
[i
- 1];
13696 for (j
= 0; j
< nops
; j
++)
13697 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13699 /* We must find exactly one offset that is higher than the
13700 previous one by 4. */
13701 if (order
[i
] != order
[i
- 1])
13705 if (order
[i
] == order
[i
- 1])
13707 /* The register numbers must be ascending. */
13708 if (unsorted_regs
!= NULL
13709 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13715 /* Used to determine in a peephole whether a sequence of load
13716 instructions can be changed into a load-multiple instruction.
13717 NOPS is the number of separate load instructions we are examining. The
13718 first NOPS entries in OPERANDS are the destination registers, the
13719 next NOPS entries are memory operands. If this function is
13720 successful, *BASE is set to the common base register of the memory
13721 accesses; *LOAD_OFFSET is set to the first memory location's offset
13722 from that base register.
13723 REGS is an array filled in with the destination register numbers.
13724 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13725 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13726 the sequence of registers in REGS matches the loads from ascending memory
13727 locations, and the function verifies that the register numbers are
13728 themselves ascending. If CHECK_REGS is false, the register numbers
13729 are stored in the order they are found in the operands. */
13731 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13732 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13734 int unsorted_regs
[MAX_LDM_STM_OPS
];
13735 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13736 int order
[MAX_LDM_STM_OPS
];
13737 rtx base_reg_rtx
= NULL
;
13741 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13742 easily extended if required. */
13743 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13745 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13747 /* Loop over the operands and check that the memory references are
13748 suitable (i.e. immediate offsets from the same base register). At
13749 the same time, extract the target register, and the memory
13751 for (i
= 0; i
< nops
; i
++)
13756 /* Convert a subreg of a mem into the mem itself. */
13757 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13758 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13760 gcc_assert (MEM_P (operands
[nops
+ i
]));
13762 /* Don't reorder volatile memory references; it doesn't seem worth
13763 looking for the case where the order is ok anyway. */
13764 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13767 offset
= const0_rtx
;
13769 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13770 || (GET_CODE (reg
) == SUBREG
13771 && REG_P (reg
= SUBREG_REG (reg
))))
13772 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13773 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13774 || (GET_CODE (reg
) == SUBREG
13775 && REG_P (reg
= SUBREG_REG (reg
))))
13776 && (CONST_INT_P (offset
13777 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13781 base_reg
= REGNO (reg
);
13782 base_reg_rtx
= reg
;
13783 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13786 else if (base_reg
!= (int) REGNO (reg
))
13787 /* Not addressed from the same base register. */
13790 unsorted_regs
[i
] = (REG_P (operands
[i
])
13791 ? REGNO (operands
[i
])
13792 : REGNO (SUBREG_REG (operands
[i
])));
13794 /* If it isn't an integer register, or if it overwrites the
13795 base register but isn't the last insn in the list, then
13796 we can't do this. */
13797 if (unsorted_regs
[i
] < 0
13798 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13799 || unsorted_regs
[i
] > 14
13800 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13803 /* Don't allow SP to be loaded unless it is also the base
13804 register. It guarantees that SP is reset correctly when
13805 an LDM instruction is interrupted. Otherwise, we might
13806 end up with a corrupt stack. */
13807 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13810 unsorted_offsets
[i
] = INTVAL (offset
);
13811 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13815 /* Not a suitable memory address. */
13819 /* All the useful information has now been extracted from the
13820 operands into unsorted_regs and unsorted_offsets; additionally,
13821 order[0] has been set to the lowest offset in the list. Sort
13822 the offsets into order, verifying that they are adjacent, and
13823 check that the register numbers are ascending. */
13824 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13825 check_regs
? unsorted_regs
: NULL
))
13829 memcpy (saved_order
, order
, sizeof order
);
13835 for (i
= 0; i
< nops
; i
++)
13836 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13838 *load_offset
= unsorted_offsets
[order
[0]];
13842 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13845 if (unsorted_offsets
[order
[0]] == 0)
13846 ldm_case
= 1; /* ldmia */
13847 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13848 ldm_case
= 2; /* ldmib */
13849 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13850 ldm_case
= 3; /* ldmda */
13851 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13852 ldm_case
= 4; /* ldmdb */
13853 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13854 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13859 if (!multiple_operation_profitable_p (false, nops
,
13861 ? unsorted_offsets
[order
[0]] : 0))
13867 /* Used to determine in a peephole whether a sequence of store instructions can
13868 be changed into a store-multiple instruction.
13869 NOPS is the number of separate store instructions we are examining.
13870 NOPS_TOTAL is the total number of instructions recognized by the peephole
13872 The first NOPS entries in OPERANDS are the source registers, the next
13873 NOPS entries are memory operands. If this function is successful, *BASE is
13874 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13875 to the first memory location's offset from that base register. REGS is an
13876 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13877 likewise filled with the corresponding rtx's.
13878 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13879 numbers to an ascending order of stores.
13880 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13881 from ascending memory locations, and the function verifies that the register
13882 numbers are themselves ascending. If CHECK_REGS is false, the register
13883 numbers are stored in the order they are found in the operands. */
13885 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13886 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13887 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13889 int unsorted_regs
[MAX_LDM_STM_OPS
];
13890 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13891 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13892 int order
[MAX_LDM_STM_OPS
];
13894 rtx base_reg_rtx
= NULL
;
13897 /* Write back of base register is currently only supported for Thumb 1. */
13898 int base_writeback
= TARGET_THUMB1
;
13900 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13901 easily extended if required. */
13902 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13904 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13906 /* Loop over the operands and check that the memory references are
13907 suitable (i.e. immediate offsets from the same base register). At
13908 the same time, extract the target register, and the memory
13910 for (i
= 0; i
< nops
; i
++)
13915 /* Convert a subreg of a mem into the mem itself. */
13916 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13917 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13919 gcc_assert (MEM_P (operands
[nops
+ i
]));
13921 /* Don't reorder volatile memory references; it doesn't seem worth
13922 looking for the case where the order is ok anyway. */
13923 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13926 offset
= const0_rtx
;
13928 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13929 || (GET_CODE (reg
) == SUBREG
13930 && REG_P (reg
= SUBREG_REG (reg
))))
13931 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13932 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13933 || (GET_CODE (reg
) == SUBREG
13934 && REG_P (reg
= SUBREG_REG (reg
))))
13935 && (CONST_INT_P (offset
13936 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13938 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13939 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13940 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13944 base_reg
= REGNO (reg
);
13945 base_reg_rtx
= reg
;
13946 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13949 else if (base_reg
!= (int) REGNO (reg
))
13950 /* Not addressed from the same base register. */
13953 /* If it isn't an integer register, then we can't do this. */
13954 if (unsorted_regs
[i
] < 0
13955 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13956 /* The effects are unpredictable if the base register is
13957 both updated and stored. */
13958 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13959 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13960 || unsorted_regs
[i
] > 14)
13963 unsorted_offsets
[i
] = INTVAL (offset
);
13964 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13968 /* Not a suitable memory address. */
13972 /* All the useful information has now been extracted from the
13973 operands into unsorted_regs and unsorted_offsets; additionally,
13974 order[0] has been set to the lowest offset in the list. Sort
13975 the offsets into order, verifying that they are adjacent, and
13976 check that the register numbers are ascending. */
13977 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13978 check_regs
? unsorted_regs
: NULL
))
13982 memcpy (saved_order
, order
, sizeof order
);
13988 for (i
= 0; i
< nops
; i
++)
13990 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13992 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13995 *load_offset
= unsorted_offsets
[order
[0]];
13999 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
14002 if (unsorted_offsets
[order
[0]] == 0)
14003 stm_case
= 1; /* stmia */
14004 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
14005 stm_case
= 2; /* stmib */
14006 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
14007 stm_case
= 3; /* stmda */
14008 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
14009 stm_case
= 4; /* stmdb */
14013 if (!multiple_operation_profitable_p (false, nops
, 0))
14019 /* Routines for use in generating RTL. */
14021 /* Generate a load-multiple instruction. COUNT is the number of loads in
14022 the instruction; REGS and MEMS are arrays containing the operands.
14023 BASEREG is the base register to be used in addressing the memory operands.
14024 WBACK_OFFSET is nonzero if the instruction should update the base
14028 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14029 HOST_WIDE_INT wback_offset
)
14034 if (!multiple_operation_profitable_p (false, count
, 0))
14040 for (i
= 0; i
< count
; i
++)
14041 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
14043 if (wback_offset
!= 0)
14044 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14046 seq
= get_insns ();
14052 result
= gen_rtx_PARALLEL (VOIDmode
,
14053 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14054 if (wback_offset
!= 0)
14056 XVECEXP (result
, 0, 0)
14057 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14062 for (j
= 0; i
< count
; i
++, j
++)
14063 XVECEXP (result
, 0, i
)
14064 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
14069 /* Generate a store-multiple instruction. COUNT is the number of stores in
14070 the instruction; REGS and MEMS are arrays containing the operands.
14071 BASEREG is the base register to be used in addressing the memory operands.
14072 WBACK_OFFSET is nonzero if the instruction should update the base
14076 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14077 HOST_WIDE_INT wback_offset
)
14082 if (GET_CODE (basereg
) == PLUS
)
14083 basereg
= XEXP (basereg
, 0);
14085 if (!multiple_operation_profitable_p (false, count
, 0))
14091 for (i
= 0; i
< count
; i
++)
14092 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
14094 if (wback_offset
!= 0)
14095 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14097 seq
= get_insns ();
14103 result
= gen_rtx_PARALLEL (VOIDmode
,
14104 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14105 if (wback_offset
!= 0)
14107 XVECEXP (result
, 0, 0)
14108 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14113 for (j
= 0; i
< count
; i
++, j
++)
14114 XVECEXP (result
, 0, i
)
14115 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
14120 /* Generate either a load-multiple or a store-multiple instruction. This
14121 function can be used in situations where we can start with a single MEM
14122 rtx and adjust its address upwards.
14123 COUNT is the number of operations in the instruction, not counting a
14124 possible update of the base register. REGS is an array containing the
14126 BASEREG is the base register to be used in addressing the memory operands,
14127 which are constructed from BASEMEM.
14128 WRITE_BACK specifies whether the generated instruction should include an
14129 update of the base register.
14130 OFFSETP is used to pass an offset to and from this function; this offset
14131 is not used when constructing the address (instead BASEMEM should have an
14132 appropriate offset in its address), it is used only for setting
14133 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14136 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
14137 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
14139 rtx mems
[MAX_LDM_STM_OPS
];
14140 HOST_WIDE_INT offset
= *offsetp
;
14143 gcc_assert (count
<= MAX_LDM_STM_OPS
);
14145 if (GET_CODE (basereg
) == PLUS
)
14146 basereg
= XEXP (basereg
, 0);
14148 for (i
= 0; i
< count
; i
++)
14150 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
14151 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
14159 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
14160 write_back
? 4 * count
: 0);
14162 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
14163 write_back
? 4 * count
: 0);
14167 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14168 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14170 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
14175 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14176 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14178 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
14182 /* Called from a peephole2 expander to turn a sequence of loads into an
14183 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14184 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14185 is true if we can reorder the registers because they are used commutatively
14187 Returns true iff we could generate a new instruction. */
14190 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
14192 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14193 rtx mems
[MAX_LDM_STM_OPS
];
14194 int i
, j
, base_reg
;
14196 HOST_WIDE_INT offset
;
14197 int write_back
= FALSE
;
14201 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
14202 &base_reg
, &offset
, !sort_regs
);
14208 for (i
= 0; i
< nops
- 1; i
++)
14209 for (j
= i
+ 1; j
< nops
; j
++)
14210 if (regs
[i
] > regs
[j
])
14216 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14220 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
14221 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
14227 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
14228 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
14230 if (!TARGET_THUMB1
)
14232 base_reg
= regs
[0];
14233 base_reg_rtx
= newbase
;
14237 for (i
= 0; i
< nops
; i
++)
14239 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14240 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14243 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14244 write_back
? offset
+ i
* 4 : 0));
14248 /* Called from a peephole2 expander to turn a sequence of stores into an
14249 STM instruction. OPERANDS are the operands found by the peephole matcher;
14250 NOPS indicates how many separate stores we are trying to combine.
14251 Returns true iff we could generate a new instruction. */
14254 gen_stm_seq (rtx
*operands
, int nops
)
14257 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14258 rtx mems
[MAX_LDM_STM_OPS
];
14261 HOST_WIDE_INT offset
;
14262 int write_back
= FALSE
;
14265 bool base_reg_dies
;
14267 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
14268 mem_order
, &base_reg
, &offset
, true);
14273 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14275 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
14278 gcc_assert (base_reg_dies
);
14284 gcc_assert (base_reg_dies
);
14285 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14289 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14291 for (i
= 0; i
< nops
; i
++)
14293 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14294 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14297 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14298 write_back
? offset
+ i
* 4 : 0));
14302 /* Called from a peephole2 expander to turn a sequence of stores that are
14303 preceded by constant loads into an STM instruction. OPERANDS are the
14304 operands found by the peephole matcher; NOPS indicates how many
14305 separate stores we are trying to combine; there are 2 * NOPS
14306 instructions in the peephole.
14307 Returns true iff we could generate a new instruction. */
14310 gen_const_stm_seq (rtx
*operands
, int nops
)
14312 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14313 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14314 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14315 rtx mems
[MAX_LDM_STM_OPS
];
14318 HOST_WIDE_INT offset
;
14319 int write_back
= FALSE
;
14322 bool base_reg_dies
;
14324 HARD_REG_SET allocated
;
14326 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14327 mem_order
, &base_reg
, &offset
, false);
14332 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14334 /* If the same register is used more than once, try to find a free
14336 CLEAR_HARD_REG_SET (allocated
);
14337 for (i
= 0; i
< nops
; i
++)
14339 for (j
= i
+ 1; j
< nops
; j
++)
14340 if (regs
[i
] == regs
[j
])
14342 rtx t
= peep2_find_free_register (0, nops
* 2,
14343 TARGET_THUMB1
? "l" : "r",
14344 SImode
, &allocated
);
14348 regs
[i
] = REGNO (t
);
14352 /* Compute an ordering that maps the register numbers to an ascending
14355 for (i
= 0; i
< nops
; i
++)
14356 if (regs
[i
] < regs
[reg_order
[0]])
14359 for (i
= 1; i
< nops
; i
++)
14361 int this_order
= reg_order
[i
- 1];
14362 for (j
= 0; j
< nops
; j
++)
14363 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14364 && (this_order
== reg_order
[i
- 1]
14365 || regs
[j
] < regs
[this_order
]))
14367 reg_order
[i
] = this_order
;
14370 /* Ensure that registers that must be live after the instruction end
14371 up with the correct value. */
14372 for (i
= 0; i
< nops
; i
++)
14374 int this_order
= reg_order
[i
];
14375 if ((this_order
!= mem_order
[i
]
14376 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14377 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14381 /* Load the constants. */
14382 for (i
= 0; i
< nops
; i
++)
14384 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14385 sorted_regs
[i
] = regs
[reg_order
[i
]];
14386 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14389 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14391 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14394 gcc_assert (base_reg_dies
);
14400 gcc_assert (base_reg_dies
);
14401 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14405 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14407 for (i
= 0; i
< nops
; i
++)
14409 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14410 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14413 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14414 write_back
? offset
+ i
* 4 : 0));
14418 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14419 unaligned copies on processors which support unaligned semantics for those
14420 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14421 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14422 An interleave factor of 1 (the minimum) will perform no interleaving.
14423 Load/store multiple are used for aligned addresses where possible. */
14426 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14427 HOST_WIDE_INT length
,
14428 unsigned int interleave_factor
)
14430 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14431 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14432 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14433 HOST_WIDE_INT i
, j
;
14434 HOST_WIDE_INT remaining
= length
, words
;
14435 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14437 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14438 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14439 HOST_WIDE_INT srcoffset
, dstoffset
;
14440 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14443 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14445 /* Use hard registers if we have aligned source or destination so we can use
14446 load/store multiple with contiguous registers. */
14447 if (dst_aligned
|| src_aligned
)
14448 for (i
= 0; i
< interleave_factor
; i
++)
14449 regs
[i
] = gen_rtx_REG (SImode
, i
);
14451 for (i
= 0; i
< interleave_factor
; i
++)
14452 regs
[i
] = gen_reg_rtx (SImode
);
14454 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14455 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14457 srcoffset
= dstoffset
= 0;
14459 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14460 For copying the last bytes we want to subtract this offset again. */
14461 src_autoinc
= dst_autoinc
= 0;
14463 for (i
= 0; i
< interleave_factor
; i
++)
14466 /* Copy BLOCK_SIZE_BYTES chunks. */
14468 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14471 if (src_aligned
&& interleave_factor
> 1)
14473 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14474 TRUE
, srcbase
, &srcoffset
));
14475 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14479 for (j
= 0; j
< interleave_factor
; j
++)
14481 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14483 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14484 srcoffset
+ j
* UNITS_PER_WORD
);
14485 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14487 srcoffset
+= block_size_bytes
;
14491 if (dst_aligned
&& interleave_factor
> 1)
14493 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14494 TRUE
, dstbase
, &dstoffset
));
14495 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14499 for (j
= 0; j
< interleave_factor
; j
++)
14501 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14503 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14504 dstoffset
+ j
* UNITS_PER_WORD
);
14505 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14507 dstoffset
+= block_size_bytes
;
14510 remaining
-= block_size_bytes
;
14513 /* Copy any whole words left (note these aren't interleaved with any
14514 subsequent halfword/byte load/stores in the interests of simplicity). */
14516 words
= remaining
/ UNITS_PER_WORD
;
14518 gcc_assert (words
< interleave_factor
);
14520 if (src_aligned
&& words
> 1)
14522 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14524 src_autoinc
+= UNITS_PER_WORD
* words
;
14528 for (j
= 0; j
< words
; j
++)
14530 addr
= plus_constant (Pmode
, src
,
14531 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14532 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14533 srcoffset
+ j
* UNITS_PER_WORD
);
14535 emit_move_insn (regs
[j
], mem
);
14537 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14539 srcoffset
+= words
* UNITS_PER_WORD
;
14542 if (dst_aligned
&& words
> 1)
14544 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14546 dst_autoinc
+= words
* UNITS_PER_WORD
;
14550 for (j
= 0; j
< words
; j
++)
14552 addr
= plus_constant (Pmode
, dst
,
14553 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14554 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14555 dstoffset
+ j
* UNITS_PER_WORD
);
14557 emit_move_insn (mem
, regs
[j
]);
14559 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14561 dstoffset
+= words
* UNITS_PER_WORD
;
14564 remaining
-= words
* UNITS_PER_WORD
;
14566 gcc_assert (remaining
< 4);
14568 /* Copy a halfword if necessary. */
14570 if (remaining
>= 2)
14572 halfword_tmp
= gen_reg_rtx (SImode
);
14574 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14575 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14576 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14578 /* Either write out immediately, or delay until we've loaded the last
14579 byte, depending on interleave factor. */
14580 if (interleave_factor
== 1)
14582 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14583 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14584 emit_insn (gen_unaligned_storehi (mem
,
14585 gen_lowpart (HImode
, halfword_tmp
)));
14586 halfword_tmp
= NULL
;
14594 gcc_assert (remaining
< 2);
14596 /* Copy last byte. */
14598 if ((remaining
& 1) != 0)
14600 byte_tmp
= gen_reg_rtx (SImode
);
14602 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14603 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14604 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14606 if (interleave_factor
== 1)
14608 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14609 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14610 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14619 /* Store last halfword if we haven't done so already. */
14623 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14624 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14625 emit_insn (gen_unaligned_storehi (mem
,
14626 gen_lowpart (HImode
, halfword_tmp
)));
14630 /* Likewise for last byte. */
14634 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14635 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14636 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14640 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14643 /* From mips_adjust_block_mem:
14645 Helper function for doing a loop-based block operation on memory
14646 reference MEM. Each iteration of the loop will operate on LENGTH
14649 Create a new base register for use within the loop and point it to
14650 the start of MEM. Create a new memory reference that uses this
14651 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14654 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14657 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14659 /* Although the new mem does not refer to a known location,
14660 it does keep up to LENGTH bytes of alignment. */
14661 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14662 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14665 /* From mips_block_move_loop:
14667 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14668 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14669 the memory regions do not overlap. */
14672 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14673 unsigned int interleave_factor
,
14674 HOST_WIDE_INT bytes_per_iter
)
14676 rtx src_reg
, dest_reg
, final_src
, test
;
14677 HOST_WIDE_INT leftover
;
14679 leftover
= length
% bytes_per_iter
;
14680 length
-= leftover
;
14682 /* Create registers and memory references for use within the loop. */
14683 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14684 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14686 /* Calculate the value that SRC_REG should have after the last iteration of
14688 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14689 0, 0, OPTAB_WIDEN
);
14691 /* Emit the start of the loop. */
14692 rtx_code_label
*label
= gen_label_rtx ();
14693 emit_label (label
);
14695 /* Emit the loop body. */
14696 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14697 interleave_factor
);
14699 /* Move on to the next block. */
14700 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14701 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14703 /* Emit the loop condition. */
14704 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14705 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14707 /* Mop up any left-over bytes. */
14709 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14712 /* Emit a block move when either the source or destination is unaligned (not
14713 aligned to a four-byte boundary). This may need further tuning depending on
14714 core type, optimize_size setting, etc. */
14717 arm_movmemqi_unaligned (rtx
*operands
)
14719 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14723 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14724 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14725 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14726 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14727 or dst_aligned though: allow more interleaving in those cases since the
14728 resulting code can be smaller. */
14729 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14730 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14733 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14734 interleave_factor
, bytes_per_iter
);
14736 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14737 interleave_factor
);
14741 /* Note that the loop created by arm_block_move_unaligned_loop may be
14742 subject to loop unrolling, which makes tuning this condition a little
14745 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14747 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14754 arm_gen_movmemqi (rtx
*operands
)
14756 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14757 HOST_WIDE_INT srcoffset
, dstoffset
;
14759 rtx src
, dst
, srcbase
, dstbase
;
14760 rtx part_bytes_reg
= NULL
;
14763 if (!CONST_INT_P (operands
[2])
14764 || !CONST_INT_P (operands
[3])
14765 || INTVAL (operands
[2]) > 64)
14768 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14769 return arm_movmemqi_unaligned (operands
);
14771 if (INTVAL (operands
[3]) & 3)
14774 dstbase
= operands
[0];
14775 srcbase
= operands
[1];
14777 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14778 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14780 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14781 out_words_to_go
= INTVAL (operands
[2]) / 4;
14782 last_bytes
= INTVAL (operands
[2]) & 3;
14783 dstoffset
= srcoffset
= 0;
14785 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14786 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14788 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14790 if (in_words_to_go
> 4)
14791 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14792 TRUE
, srcbase
, &srcoffset
));
14794 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14795 src
, FALSE
, srcbase
,
14798 if (out_words_to_go
)
14800 if (out_words_to_go
> 4)
14801 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14802 TRUE
, dstbase
, &dstoffset
));
14803 else if (out_words_to_go
!= 1)
14804 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14805 out_words_to_go
, dst
,
14808 dstbase
, &dstoffset
));
14811 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14812 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14813 if (last_bytes
!= 0)
14815 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14821 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14822 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14825 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14826 if (out_words_to_go
)
14830 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14831 sreg
= copy_to_reg (mem
);
14833 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14834 emit_move_insn (mem
, sreg
);
14837 gcc_assert (!in_words_to_go
); /* Sanity check */
14840 if (in_words_to_go
)
14842 gcc_assert (in_words_to_go
> 0);
14844 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14845 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14848 gcc_assert (!last_bytes
|| part_bytes_reg
);
14850 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14852 rtx tmp
= gen_reg_rtx (SImode
);
14854 /* The bytes we want are in the top end of the word. */
14855 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14856 GEN_INT (8 * (4 - last_bytes
))));
14857 part_bytes_reg
= tmp
;
14861 mem
= adjust_automodify_address (dstbase
, QImode
,
14862 plus_constant (Pmode
, dst
,
14864 dstoffset
+ last_bytes
- 1);
14865 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14869 tmp
= gen_reg_rtx (SImode
);
14870 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14871 part_bytes_reg
= tmp
;
14878 if (last_bytes
> 1)
14880 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14881 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14885 rtx tmp
= gen_reg_rtx (SImode
);
14886 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14887 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14888 part_bytes_reg
= tmp
;
14895 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14896 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14903 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14906 next_consecutive_mem (rtx mem
)
14908 machine_mode mode
= GET_MODE (mem
);
14909 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14910 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14912 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14915 /* Copy using LDRD/STRD instructions whenever possible.
14916 Returns true upon success. */
14918 gen_movmem_ldrd_strd (rtx
*operands
)
14920 unsigned HOST_WIDE_INT len
;
14921 HOST_WIDE_INT align
;
14922 rtx src
, dst
, base
;
14924 bool src_aligned
, dst_aligned
;
14925 bool src_volatile
, dst_volatile
;
14927 gcc_assert (CONST_INT_P (operands
[2]));
14928 gcc_assert (CONST_INT_P (operands
[3]));
14930 len
= UINTVAL (operands
[2]);
14934 /* Maximum alignment we can assume for both src and dst buffers. */
14935 align
= INTVAL (operands
[3]);
14937 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14940 /* Place src and dst addresses in registers
14941 and update the corresponding mem rtx. */
14943 dst_volatile
= MEM_VOLATILE_P (dst
);
14944 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14945 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14946 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14949 src_volatile
= MEM_VOLATILE_P (src
);
14950 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14951 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14952 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14954 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14957 if (src_volatile
|| dst_volatile
)
14960 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14961 if (!(dst_aligned
|| src_aligned
))
14962 return arm_gen_movmemqi (operands
);
14964 /* If the either src or dst is unaligned we'll be accessing it as pairs
14965 of unaligned SImode accesses. Otherwise we can generate DImode
14966 ldrd/strd instructions. */
14967 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14968 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
14973 reg0
= gen_reg_rtx (DImode
);
14974 rtx low_reg
= NULL_RTX
;
14975 rtx hi_reg
= NULL_RTX
;
14977 if (!src_aligned
|| !dst_aligned
)
14979 low_reg
= gen_lowpart (SImode
, reg0
);
14980 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14983 emit_move_insn (reg0
, src
);
14986 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14987 src
= next_consecutive_mem (src
);
14988 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
14992 emit_move_insn (dst
, reg0
);
14995 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
14996 dst
= next_consecutive_mem (dst
);
14997 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
15000 src
= next_consecutive_mem (src
);
15001 dst
= next_consecutive_mem (dst
);
15004 gcc_assert (len
< 8);
15007 /* More than a word but less than a double-word to copy. Copy a word. */
15008 reg0
= gen_reg_rtx (SImode
);
15009 src
= adjust_address (src
, SImode
, 0);
15010 dst
= adjust_address (dst
, SImode
, 0);
15012 emit_move_insn (reg0
, src
);
15014 emit_insn (gen_unaligned_loadsi (reg0
, src
));
15017 emit_move_insn (dst
, reg0
);
15019 emit_insn (gen_unaligned_storesi (dst
, reg0
));
15021 src
= next_consecutive_mem (src
);
15022 dst
= next_consecutive_mem (dst
);
15029 /* Copy the remaining bytes. */
15032 dst
= adjust_address (dst
, HImode
, 0);
15033 src
= adjust_address (src
, HImode
, 0);
15034 reg0
= gen_reg_rtx (SImode
);
15036 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
15038 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
15041 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
15043 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
15045 src
= next_consecutive_mem (src
);
15046 dst
= next_consecutive_mem (dst
);
15051 dst
= adjust_address (dst
, QImode
, 0);
15052 src
= adjust_address (src
, QImode
, 0);
15053 reg0
= gen_reg_rtx (QImode
);
15054 emit_move_insn (reg0
, src
);
15055 emit_move_insn (dst
, reg0
);
15059 /* Select a dominance comparison mode if possible for a test of the general
15060 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15061 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15062 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15063 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15064 In all cases OP will be either EQ or NE, but we don't need to know which
15065 here. If we are unable to support a dominance comparison we return
15066 CC mode. This will then fail to match for the RTL expressions that
15067 generate this call. */
15069 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
15071 enum rtx_code cond1
, cond2
;
15074 /* Currently we will probably get the wrong result if the individual
15075 comparisons are not simple. This also ensures that it is safe to
15076 reverse a comparison if necessary. */
15077 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
15079 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
15083 /* The if_then_else variant of this tests the second condition if the
15084 first passes, but is true if the first fails. Reverse the first
15085 condition to get a true "inclusive-or" expression. */
15086 if (cond_or
== DOM_CC_NX_OR_Y
)
15087 cond1
= reverse_condition (cond1
);
15089 /* If the comparisons are not equal, and one doesn't dominate the other,
15090 then we can't do this. */
15092 && !comparison_dominates_p (cond1
, cond2
)
15093 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
15097 std::swap (cond1
, cond2
);
15102 if (cond_or
== DOM_CC_X_AND_Y
)
15107 case EQ
: return CC_DEQmode
;
15108 case LE
: return CC_DLEmode
;
15109 case LEU
: return CC_DLEUmode
;
15110 case GE
: return CC_DGEmode
;
15111 case GEU
: return CC_DGEUmode
;
15112 default: gcc_unreachable ();
15116 if (cond_or
== DOM_CC_X_AND_Y
)
15128 gcc_unreachable ();
15132 if (cond_or
== DOM_CC_X_AND_Y
)
15144 gcc_unreachable ();
15148 if (cond_or
== DOM_CC_X_AND_Y
)
15149 return CC_DLTUmode
;
15154 return CC_DLTUmode
;
15156 return CC_DLEUmode
;
15160 gcc_unreachable ();
15164 if (cond_or
== DOM_CC_X_AND_Y
)
15165 return CC_DGTUmode
;
15170 return CC_DGTUmode
;
15172 return CC_DGEUmode
;
15176 gcc_unreachable ();
15179 /* The remaining cases only occur when both comparisons are the
15182 gcc_assert (cond1
== cond2
);
15186 gcc_assert (cond1
== cond2
);
15190 gcc_assert (cond1
== cond2
);
15194 gcc_assert (cond1
== cond2
);
15195 return CC_DLEUmode
;
15198 gcc_assert (cond1
== cond2
);
15199 return CC_DGEUmode
;
15202 gcc_unreachable ();
15207 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
15209 /* All floating point compares return CCFP if it is an equality
15210 comparison, and CCFPE otherwise. */
15211 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
15234 gcc_unreachable ();
15238 /* A compare with a shifted operand. Because of canonicalization, the
15239 comparison will have to be swapped when we emit the assembler. */
15240 if (GET_MODE (y
) == SImode
15241 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15242 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15243 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15244 || GET_CODE (x
) == ROTATERT
))
15247 /* This operation is performed swapped, but since we only rely on the Z
15248 flag we don't need an additional mode. */
15249 if (GET_MODE (y
) == SImode
15250 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15251 && GET_CODE (x
) == NEG
15252 && (op
== EQ
|| op
== NE
))
15255 /* This is a special case that is used by combine to allow a
15256 comparison of a shifted byte load to be split into a zero-extend
15257 followed by a comparison of the shifted integer (only valid for
15258 equalities and unsigned inequalities). */
15259 if (GET_MODE (x
) == SImode
15260 && GET_CODE (x
) == ASHIFT
15261 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15262 && GET_CODE (XEXP (x
, 0)) == SUBREG
15263 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15264 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15265 && (op
== EQ
|| op
== NE
15266 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15267 && CONST_INT_P (y
))
15270 /* A construct for a conditional compare, if the false arm contains
15271 0, then both conditions must be true, otherwise either condition
15272 must be true. Not all conditions are possible, so CCmode is
15273 returned if it can't be done. */
15274 if (GET_CODE (x
) == IF_THEN_ELSE
15275 && (XEXP (x
, 2) == const0_rtx
15276 || XEXP (x
, 2) == const1_rtx
)
15277 && COMPARISON_P (XEXP (x
, 0))
15278 && COMPARISON_P (XEXP (x
, 1)))
15279 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15280 INTVAL (XEXP (x
, 2)));
15282 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15283 if (GET_CODE (x
) == AND
15284 && (op
== EQ
|| op
== NE
)
15285 && COMPARISON_P (XEXP (x
, 0))
15286 && COMPARISON_P (XEXP (x
, 1)))
15287 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15290 if (GET_CODE (x
) == IOR
15291 && (op
== EQ
|| op
== NE
)
15292 && COMPARISON_P (XEXP (x
, 0))
15293 && COMPARISON_P (XEXP (x
, 1)))
15294 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15297 /* An operation (on Thumb) where we want to test for a single bit.
15298 This is done by shifting that bit up into the top bit of a
15299 scratch register; we can then branch on the sign bit. */
15301 && GET_MODE (x
) == SImode
15302 && (op
== EQ
|| op
== NE
)
15303 && GET_CODE (x
) == ZERO_EXTRACT
15304 && XEXP (x
, 1) == const1_rtx
)
15307 /* An operation that sets the condition codes as a side-effect, the
15308 V flag is not set correctly, so we can only use comparisons where
15309 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15311 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15312 if (GET_MODE (x
) == SImode
15314 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15315 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15316 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15317 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15318 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15319 || GET_CODE (x
) == LSHIFTRT
15320 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15321 || GET_CODE (x
) == ROTATERT
15322 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15323 return CC_NOOVmode
;
15325 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15328 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15329 && GET_CODE (x
) == PLUS
15330 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15333 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15339 /* A DImode comparison against zero can be implemented by
15340 or'ing the two halves together. */
15341 if (y
== const0_rtx
)
15344 /* We can do an equality test in three Thumb instructions. */
15354 /* DImode unsigned comparisons can be implemented by cmp +
15355 cmpeq without a scratch register. Not worth doing in
15366 /* DImode signed and unsigned comparisons can be implemented
15367 by cmp + sbcs with a scratch register, but that does not
15368 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15369 gcc_assert (op
!= EQ
&& op
!= NE
);
15373 gcc_unreachable ();
15377 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15378 return GET_MODE (x
);
15383 /* X and Y are two things to compare using CODE. Emit the compare insn and
15384 return the rtx for register 0 in the proper mode. FP means this is a
15385 floating point compare: I don't think that it is needed on the arm. */
15387 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15391 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
15393 /* We might have X as a constant, Y as a register because of the predicates
15394 used for cmpdi. If so, force X to a register here. */
15395 if (dimode_comparison
&& !REG_P (x
))
15396 x
= force_reg (DImode
, x
);
15398 mode
= SELECT_CC_MODE (code
, x
, y
);
15399 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15401 if (dimode_comparison
15402 && mode
!= CC_CZmode
)
15406 /* To compare two non-zero values for equality, XOR them and
15407 then compare against zero. Not used for ARM mode; there
15408 CC_CZmode is cheaper. */
15409 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
15411 gcc_assert (!reload_completed
);
15412 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
15416 /* A scratch register is required. */
15417 if (reload_completed
)
15418 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
15420 scratch
= gen_rtx_SCRATCH (SImode
);
15422 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15423 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15424 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
15427 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15432 /* Generate a sequence of insns that will generate the correct return
15433 address mask depending on the physical architecture that the program
15436 arm_gen_return_addr_mask (void)
15438 rtx reg
= gen_reg_rtx (Pmode
);
15440 emit_insn (gen_return_addr_mask (reg
));
15445 arm_reload_in_hi (rtx
*operands
)
15447 rtx ref
= operands
[1];
15449 HOST_WIDE_INT offset
= 0;
15451 if (GET_CODE (ref
) == SUBREG
)
15453 offset
= SUBREG_BYTE (ref
);
15454 ref
= SUBREG_REG (ref
);
15459 /* We have a pseudo which has been spilt onto the stack; there
15460 are two cases here: the first where there is a simple
15461 stack-slot replacement and a second where the stack-slot is
15462 out of range, or is used as a subreg. */
15463 if (reg_equiv_mem (REGNO (ref
)))
15465 ref
= reg_equiv_mem (REGNO (ref
));
15466 base
= find_replacement (&XEXP (ref
, 0));
15469 /* The slot is out of range, or was dressed up in a SUBREG. */
15470 base
= reg_equiv_address (REGNO (ref
));
15472 /* PR 62554: If there is no equivalent memory location then just move
15473 the value as an SImode register move. This happens when the target
15474 architecture variant does not have an HImode register move. */
15477 gcc_assert (REG_P (operands
[0]));
15478 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15479 gen_rtx_SUBREG (SImode
, ref
, 0)));
15484 base
= find_replacement (&XEXP (ref
, 0));
15486 /* Handle the case where the address is too complex to be offset by 1. */
15487 if (GET_CODE (base
) == MINUS
15488 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15490 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15492 emit_set_insn (base_plus
, base
);
15495 else if (GET_CODE (base
) == PLUS
)
15497 /* The addend must be CONST_INT, or we would have dealt with it above. */
15498 HOST_WIDE_INT hi
, lo
;
15500 offset
+= INTVAL (XEXP (base
, 1));
15501 base
= XEXP (base
, 0);
15503 /* Rework the address into a legal sequence of insns. */
15504 /* Valid range for lo is -4095 -> 4095 */
15507 : -((-offset
) & 0xfff));
15509 /* Corner case, if lo is the max offset then we would be out of range
15510 once we have added the additional 1 below, so bump the msb into the
15511 pre-loading insn(s). */
15515 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15516 ^ (HOST_WIDE_INT
) 0x80000000)
15517 - (HOST_WIDE_INT
) 0x80000000);
15519 gcc_assert (hi
+ lo
== offset
);
15523 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15525 /* Get the base address; addsi3 knows how to handle constants
15526 that require more than one insn. */
15527 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15533 /* Operands[2] may overlap operands[0] (though it won't overlap
15534 operands[1]), that's why we asked for a DImode reg -- so we can
15535 use the bit that does not overlap. */
15536 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15537 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15539 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15541 emit_insn (gen_zero_extendqisi2 (scratch
,
15542 gen_rtx_MEM (QImode
,
15543 plus_constant (Pmode
, base
,
15545 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15546 gen_rtx_MEM (QImode
,
15547 plus_constant (Pmode
, base
,
15549 if (!BYTES_BIG_ENDIAN
)
15550 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15551 gen_rtx_IOR (SImode
,
15554 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15558 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15559 gen_rtx_IOR (SImode
,
15560 gen_rtx_ASHIFT (SImode
, scratch
,
15562 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15565 /* Handle storing a half-word to memory during reload by synthesizing as two
15566 byte stores. Take care not to clobber the input values until after we
15567 have moved them somewhere safe. This code assumes that if the DImode
15568 scratch in operands[2] overlaps either the input value or output address
15569 in some way, then that value must die in this insn (we absolutely need
15570 two scratch registers for some corner cases). */
15572 arm_reload_out_hi (rtx
*operands
)
15574 rtx ref
= operands
[0];
15575 rtx outval
= operands
[1];
15577 HOST_WIDE_INT offset
= 0;
15579 if (GET_CODE (ref
) == SUBREG
)
15581 offset
= SUBREG_BYTE (ref
);
15582 ref
= SUBREG_REG (ref
);
15587 /* We have a pseudo which has been spilt onto the stack; there
15588 are two cases here: the first where there is a simple
15589 stack-slot replacement and a second where the stack-slot is
15590 out of range, or is used as a subreg. */
15591 if (reg_equiv_mem (REGNO (ref
)))
15593 ref
= reg_equiv_mem (REGNO (ref
));
15594 base
= find_replacement (&XEXP (ref
, 0));
15597 /* The slot is out of range, or was dressed up in a SUBREG. */
15598 base
= reg_equiv_address (REGNO (ref
));
15600 /* PR 62254: If there is no equivalent memory location then just move
15601 the value as an SImode register move. This happens when the target
15602 architecture variant does not have an HImode register move. */
15605 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
15607 if (REG_P (outval
))
15609 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15610 gen_rtx_SUBREG (SImode
, outval
, 0)));
15612 else /* SUBREG_P (outval) */
15614 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
15615 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15616 SUBREG_REG (outval
)));
15618 /* FIXME: Handle other cases ? */
15619 gcc_unreachable ();
15625 base
= find_replacement (&XEXP (ref
, 0));
15627 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15629 /* Handle the case where the address is too complex to be offset by 1. */
15630 if (GET_CODE (base
) == MINUS
15631 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15633 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15635 /* Be careful not to destroy OUTVAL. */
15636 if (reg_overlap_mentioned_p (base_plus
, outval
))
15638 /* Updating base_plus might destroy outval, see if we can
15639 swap the scratch and base_plus. */
15640 if (!reg_overlap_mentioned_p (scratch
, outval
))
15641 std::swap (scratch
, base_plus
);
15644 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15646 /* Be conservative and copy OUTVAL into the scratch now,
15647 this should only be necessary if outval is a subreg
15648 of something larger than a word. */
15649 /* XXX Might this clobber base? I can't see how it can,
15650 since scratch is known to overlap with OUTVAL, and
15651 must be wider than a word. */
15652 emit_insn (gen_movhi (scratch_hi
, outval
));
15653 outval
= scratch_hi
;
15657 emit_set_insn (base_plus
, base
);
15660 else if (GET_CODE (base
) == PLUS
)
15662 /* The addend must be CONST_INT, or we would have dealt with it above. */
15663 HOST_WIDE_INT hi
, lo
;
15665 offset
+= INTVAL (XEXP (base
, 1));
15666 base
= XEXP (base
, 0);
15668 /* Rework the address into a legal sequence of insns. */
15669 /* Valid range for lo is -4095 -> 4095 */
15672 : -((-offset
) & 0xfff));
15674 /* Corner case, if lo is the max offset then we would be out of range
15675 once we have added the additional 1 below, so bump the msb into the
15676 pre-loading insn(s). */
15680 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15681 ^ (HOST_WIDE_INT
) 0x80000000)
15682 - (HOST_WIDE_INT
) 0x80000000);
15684 gcc_assert (hi
+ lo
== offset
);
15688 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15690 /* Be careful not to destroy OUTVAL. */
15691 if (reg_overlap_mentioned_p (base_plus
, outval
))
15693 /* Updating base_plus might destroy outval, see if we
15694 can swap the scratch and base_plus. */
15695 if (!reg_overlap_mentioned_p (scratch
, outval
))
15696 std::swap (scratch
, base_plus
);
15699 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15701 /* Be conservative and copy outval into scratch now,
15702 this should only be necessary if outval is a
15703 subreg of something larger than a word. */
15704 /* XXX Might this clobber base? I can't see how it
15705 can, since scratch is known to overlap with
15707 emit_insn (gen_movhi (scratch_hi
, outval
));
15708 outval
= scratch_hi
;
15712 /* Get the base address; addsi3 knows how to handle constants
15713 that require more than one insn. */
15714 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15720 if (BYTES_BIG_ENDIAN
)
15722 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15723 plus_constant (Pmode
, base
,
15725 gen_lowpart (QImode
, outval
)));
15726 emit_insn (gen_lshrsi3 (scratch
,
15727 gen_rtx_SUBREG (SImode
, outval
, 0),
15729 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15731 gen_lowpart (QImode
, scratch
)));
15735 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15737 gen_lowpart (QImode
, outval
)));
15738 emit_insn (gen_lshrsi3 (scratch
,
15739 gen_rtx_SUBREG (SImode
, outval
, 0),
15741 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15742 plus_constant (Pmode
, base
,
15744 gen_lowpart (QImode
, scratch
)));
15748 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15749 (padded to the size of a word) should be passed in a register. */
15752 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15754 if (TARGET_AAPCS_BASED
)
15755 return must_pass_in_stack_var_size (mode
, type
);
15757 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15761 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15762 Return true if an argument passed on the stack should be padded upwards,
15763 i.e. if the least-significant byte has useful data.
15764 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15765 aggregate types are placed in the lowest memory address. */
15768 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15770 if (!TARGET_AAPCS_BASED
)
15771 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15773 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15780 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15781 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15782 register has useful data, and return the opposite if the most
15783 significant byte does. */
15786 arm_pad_reg_upward (machine_mode mode
,
15787 tree type
, int first ATTRIBUTE_UNUSED
)
15789 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15791 /* For AAPCS, small aggregates, small fixed-point types,
15792 and small complex types are always padded upwards. */
15795 if ((AGGREGATE_TYPE_P (type
)
15796 || TREE_CODE (type
) == COMPLEX_TYPE
15797 || FIXED_POINT_TYPE_P (type
))
15798 && int_size_in_bytes (type
) <= 4)
15803 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15804 && GET_MODE_SIZE (mode
) <= 4)
15809 /* Otherwise, use default padding. */
15810 return !BYTES_BIG_ENDIAN
;
15813 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15814 assuming that the address in the base register is word aligned. */
15816 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15818 HOST_WIDE_INT max_offset
;
15820 /* Offset must be a multiple of 4 in Thumb mode. */
15821 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15826 else if (TARGET_ARM
)
15831 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15834 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15835 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15836 Assumes that the address in the base register RN is word aligned. Pattern
15837 guarantees that both memory accesses use the same base register,
15838 the offsets are constants within the range, and the gap between the offsets is 4.
15839 If preload complete then check that registers are legal. WBACK indicates whether
15840 address is updated. LOAD indicates whether memory access is load or store. */
15842 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15843 bool wback
, bool load
)
15845 unsigned int t
, t2
, n
;
15847 if (!reload_completed
)
15850 if (!offset_ok_for_ldrd_strd (offset
))
15857 if ((TARGET_THUMB2
)
15858 && ((wback
&& (n
== t
|| n
== t2
))
15859 || (t
== SP_REGNUM
)
15860 || (t
== PC_REGNUM
)
15861 || (t2
== SP_REGNUM
)
15862 || (t2
== PC_REGNUM
)
15863 || (!load
&& (n
== PC_REGNUM
))
15864 || (load
&& (t
== t2
))
15865 /* Triggers Cortex-M3 LDRD errata. */
15866 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15870 && ((wback
&& (n
== t
|| n
== t2
))
15871 || (t2
== PC_REGNUM
)
15872 || (t
% 2 != 0) /* First destination register is not even. */
15874 /* PC can be used as base register (for offset addressing only),
15875 but it is depricated. */
15876 || (n
== PC_REGNUM
)))
15882 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15883 operand MEM's address contains an immediate offset from the base
15884 register and has no side effects, in which case it sets BASE and
15885 OFFSET accordingly. */
15887 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15891 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15893 /* TODO: Handle more general memory operand patterns, such as
15894 PRE_DEC and PRE_INC. */
15896 if (side_effects_p (mem
))
15899 /* Can't deal with subregs. */
15900 if (GET_CODE (mem
) == SUBREG
)
15903 gcc_assert (MEM_P (mem
));
15905 *offset
= const0_rtx
;
15907 addr
= XEXP (mem
, 0);
15909 /* If addr isn't valid for DImode, then we can't handle it. */
15910 if (!arm_legitimate_address_p (DImode
, addr
,
15911 reload_in_progress
|| reload_completed
))
15919 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15921 *base
= XEXP (addr
, 0);
15922 *offset
= XEXP (addr
, 1);
15923 return (REG_P (*base
) && CONST_INT_P (*offset
));
15929 /* Called from a peephole2 to replace two word-size accesses with a
15930 single LDRD/STRD instruction. Returns true iff we can generate a
15931 new instruction sequence. That is, both accesses use the same base
15932 register and the gap between constant offsets is 4. This function
15933 may reorder its operands to match ldrd/strd RTL templates.
15934 OPERANDS are the operands found by the peephole matcher;
15935 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15936 corresponding memory operands. LOAD indicaates whether the access
15937 is load or store. CONST_STORE indicates a store of constant
15938 integer values held in OPERANDS[4,5] and assumes that the pattern
15939 is of length 4 insn, for the purpose of checking dead registers.
15940 COMMUTE indicates that register operands may be reordered. */
15942 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15943 bool const_store
, bool commute
)
15946 HOST_WIDE_INT offsets
[2], offset
;
15947 rtx base
= NULL_RTX
;
15948 rtx cur_base
, cur_offset
, tmp
;
15950 HARD_REG_SET regset
;
15952 gcc_assert (!const_store
|| !load
);
15953 /* Check that the memory references are immediate offsets from the
15954 same base register. Extract the base register, the destination
15955 registers, and the corresponding memory offsets. */
15956 for (i
= 0; i
< nops
; i
++)
15958 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15963 else if (REGNO (base
) != REGNO (cur_base
))
15966 offsets
[i
] = INTVAL (cur_offset
);
15967 if (GET_CODE (operands
[i
]) == SUBREG
)
15969 tmp
= SUBREG_REG (operands
[i
]);
15970 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15975 /* Make sure there is no dependency between the individual loads. */
15976 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15977 return false; /* RAW */
15979 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15980 return false; /* WAW */
15982 /* If the same input register is used in both stores
15983 when storing different constants, try to find a free register.
15984 For example, the code
15989 can be transformed into
15993 in Thumb mode assuming that r1 is free.
15994 For ARM mode do the same but only if the starting register
15995 can be made to be even. */
15997 && REGNO (operands
[0]) == REGNO (operands
[1])
15998 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
16002 CLEAR_HARD_REG_SET (regset
);
16003 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16004 if (tmp
== NULL_RTX
)
16007 /* Use the new register in the first load to ensure that
16008 if the original input register is not dead after peephole,
16009 then it will have the correct constant value. */
16012 else if (TARGET_ARM
)
16014 int regno
= REGNO (operands
[0]);
16015 if (!peep2_reg_dead_p (4, operands
[0]))
16017 /* When the input register is even and is not dead after the
16018 pattern, it has to hold the second constant but we cannot
16019 form a legal STRD in ARM mode with this register as the second
16021 if (regno
% 2 == 0)
16024 /* Is regno-1 free? */
16025 SET_HARD_REG_SET (regset
);
16026 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
16027 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16028 if (tmp
== NULL_RTX
)
16035 /* Find a DImode register. */
16036 CLEAR_HARD_REG_SET (regset
);
16037 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
16038 if (tmp
!= NULL_RTX
)
16040 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
16041 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
16045 /* Can we use the input register to form a DI register? */
16046 SET_HARD_REG_SET (regset
);
16047 CLEAR_HARD_REG_BIT(regset
,
16048 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
16049 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16050 if (tmp
== NULL_RTX
)
16052 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
16056 gcc_assert (operands
[0] != NULL_RTX
);
16057 gcc_assert (operands
[1] != NULL_RTX
);
16058 gcc_assert (REGNO (operands
[0]) % 2 == 0);
16059 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
16063 /* Make sure the instructions are ordered with lower memory access first. */
16064 if (offsets
[0] > offsets
[1])
16066 gap
= offsets
[0] - offsets
[1];
16067 offset
= offsets
[1];
16069 /* Swap the instructions such that lower memory is accessed first. */
16070 std::swap (operands
[0], operands
[1]);
16071 std::swap (operands
[2], operands
[3]);
16073 std::swap (operands
[4], operands
[5]);
16077 gap
= offsets
[1] - offsets
[0];
16078 offset
= offsets
[0];
16081 /* Make sure accesses are to consecutive memory locations. */
16085 /* Make sure we generate legal instructions. */
16086 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
16090 /* In Thumb state, where registers are almost unconstrained, there
16091 is little hope to fix it. */
16095 if (load
&& commute
)
16097 /* Try reordering registers. */
16098 std::swap (operands
[0], operands
[1]);
16099 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
16106 /* If input registers are dead after this pattern, they can be
16107 reordered or replaced by other registers that are free in the
16108 current pattern. */
16109 if (!peep2_reg_dead_p (4, operands
[0])
16110 || !peep2_reg_dead_p (4, operands
[1]))
16113 /* Try to reorder the input registers. */
16114 /* For example, the code
16119 can be transformed into
16124 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
16127 std::swap (operands
[0], operands
[1]);
16131 /* Try to find a free DI register. */
16132 CLEAR_HARD_REG_SET (regset
);
16133 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
16134 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
16137 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
16138 if (tmp
== NULL_RTX
)
16141 /* DREG must be an even-numbered register in DImode.
16142 Split it into SI registers. */
16143 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
16144 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
16145 gcc_assert (operands
[0] != NULL_RTX
);
16146 gcc_assert (operands
[1] != NULL_RTX
);
16147 gcc_assert (REGNO (operands
[0]) % 2 == 0);
16148 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
16150 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
16162 /* Print a symbolic form of X to the debug file, F. */
16164 arm_print_value (FILE *f
, rtx x
)
16166 switch (GET_CODE (x
))
16169 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
16173 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
16181 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
16183 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
16184 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
16192 fprintf (f
, "\"%s\"", XSTR (x
, 0));
16196 fprintf (f
, "`%s'", XSTR (x
, 0));
16200 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
16204 arm_print_value (f
, XEXP (x
, 0));
16208 arm_print_value (f
, XEXP (x
, 0));
16210 arm_print_value (f
, XEXP (x
, 1));
16218 fprintf (f
, "????");
16223 /* Routines for manipulation of the constant pool. */
16225 /* Arm instructions cannot load a large constant directly into a
16226 register; they have to come from a pc relative load. The constant
16227 must therefore be placed in the addressable range of the pc
16228 relative load. Depending on the precise pc relative load
16229 instruction the range is somewhere between 256 bytes and 4k. This
16230 means that we often have to dump a constant inside a function, and
16231 generate code to branch around it.
16233 It is important to minimize this, since the branches will slow
16234 things down and make the code larger.
16236 Normally we can hide the table after an existing unconditional
16237 branch so that there is no interruption of the flow, but in the
16238 worst case the code looks like this:
16256 We fix this by performing a scan after scheduling, which notices
16257 which instructions need to have their operands fetched from the
16258 constant table and builds the table.
16260 The algorithm starts by building a table of all the constants that
16261 need fixing up and all the natural barriers in the function (places
16262 where a constant table can be dropped without breaking the flow).
16263 For each fixup we note how far the pc-relative replacement will be
16264 able to reach and the offset of the instruction into the function.
16266 Having built the table we then group the fixes together to form
16267 tables that are as large as possible (subject to addressing
16268 constraints) and emit each table of constants after the last
16269 barrier that is within range of all the instructions in the group.
16270 If a group does not contain a barrier, then we forcibly create one
16271 by inserting a jump instruction into the flow. Once the table has
16272 been inserted, the insns are then modified to reference the
16273 relevant entry in the pool.
16275 Possible enhancements to the algorithm (not implemented) are:
16277 1) For some processors and object formats, there may be benefit in
16278 aligning the pools to the start of cache lines; this alignment
16279 would need to be taken into account when calculating addressability
16282 /* These typedefs are located at the start of this file, so that
16283 they can be used in the prototypes there. This comment is to
16284 remind readers of that fact so that the following structures
16285 can be understood more easily.
16287 typedef struct minipool_node Mnode;
16288 typedef struct minipool_fixup Mfix; */
16290 struct minipool_node
16292 /* Doubly linked chain of entries. */
16295 /* The maximum offset into the code that this entry can be placed. While
16296 pushing fixes for forward references, all entries are sorted in order
16297 of increasing max_address. */
16298 HOST_WIDE_INT max_address
;
16299 /* Similarly for an entry inserted for a backwards ref. */
16300 HOST_WIDE_INT min_address
;
16301 /* The number of fixes referencing this entry. This can become zero
16302 if we "unpush" an entry. In this case we ignore the entry when we
16303 come to emit the code. */
16305 /* The offset from the start of the minipool. */
16306 HOST_WIDE_INT offset
;
16307 /* The value in table. */
16309 /* The mode of value. */
16311 /* The size of the value. With iWMMXt enabled
16312 sizes > 4 also imply an alignment of 8-bytes. */
16316 struct minipool_fixup
16320 HOST_WIDE_INT address
;
16326 HOST_WIDE_INT forwards
;
16327 HOST_WIDE_INT backwards
;
16330 /* Fixes less than a word need padding out to a word boundary. */
16331 #define MINIPOOL_FIX_SIZE(mode) \
16332 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16334 static Mnode
* minipool_vector_head
;
16335 static Mnode
* minipool_vector_tail
;
16336 static rtx_code_label
*minipool_vector_label
;
16337 static int minipool_pad
;
16339 /* The linked list of all minipool fixes required for this function. */
16340 Mfix
* minipool_fix_head
;
16341 Mfix
* minipool_fix_tail
;
16342 /* The fix entry for the current minipool, once it has been placed. */
16343 Mfix
* minipool_barrier
;
16345 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16346 #define JUMP_TABLES_IN_TEXT_SECTION 0
16349 static HOST_WIDE_INT
16350 get_jump_table_size (rtx_jump_table_data
*insn
)
16352 /* ADDR_VECs only take room if read-only data does into the text
16354 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16356 rtx body
= PATTERN (insn
);
16357 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16358 HOST_WIDE_INT size
;
16359 HOST_WIDE_INT modesize
;
16361 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16362 size
= modesize
* XVECLEN (body
, elt
);
16366 /* Round up size of TBB table to a halfword boundary. */
16367 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
16370 /* No padding necessary for TBH. */
16373 /* Add two bytes for alignment on Thumb. */
16378 gcc_unreachable ();
16386 /* Return the maximum amount of padding that will be inserted before
16389 static HOST_WIDE_INT
16390 get_label_padding (rtx label
)
16392 HOST_WIDE_INT align
, min_insn_size
;
16394 align
= 1 << label_to_alignment (label
);
16395 min_insn_size
= TARGET_THUMB
? 2 : 4;
16396 return align
> min_insn_size
? align
- min_insn_size
: 0;
16399 /* Move a minipool fix MP from its current location to before MAX_MP.
16400 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16401 constraints may need updating. */
16403 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16404 HOST_WIDE_INT max_address
)
16406 /* The code below assumes these are different. */
16407 gcc_assert (mp
!= max_mp
);
16409 if (max_mp
== NULL
)
16411 if (max_address
< mp
->max_address
)
16412 mp
->max_address
= max_address
;
16416 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16417 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16419 mp
->max_address
= max_address
;
16421 /* Unlink MP from its current position. Since max_mp is non-null,
16422 mp->prev must be non-null. */
16423 mp
->prev
->next
= mp
->next
;
16424 if (mp
->next
!= NULL
)
16425 mp
->next
->prev
= mp
->prev
;
16427 minipool_vector_tail
= mp
->prev
;
16429 /* Re-insert it before MAX_MP. */
16431 mp
->prev
= max_mp
->prev
;
16434 if (mp
->prev
!= NULL
)
16435 mp
->prev
->next
= mp
;
16437 minipool_vector_head
= mp
;
16440 /* Save the new entry. */
16443 /* Scan over the preceding entries and adjust their addresses as
16445 while (mp
->prev
!= NULL
16446 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16448 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16455 /* Add a constant to the minipool for a forward reference. Returns the
16456 node added or NULL if the constant will not fit in this pool. */
16458 add_minipool_forward_ref (Mfix
*fix
)
16460 /* If set, max_mp is the first pool_entry that has a lower
16461 constraint than the one we are trying to add. */
16462 Mnode
* max_mp
= NULL
;
16463 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16466 /* If the minipool starts before the end of FIX->INSN then this FIX
16467 can not be placed into the current pool. Furthermore, adding the
16468 new constant pool entry may cause the pool to start FIX_SIZE bytes
16470 if (minipool_vector_head
&&
16471 (fix
->address
+ get_attr_length (fix
->insn
)
16472 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16475 /* Scan the pool to see if a constant with the same value has
16476 already been added. While we are doing this, also note the
16477 location where we must insert the constant if it doesn't already
16479 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16481 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16482 && fix
->mode
== mp
->mode
16483 && (!LABEL_P (fix
->value
)
16484 || (CODE_LABEL_NUMBER (fix
->value
)
16485 == CODE_LABEL_NUMBER (mp
->value
)))
16486 && rtx_equal_p (fix
->value
, mp
->value
))
16488 /* More than one fix references this entry. */
16490 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16493 /* Note the insertion point if necessary. */
16495 && mp
->max_address
> max_address
)
16498 /* If we are inserting an 8-bytes aligned quantity and
16499 we have not already found an insertion point, then
16500 make sure that all such 8-byte aligned quantities are
16501 placed at the start of the pool. */
16502 if (ARM_DOUBLEWORD_ALIGN
16504 && fix
->fix_size
>= 8
16505 && mp
->fix_size
< 8)
16508 max_address
= mp
->max_address
;
16512 /* The value is not currently in the minipool, so we need to create
16513 a new entry for it. If MAX_MP is NULL, the entry will be put on
16514 the end of the list since the placement is less constrained than
16515 any existing entry. Otherwise, we insert the new fix before
16516 MAX_MP and, if necessary, adjust the constraints on the other
16519 mp
->fix_size
= fix
->fix_size
;
16520 mp
->mode
= fix
->mode
;
16521 mp
->value
= fix
->value
;
16523 /* Not yet required for a backwards ref. */
16524 mp
->min_address
= -65536;
16526 if (max_mp
== NULL
)
16528 mp
->max_address
= max_address
;
16530 mp
->prev
= minipool_vector_tail
;
16532 if (mp
->prev
== NULL
)
16534 minipool_vector_head
= mp
;
16535 minipool_vector_label
= gen_label_rtx ();
16538 mp
->prev
->next
= mp
;
16540 minipool_vector_tail
= mp
;
16544 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16545 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16547 mp
->max_address
= max_address
;
16550 mp
->prev
= max_mp
->prev
;
16552 if (mp
->prev
!= NULL
)
16553 mp
->prev
->next
= mp
;
16555 minipool_vector_head
= mp
;
16558 /* Save the new entry. */
16561 /* Scan over the preceding entries and adjust their addresses as
16563 while (mp
->prev
!= NULL
16564 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16566 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16574 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16575 HOST_WIDE_INT min_address
)
16577 HOST_WIDE_INT offset
;
16579 /* The code below assumes these are different. */
16580 gcc_assert (mp
!= min_mp
);
16582 if (min_mp
== NULL
)
16584 if (min_address
> mp
->min_address
)
16585 mp
->min_address
= min_address
;
16589 /* We will adjust this below if it is too loose. */
16590 mp
->min_address
= min_address
;
16592 /* Unlink MP from its current position. Since min_mp is non-null,
16593 mp->next must be non-null. */
16594 mp
->next
->prev
= mp
->prev
;
16595 if (mp
->prev
!= NULL
)
16596 mp
->prev
->next
= mp
->next
;
16598 minipool_vector_head
= mp
->next
;
16600 /* Reinsert it after MIN_MP. */
16602 mp
->next
= min_mp
->next
;
16604 if (mp
->next
!= NULL
)
16605 mp
->next
->prev
= mp
;
16607 minipool_vector_tail
= mp
;
16613 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16615 mp
->offset
= offset
;
16616 if (mp
->refcount
> 0)
16617 offset
+= mp
->fix_size
;
16619 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16620 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16626 /* Add a constant to the minipool for a backward reference. Returns the
16627 node added or NULL if the constant will not fit in this pool.
16629 Note that the code for insertion for a backwards reference can be
16630 somewhat confusing because the calculated offsets for each fix do
16631 not take into account the size of the pool (which is still under
16634 add_minipool_backward_ref (Mfix
*fix
)
16636 /* If set, min_mp is the last pool_entry that has a lower constraint
16637 than the one we are trying to add. */
16638 Mnode
*min_mp
= NULL
;
16639 /* This can be negative, since it is only a constraint. */
16640 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16643 /* If we can't reach the current pool from this insn, or if we can't
16644 insert this entry at the end of the pool without pushing other
16645 fixes out of range, then we don't try. This ensures that we
16646 can't fail later on. */
16647 if (min_address
>= minipool_barrier
->address
16648 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16649 >= minipool_barrier
->address
))
16652 /* Scan the pool to see if a constant with the same value has
16653 already been added. While we are doing this, also note the
16654 location where we must insert the constant if it doesn't already
16656 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16658 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16659 && fix
->mode
== mp
->mode
16660 && (!LABEL_P (fix
->value
)
16661 || (CODE_LABEL_NUMBER (fix
->value
)
16662 == CODE_LABEL_NUMBER (mp
->value
)))
16663 && rtx_equal_p (fix
->value
, mp
->value
)
16664 /* Check that there is enough slack to move this entry to the
16665 end of the table (this is conservative). */
16666 && (mp
->max_address
16667 > (minipool_barrier
->address
16668 + minipool_vector_tail
->offset
16669 + minipool_vector_tail
->fix_size
)))
16672 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16675 if (min_mp
!= NULL
)
16676 mp
->min_address
+= fix
->fix_size
;
16679 /* Note the insertion point if necessary. */
16680 if (mp
->min_address
< min_address
)
16682 /* For now, we do not allow the insertion of 8-byte alignment
16683 requiring nodes anywhere but at the start of the pool. */
16684 if (ARM_DOUBLEWORD_ALIGN
16685 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16690 else if (mp
->max_address
16691 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16693 /* Inserting before this entry would push the fix beyond
16694 its maximum address (which can happen if we have
16695 re-located a forwards fix); force the new fix to come
16697 if (ARM_DOUBLEWORD_ALIGN
16698 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16703 min_address
= mp
->min_address
+ fix
->fix_size
;
16706 /* Do not insert a non-8-byte aligned quantity before 8-byte
16707 aligned quantities. */
16708 else if (ARM_DOUBLEWORD_ALIGN
16709 && fix
->fix_size
< 8
16710 && mp
->fix_size
>= 8)
16713 min_address
= mp
->min_address
+ fix
->fix_size
;
16718 /* We need to create a new entry. */
16720 mp
->fix_size
= fix
->fix_size
;
16721 mp
->mode
= fix
->mode
;
16722 mp
->value
= fix
->value
;
16724 mp
->max_address
= minipool_barrier
->address
+ 65536;
16726 mp
->min_address
= min_address
;
16728 if (min_mp
== NULL
)
16731 mp
->next
= minipool_vector_head
;
16733 if (mp
->next
== NULL
)
16735 minipool_vector_tail
= mp
;
16736 minipool_vector_label
= gen_label_rtx ();
16739 mp
->next
->prev
= mp
;
16741 minipool_vector_head
= mp
;
16745 mp
->next
= min_mp
->next
;
16749 if (mp
->next
!= NULL
)
16750 mp
->next
->prev
= mp
;
16752 minipool_vector_tail
= mp
;
16755 /* Save the new entry. */
16763 /* Scan over the following entries and adjust their offsets. */
16764 while (mp
->next
!= NULL
)
16766 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16767 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16770 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16772 mp
->next
->offset
= mp
->offset
;
16781 assign_minipool_offsets (Mfix
*barrier
)
16783 HOST_WIDE_INT offset
= 0;
16786 minipool_barrier
= barrier
;
16788 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16790 mp
->offset
= offset
;
16792 if (mp
->refcount
> 0)
16793 offset
+= mp
->fix_size
;
16797 /* Output the literal table */
16799 dump_minipool (rtx_insn
*scan
)
16805 if (ARM_DOUBLEWORD_ALIGN
)
16806 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16807 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16814 fprintf (dump_file
,
16815 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16816 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16818 scan
= emit_label_after (gen_label_rtx (), scan
);
16819 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16820 scan
= emit_label_after (minipool_vector_label
, scan
);
16822 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16824 if (mp
->refcount
> 0)
16828 fprintf (dump_file
,
16829 ";; Offset %u, min %ld, max %ld ",
16830 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16831 (unsigned long) mp
->max_address
);
16832 arm_print_value (dump_file
, mp
->value
);
16833 fputc ('\n', dump_file
);
16836 switch (GET_MODE_SIZE (mp
->mode
))
16838 #ifdef HAVE_consttable_1
16840 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16844 #ifdef HAVE_consttable_2
16846 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16850 #ifdef HAVE_consttable_4
16852 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16856 #ifdef HAVE_consttable_8
16858 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16862 #ifdef HAVE_consttable_16
16864 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16869 gcc_unreachable ();
16877 minipool_vector_head
= minipool_vector_tail
= NULL
;
16878 scan
= emit_insn_after (gen_consttable_end (), scan
);
16879 scan
= emit_barrier_after (scan
);
16882 /* Return the cost of forcibly inserting a barrier after INSN. */
16884 arm_barrier_cost (rtx_insn
*insn
)
16886 /* Basing the location of the pool on the loop depth is preferable,
16887 but at the moment, the basic block information seems to be
16888 corrupt by this stage of the compilation. */
16889 int base_cost
= 50;
16890 rtx_insn
*next
= next_nonnote_insn (insn
);
16892 if (next
!= NULL
&& LABEL_P (next
))
16895 switch (GET_CODE (insn
))
16898 /* It will always be better to place the table before the label, rather
16907 return base_cost
- 10;
16910 return base_cost
+ 10;
16914 /* Find the best place in the insn stream in the range
16915 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16916 Create the barrier by inserting a jump and add a new fix entry for
16919 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16921 HOST_WIDE_INT count
= 0;
16922 rtx_barrier
*barrier
;
16923 rtx_insn
*from
= fix
->insn
;
16924 /* The instruction after which we will insert the jump. */
16925 rtx_insn
*selected
= NULL
;
16927 /* The address at which the jump instruction will be placed. */
16928 HOST_WIDE_INT selected_address
;
16930 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16931 rtx_code_label
*label
= gen_label_rtx ();
16933 selected_cost
= arm_barrier_cost (from
);
16934 selected_address
= fix
->address
;
16936 while (from
&& count
< max_count
)
16938 rtx_jump_table_data
*tmp
;
16941 /* This code shouldn't have been called if there was a natural barrier
16943 gcc_assert (!BARRIER_P (from
));
16945 /* Count the length of this insn. This must stay in sync with the
16946 code that pushes minipool fixes. */
16947 if (LABEL_P (from
))
16948 count
+= get_label_padding (from
);
16950 count
+= get_attr_length (from
);
16952 /* If there is a jump table, add its length. */
16953 if (tablejump_p (from
, NULL
, &tmp
))
16955 count
+= get_jump_table_size (tmp
);
16957 /* Jump tables aren't in a basic block, so base the cost on
16958 the dispatch insn. If we select this location, we will
16959 still put the pool after the table. */
16960 new_cost
= arm_barrier_cost (from
);
16962 if (count
< max_count
16963 && (!selected
|| new_cost
<= selected_cost
))
16966 selected_cost
= new_cost
;
16967 selected_address
= fix
->address
+ count
;
16970 /* Continue after the dispatch table. */
16971 from
= NEXT_INSN (tmp
);
16975 new_cost
= arm_barrier_cost (from
);
16977 if (count
< max_count
16978 && (!selected
|| new_cost
<= selected_cost
))
16981 selected_cost
= new_cost
;
16982 selected_address
= fix
->address
+ count
;
16985 from
= NEXT_INSN (from
);
16988 /* Make sure that we found a place to insert the jump. */
16989 gcc_assert (selected
);
16991 /* Make sure we do not split a call and its corresponding
16992 CALL_ARG_LOCATION note. */
16993 if (CALL_P (selected
))
16995 rtx_insn
*next
= NEXT_INSN (selected
);
16996 if (next
&& NOTE_P (next
)
16997 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
17001 /* Create a new JUMP_INSN that branches around a barrier. */
17002 from
= emit_jump_insn_after (gen_jump (label
), selected
);
17003 JUMP_LABEL (from
) = label
;
17004 barrier
= emit_barrier_after (from
);
17005 emit_label_after (label
, barrier
);
17007 /* Create a minipool barrier entry for the new barrier. */
17008 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
17009 new_fix
->insn
= barrier
;
17010 new_fix
->address
= selected_address
;
17011 new_fix
->next
= fix
->next
;
17012 fix
->next
= new_fix
;
17017 /* Record that there is a natural barrier in the insn stream at
17020 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
17022 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
17025 fix
->address
= address
;
17028 if (minipool_fix_head
!= NULL
)
17029 minipool_fix_tail
->next
= fix
;
17031 minipool_fix_head
= fix
;
17033 minipool_fix_tail
= fix
;
17036 /* Record INSN, which will need fixing up to load a value from the
17037 minipool. ADDRESS is the offset of the insn since the start of the
17038 function; LOC is a pointer to the part of the insn which requires
17039 fixing; VALUE is the constant that must be loaded, which is of type
17042 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
17043 machine_mode mode
, rtx value
)
17045 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
17048 fix
->address
= address
;
17051 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
17052 fix
->value
= value
;
17053 fix
->forwards
= get_attr_pool_range (insn
);
17054 fix
->backwards
= get_attr_neg_pool_range (insn
);
17055 fix
->minipool
= NULL
;
17057 /* If an insn doesn't have a range defined for it, then it isn't
17058 expecting to be reworked by this code. Better to stop now than
17059 to generate duff assembly code. */
17060 gcc_assert (fix
->forwards
|| fix
->backwards
);
17062 /* If an entry requires 8-byte alignment then assume all constant pools
17063 require 4 bytes of padding. Trying to do this later on a per-pool
17064 basis is awkward because existing pool entries have to be modified. */
17065 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
17070 fprintf (dump_file
,
17071 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17072 GET_MODE_NAME (mode
),
17073 INSN_UID (insn
), (unsigned long) address
,
17074 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
17075 arm_print_value (dump_file
, fix
->value
);
17076 fprintf (dump_file
, "\n");
17079 /* Add it to the chain of fixes. */
17082 if (minipool_fix_head
!= NULL
)
17083 minipool_fix_tail
->next
= fix
;
17085 minipool_fix_head
= fix
;
17087 minipool_fix_tail
= fix
;
17090 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17091 Returns the number of insns needed, or 99 if we always want to synthesize
17094 arm_max_const_double_inline_cost ()
17096 /* Let the value get synthesized to avoid the use of literal pools. */
17097 if (arm_disable_literal_pool
)
17100 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
17103 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17104 Returns the number of insns needed, or 99 if we don't know how to
17107 arm_const_double_inline_cost (rtx val
)
17109 rtx lowpart
, highpart
;
17112 mode
= GET_MODE (val
);
17114 if (mode
== VOIDmode
)
17117 gcc_assert (GET_MODE_SIZE (mode
) == 8);
17119 lowpart
= gen_lowpart (SImode
, val
);
17120 highpart
= gen_highpart_mode (SImode
, mode
, val
);
17122 gcc_assert (CONST_INT_P (lowpart
));
17123 gcc_assert (CONST_INT_P (highpart
));
17125 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
17126 NULL_RTX
, NULL_RTX
, 0, 0)
17127 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
17128 NULL_RTX
, NULL_RTX
, 0, 0));
17131 /* Cost of loading a SImode constant. */
17133 arm_const_inline_cost (enum rtx_code code
, rtx val
)
17135 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
17136 NULL_RTX
, NULL_RTX
, 1, 0);
17139 /* Return true if it is worthwhile to split a 64-bit constant into two
17140 32-bit operations. This is the case if optimizing for size, or
17141 if we have load delay slots, or if one 32-bit part can be done with
17142 a single data operation. */
17144 arm_const_double_by_parts (rtx val
)
17146 machine_mode mode
= GET_MODE (val
);
17149 if (optimize_size
|| arm_ld_sched
)
17152 if (mode
== VOIDmode
)
17155 part
= gen_highpart_mode (SImode
, mode
, val
);
17157 gcc_assert (CONST_INT_P (part
));
17159 if (const_ok_for_arm (INTVAL (part
))
17160 || const_ok_for_arm (~INTVAL (part
)))
17163 part
= gen_lowpart (SImode
, val
);
17165 gcc_assert (CONST_INT_P (part
));
17167 if (const_ok_for_arm (INTVAL (part
))
17168 || const_ok_for_arm (~INTVAL (part
)))
17174 /* Return true if it is possible to inline both the high and low parts
17175 of a 64-bit constant into 32-bit data processing instructions. */
17177 arm_const_double_by_immediates (rtx val
)
17179 machine_mode mode
= GET_MODE (val
);
17182 if (mode
== VOIDmode
)
17185 part
= gen_highpart_mode (SImode
, mode
, val
);
17187 gcc_assert (CONST_INT_P (part
));
17189 if (!const_ok_for_arm (INTVAL (part
)))
17192 part
= gen_lowpart (SImode
, val
);
17194 gcc_assert (CONST_INT_P (part
));
17196 if (!const_ok_for_arm (INTVAL (part
)))
17202 /* Scan INSN and note any of its operands that need fixing.
17203 If DO_PUSHES is false we do not actually push any of the fixups
17206 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
17210 extract_constrain_insn (insn
);
17212 if (recog_data
.n_alternatives
== 0)
17215 /* Fill in recog_op_alt with information about the constraints of
17217 preprocess_constraints (insn
);
17219 const operand_alternative
*op_alt
= which_op_alt ();
17220 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
17222 /* Things we need to fix can only occur in inputs. */
17223 if (recog_data
.operand_type
[opno
] != OP_IN
)
17226 /* If this alternative is a memory reference, then any mention
17227 of constants in this alternative is really to fool reload
17228 into allowing us to accept one there. We need to fix them up
17229 now so that we output the right code. */
17230 if (op_alt
[opno
].memory_ok
)
17232 rtx op
= recog_data
.operand
[opno
];
17234 if (CONSTANT_P (op
))
17237 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
17238 recog_data
.operand_mode
[opno
], op
);
17240 else if (MEM_P (op
)
17241 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
17242 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
17246 rtx cop
= avoid_constant_pool_reference (op
);
17248 /* Casting the address of something to a mode narrower
17249 than a word can cause avoid_constant_pool_reference()
17250 to return the pool reference itself. That's no good to
17251 us here. Lets just hope that we can use the
17252 constant pool value directly. */
17254 cop
= get_pool_constant (XEXP (op
, 0));
17256 push_minipool_fix (insn
, address
,
17257 recog_data
.operand_loc
[opno
],
17258 recog_data
.operand_mode
[opno
], cop
);
17268 /* Rewrite move insn into subtract of 0 if the condition codes will
17269 be useful in next conditional jump insn. */
17272 thumb1_reorg (void)
17276 FOR_EACH_BB_FN (bb
, cfun
)
17279 rtx cmp
, op0
, op1
, set
= NULL
;
17280 rtx_insn
*prev
, *insn
= BB_END (bb
);
17281 bool insn_clobbered
= false;
17283 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17284 insn
= PREV_INSN (insn
);
17286 /* Find the last cbranchsi4_insn in basic block BB. */
17287 if (insn
== BB_HEAD (bb
)
17288 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17291 /* Get the register with which we are comparing. */
17292 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17293 op0
= XEXP (cmp
, 0);
17294 op1
= XEXP (cmp
, 1);
17296 /* Check that comparison is against ZERO. */
17297 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17300 /* Find the first flag setting insn before INSN in basic block BB. */
17301 gcc_assert (insn
!= BB_HEAD (bb
));
17302 for (prev
= PREV_INSN (insn
);
17304 && prev
!= BB_HEAD (bb
)
17306 || DEBUG_INSN_P (prev
)
17307 || ((set
= single_set (prev
)) != NULL
17308 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17309 prev
= PREV_INSN (prev
))
17311 if (reg_set_p (op0
, prev
))
17312 insn_clobbered
= true;
17315 /* Skip if op0 is clobbered by insn other than prev. */
17316 if (insn_clobbered
)
17322 dest
= SET_DEST (set
);
17323 src
= SET_SRC (set
);
17324 if (!low_register_operand (dest
, SImode
)
17325 || !low_register_operand (src
, SImode
))
17328 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17329 in INSN. Both src and dest of the move insn are checked. */
17330 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17332 dest
= copy_rtx (dest
);
17333 src
= copy_rtx (src
);
17334 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17335 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17336 INSN_CODE (prev
) = -1;
17337 /* Set test register in INSN to dest. */
17338 XEXP (cmp
, 0) = copy_rtx (dest
);
17339 INSN_CODE (insn
) = -1;
17344 /* Convert instructions to their cc-clobbering variant if possible, since
17345 that allows us to use smaller encodings. */
17348 thumb2_reorg (void)
17353 INIT_REG_SET (&live
);
17355 /* We are freeing block_for_insn in the toplev to keep compatibility
17356 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17357 compute_bb_for_insn ();
17360 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17362 FOR_EACH_BB_FN (bb
, cfun
)
17364 if ((current_tune
->disparage_flag_setting_t16_encodings
17365 == tune_params::DISPARAGE_FLAGS_ALL
)
17366 && optimize_bb_for_speed_p (bb
))
17370 Convert_Action action
= SKIP
;
17371 Convert_Action action_for_partial_flag_setting
17372 = ((current_tune
->disparage_flag_setting_t16_encodings
17373 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17374 && optimize_bb_for_speed_p (bb
))
17377 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17378 df_simulate_initialize_backwards (bb
, &live
);
17379 FOR_BB_INSNS_REVERSE (bb
, insn
)
17381 if (NONJUMP_INSN_P (insn
)
17382 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17383 && GET_CODE (PATTERN (insn
)) == SET
)
17386 rtx pat
= PATTERN (insn
);
17387 rtx dst
= XEXP (pat
, 0);
17388 rtx src
= XEXP (pat
, 1);
17389 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17391 if (UNARY_P (src
) || BINARY_P (src
))
17392 op0
= XEXP (src
, 0);
17394 if (BINARY_P (src
))
17395 op1
= XEXP (src
, 1);
17397 if (low_register_operand (dst
, SImode
))
17399 switch (GET_CODE (src
))
17402 /* Adding two registers and storing the result
17403 in the first source is already a 16-bit
17405 if (rtx_equal_p (dst
, op0
)
17406 && register_operand (op1
, SImode
))
17409 if (low_register_operand (op0
, SImode
))
17411 /* ADDS <Rd>,<Rn>,<Rm> */
17412 if (low_register_operand (op1
, SImode
))
17414 /* ADDS <Rdn>,#<imm8> */
17415 /* SUBS <Rdn>,#<imm8> */
17416 else if (rtx_equal_p (dst
, op0
)
17417 && CONST_INT_P (op1
)
17418 && IN_RANGE (INTVAL (op1
), -255, 255))
17420 /* ADDS <Rd>,<Rn>,#<imm3> */
17421 /* SUBS <Rd>,<Rn>,#<imm3> */
17422 else if (CONST_INT_P (op1
)
17423 && IN_RANGE (INTVAL (op1
), -7, 7))
17426 /* ADCS <Rd>, <Rn> */
17427 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17428 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17429 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17431 && COMPARISON_P (op1
)
17432 && cc_register (XEXP (op1
, 0), VOIDmode
)
17433 && maybe_get_arm_condition_code (op1
) == ARM_CS
17434 && XEXP (op1
, 1) == const0_rtx
)
17439 /* RSBS <Rd>,<Rn>,#0
17440 Not handled here: see NEG below. */
17441 /* SUBS <Rd>,<Rn>,#<imm3>
17443 Not handled here: see PLUS above. */
17444 /* SUBS <Rd>,<Rn>,<Rm> */
17445 if (low_register_operand (op0
, SImode
)
17446 && low_register_operand (op1
, SImode
))
17451 /* MULS <Rdm>,<Rn>,<Rdm>
17452 As an exception to the rule, this is only used
17453 when optimizing for size since MULS is slow on all
17454 known implementations. We do not even want to use
17455 MULS in cold code, if optimizing for speed, so we
17456 test the global flag here. */
17457 if (!optimize_size
)
17459 /* else fall through. */
17463 /* ANDS <Rdn>,<Rm> */
17464 if (rtx_equal_p (dst
, op0
)
17465 && low_register_operand (op1
, SImode
))
17466 action
= action_for_partial_flag_setting
;
17467 else if (rtx_equal_p (dst
, op1
)
17468 && low_register_operand (op0
, SImode
))
17469 action
= action_for_partial_flag_setting
== SKIP
17470 ? SKIP
: SWAP_CONV
;
17476 /* ASRS <Rdn>,<Rm> */
17477 /* LSRS <Rdn>,<Rm> */
17478 /* LSLS <Rdn>,<Rm> */
17479 if (rtx_equal_p (dst
, op0
)
17480 && low_register_operand (op1
, SImode
))
17481 action
= action_for_partial_flag_setting
;
17482 /* ASRS <Rd>,<Rm>,#<imm5> */
17483 /* LSRS <Rd>,<Rm>,#<imm5> */
17484 /* LSLS <Rd>,<Rm>,#<imm5> */
17485 else if (low_register_operand (op0
, SImode
)
17486 && CONST_INT_P (op1
)
17487 && IN_RANGE (INTVAL (op1
), 0, 31))
17488 action
= action_for_partial_flag_setting
;
17492 /* RORS <Rdn>,<Rm> */
17493 if (rtx_equal_p (dst
, op0
)
17494 && low_register_operand (op1
, SImode
))
17495 action
= action_for_partial_flag_setting
;
17499 /* MVNS <Rd>,<Rm> */
17500 if (low_register_operand (op0
, SImode
))
17501 action
= action_for_partial_flag_setting
;
17505 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17506 if (low_register_operand (op0
, SImode
))
17511 /* MOVS <Rd>,#<imm8> */
17512 if (CONST_INT_P (src
)
17513 && IN_RANGE (INTVAL (src
), 0, 255))
17514 action
= action_for_partial_flag_setting
;
17518 /* MOVS and MOV<c> with registers have different
17519 encodings, so are not relevant here. */
17527 if (action
!= SKIP
)
17529 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17530 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17533 if (action
== SWAP_CONV
)
17535 src
= copy_rtx (src
);
17536 XEXP (src
, 0) = op1
;
17537 XEXP (src
, 1) = op0
;
17538 pat
= gen_rtx_SET (dst
, src
);
17539 vec
= gen_rtvec (2, pat
, clobber
);
17541 else /* action == CONV */
17542 vec
= gen_rtvec (2, pat
, clobber
);
17544 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17545 INSN_CODE (insn
) = -1;
17549 if (NONDEBUG_INSN_P (insn
))
17550 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17554 CLEAR_REG_SET (&live
);
17557 /* Gcc puts the pool in the wrong place for ARM, since we can only
17558 load addresses a limited distance around the pc. We do some
17559 special munging to move the constant pool values to the correct
17560 point in the code. */
17565 HOST_WIDE_INT address
= 0;
17570 else if (TARGET_THUMB2
)
17573 /* Ensure all insns that must be split have been split at this point.
17574 Otherwise, the pool placement code below may compute incorrect
17575 insn lengths. Note that when optimizing, all insns have already
17576 been split at this point. */
17578 split_all_insns_noflow ();
17580 minipool_fix_head
= minipool_fix_tail
= NULL
;
17582 /* The first insn must always be a note, or the code below won't
17583 scan it properly. */
17584 insn
= get_insns ();
17585 gcc_assert (NOTE_P (insn
));
17588 /* Scan all the insns and record the operands that will need fixing. */
17589 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17591 if (BARRIER_P (insn
))
17592 push_minipool_barrier (insn
, address
);
17593 else if (INSN_P (insn
))
17595 rtx_jump_table_data
*table
;
17597 note_invalid_constants (insn
, address
, true);
17598 address
+= get_attr_length (insn
);
17600 /* If the insn is a vector jump, add the size of the table
17601 and skip the table. */
17602 if (tablejump_p (insn
, NULL
, &table
))
17604 address
+= get_jump_table_size (table
);
17608 else if (LABEL_P (insn
))
17609 /* Add the worst-case padding due to alignment. We don't add
17610 the _current_ padding because the minipool insertions
17611 themselves might change it. */
17612 address
+= get_label_padding (insn
);
17615 fix
= minipool_fix_head
;
17617 /* Now scan the fixups and perform the required changes. */
17622 Mfix
* last_added_fix
;
17623 Mfix
* last_barrier
= NULL
;
17626 /* Skip any further barriers before the next fix. */
17627 while (fix
&& BARRIER_P (fix
->insn
))
17630 /* No more fixes. */
17634 last_added_fix
= NULL
;
17636 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17638 if (BARRIER_P (ftmp
->insn
))
17640 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17643 last_barrier
= ftmp
;
17645 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17648 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17651 /* If we found a barrier, drop back to that; any fixes that we
17652 could have reached but come after the barrier will now go in
17653 the next mini-pool. */
17654 if (last_barrier
!= NULL
)
17656 /* Reduce the refcount for those fixes that won't go into this
17658 for (fdel
= last_barrier
->next
;
17659 fdel
&& fdel
!= ftmp
;
17662 fdel
->minipool
->refcount
--;
17663 fdel
->minipool
= NULL
;
17666 ftmp
= last_barrier
;
17670 /* ftmp is first fix that we can't fit into this pool and
17671 there no natural barriers that we could use. Insert a
17672 new barrier in the code somewhere between the previous
17673 fix and this one, and arrange to jump around it. */
17674 HOST_WIDE_INT max_address
;
17676 /* The last item on the list of fixes must be a barrier, so
17677 we can never run off the end of the list of fixes without
17678 last_barrier being set. */
17681 max_address
= minipool_vector_head
->max_address
;
17682 /* Check that there isn't another fix that is in range that
17683 we couldn't fit into this pool because the pool was
17684 already too large: we need to put the pool before such an
17685 instruction. The pool itself may come just after the
17686 fix because create_fix_barrier also allows space for a
17687 jump instruction. */
17688 if (ftmp
->address
< max_address
)
17689 max_address
= ftmp
->address
+ 1;
17691 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17694 assign_minipool_offsets (last_barrier
);
17698 if (!BARRIER_P (ftmp
->insn
)
17699 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17706 /* Scan over the fixes we have identified for this pool, fixing them
17707 up and adding the constants to the pool itself. */
17708 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17709 this_fix
= this_fix
->next
)
17710 if (!BARRIER_P (this_fix
->insn
))
17713 = plus_constant (Pmode
,
17714 gen_rtx_LABEL_REF (VOIDmode
,
17715 minipool_vector_label
),
17716 this_fix
->minipool
->offset
);
17717 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17720 dump_minipool (last_barrier
->insn
);
17724 /* From now on we must synthesize any constants that we can't handle
17725 directly. This can happen if the RTL gets split during final
17726 instruction generation. */
17727 cfun
->machine
->after_arm_reorg
= 1;
17729 /* Free the minipool memory. */
17730 obstack_free (&minipool_obstack
, minipool_startobj
);
17733 /* Routines to output assembly language. */
17735 /* Return string representation of passed in real value. */
17736 static const char *
17737 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17739 if (!fp_consts_inited
)
17742 gcc_assert (real_equal (r
, &value_fp0
));
17746 /* OPERANDS[0] is the entire list of insns that constitute pop,
17747 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17748 is in the list, UPDATE is true iff the list contains explicit
17749 update of base register. */
17751 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17757 const char *conditional
;
17758 int num_saves
= XVECLEN (operands
[0], 0);
17759 unsigned int regno
;
17760 unsigned int regno_base
= REGNO (operands
[1]);
17761 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
17764 offset
+= update
? 1 : 0;
17765 offset
+= return_pc
? 1 : 0;
17767 /* Is the base register in the list? */
17768 for (i
= offset
; i
< num_saves
; i
++)
17770 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17771 /* If SP is in the list, then the base register must be SP. */
17772 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17773 /* If base register is in the list, there must be no explicit update. */
17774 if (regno
== regno_base
)
17775 gcc_assert (!update
);
17778 conditional
= reverse
? "%?%D0" : "%?%d0";
17779 /* Can't use POP if returning from an interrupt. */
17780 if ((regno_base
== SP_REGNUM
) && !(interrupt_p
&& return_pc
))
17782 sprintf (pattern
, "pop%s\t{", conditional
);
17786 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17787 It's just a convention, their semantics are identical. */
17788 if (regno_base
== SP_REGNUM
)
17789 sprintf (pattern
, "ldmfd%s\t", conditional
);
17791 sprintf (pattern
, "ldmia%s\t", conditional
);
17793 sprintf (pattern
, "ldm%s\t", conditional
);
17795 strcat (pattern
, reg_names
[regno_base
]);
17797 strcat (pattern
, "!, {");
17799 strcat (pattern
, ", {");
17802 /* Output the first destination register. */
17804 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17806 /* Output the rest of the destination registers. */
17807 for (i
= offset
+ 1; i
< num_saves
; i
++)
17809 strcat (pattern
, ", ");
17811 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17814 strcat (pattern
, "}");
17816 if (interrupt_p
&& return_pc
)
17817 strcat (pattern
, "^");
17819 output_asm_insn (pattern
, &cond
);
17823 /* Output the assembly for a store multiple. */
17826 vfp_output_vstmd (rtx
* operands
)
17832 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17833 ? XEXP (operands
[0], 0)
17834 : XEXP (XEXP (operands
[0], 0), 0);
17835 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17838 strcpy (pattern
, "vpush%?.64\t{%P1");
17840 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17842 p
= strlen (pattern
);
17844 gcc_assert (REG_P (operands
[1]));
17846 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17847 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17849 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17851 strcpy (&pattern
[p
], "}");
17853 output_asm_insn (pattern
, operands
);
17858 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17859 number of bytes pushed. */
17862 vfp_emit_fstmd (int base_reg
, int count
)
17869 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17870 register pairs are stored by a store multiple insn. We avoid this
17871 by pushing an extra pair. */
17872 if (count
== 2 && !arm_arch6
)
17874 if (base_reg
== LAST_VFP_REGNUM
- 3)
17879 /* FSTMD may not store more than 16 doubleword registers at once. Split
17880 larger stores into multiple parts (up to a maximum of two, in
17885 /* NOTE: base_reg is an internal register number, so each D register
17887 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17888 saved
+= vfp_emit_fstmd (base_reg
, 16);
17892 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17893 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17895 reg
= gen_rtx_REG (DFmode
, base_reg
);
17898 XVECEXP (par
, 0, 0)
17899 = gen_rtx_SET (gen_frame_mem
17901 gen_rtx_PRE_MODIFY (Pmode
,
17904 (Pmode
, stack_pointer_rtx
,
17907 gen_rtx_UNSPEC (BLKmode
,
17908 gen_rtvec (1, reg
),
17909 UNSPEC_PUSH_MULT
));
17911 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17912 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17913 RTX_FRAME_RELATED_P (tmp
) = 1;
17914 XVECEXP (dwarf
, 0, 0) = tmp
;
17916 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17917 RTX_FRAME_RELATED_P (tmp
) = 1;
17918 XVECEXP (dwarf
, 0, 1) = tmp
;
17920 for (i
= 1; i
< count
; i
++)
17922 reg
= gen_rtx_REG (DFmode
, base_reg
);
17924 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17926 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17927 plus_constant (Pmode
,
17931 RTX_FRAME_RELATED_P (tmp
) = 1;
17932 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17935 par
= emit_insn (par
);
17936 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17937 RTX_FRAME_RELATED_P (par
) = 1;
17942 /* Emit a call instruction with pattern PAT. ADDR is the address of
17943 the call target. */
17946 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17950 insn
= emit_call_insn (pat
);
17952 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17953 If the call might use such an entry, add a use of the PIC register
17954 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17955 if (TARGET_VXWORKS_RTP
17958 && GET_CODE (addr
) == SYMBOL_REF
17959 && (SYMBOL_REF_DECL (addr
)
17960 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17961 : !SYMBOL_REF_LOCAL_P (addr
)))
17963 require_pic_register ();
17964 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17967 if (TARGET_AAPCS_BASED
)
17969 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17970 linker. We need to add an IP clobber to allow setting
17971 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17972 is not needed since it's a fixed register. */
17973 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17974 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17978 /* Output a 'call' insn. */
17980 output_call (rtx
*operands
)
17982 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17984 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17985 if (REGNO (operands
[0]) == LR_REGNUM
)
17987 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17988 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17991 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17993 if (TARGET_INTERWORK
|| arm_arch4t
)
17994 output_asm_insn ("bx%?\t%0", operands
);
17996 output_asm_insn ("mov%?\t%|pc, %0", operands
);
18001 /* Output a move from arm registers to arm registers of a long double
18002 OPERANDS[0] is the destination.
18003 OPERANDS[1] is the source. */
18005 output_mov_long_double_arm_from_arm (rtx
*operands
)
18007 /* We have to be careful here because the two might overlap. */
18008 int dest_start
= REGNO (operands
[0]);
18009 int src_start
= REGNO (operands
[1]);
18013 if (dest_start
< src_start
)
18015 for (i
= 0; i
< 3; i
++)
18017 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18018 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18019 output_asm_insn ("mov%?\t%0, %1", ops
);
18024 for (i
= 2; i
>= 0; i
--)
18026 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18027 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18028 output_asm_insn ("mov%?\t%0, %1", ops
);
18036 arm_emit_movpair (rtx dest
, rtx src
)
18040 /* If the src is an immediate, simplify it. */
18041 if (CONST_INT_P (src
))
18043 HOST_WIDE_INT val
= INTVAL (src
);
18044 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
18045 if ((val
>> 16) & 0x0000ffff)
18047 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
18049 GEN_INT ((val
>> 16) & 0x0000ffff));
18050 insn
= get_last_insn ();
18051 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
18055 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
18056 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
18057 insn
= get_last_insn ();
18058 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
18061 /* Output a move between double words. It must be REG<-MEM
18064 output_move_double (rtx
*operands
, bool emit
, int *count
)
18066 enum rtx_code code0
= GET_CODE (operands
[0]);
18067 enum rtx_code code1
= GET_CODE (operands
[1]);
18072 /* The only case when this might happen is when
18073 you are looking at the length of a DImode instruction
18074 that has an invalid constant in it. */
18075 if (code0
== REG
&& code1
!= MEM
)
18077 gcc_assert (!emit
);
18084 unsigned int reg0
= REGNO (operands
[0]);
18086 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
18088 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
18090 switch (GET_CODE (XEXP (operands
[1], 0)))
18097 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
18098 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
18100 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18105 gcc_assert (TARGET_LDRD
);
18107 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
18114 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
18116 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18124 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18126 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18131 gcc_assert (TARGET_LDRD
);
18133 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
18138 /* Autoicrement addressing modes should never have overlapping
18139 base and destination registers, and overlapping index registers
18140 are already prohibited, so this doesn't need to worry about
18142 otherops
[0] = operands
[0];
18143 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18144 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18146 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18148 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18150 /* Registers overlap so split out the increment. */
18153 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18154 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18161 /* Use a single insn if we can.
18162 FIXME: IWMMXT allows offsets larger than ldrd can
18163 handle, fix these up with a pair of ldr. */
18165 || !CONST_INT_P (otherops
[2])
18166 || (INTVAL (otherops
[2]) > -256
18167 && INTVAL (otherops
[2]) < 256))
18170 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18176 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18177 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18187 /* Use a single insn if we can.
18188 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18189 fix these up with a pair of ldr. */
18191 || !CONST_INT_P (otherops
[2])
18192 || (INTVAL (otherops
[2]) > -256
18193 && INTVAL (otherops
[2]) < 256))
18196 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18202 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18203 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18213 /* We might be able to use ldrd %0, %1 here. However the range is
18214 different to ldr/adr, and it is broken on some ARMv7-M
18215 implementations. */
18216 /* Use the second register of the pair to avoid problematic
18218 otherops
[1] = operands
[1];
18220 output_asm_insn ("adr%?\t%0, %1", otherops
);
18221 operands
[1] = otherops
[0];
18225 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18227 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18234 /* ??? This needs checking for thumb2. */
18236 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18237 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18239 otherops
[0] = operands
[0];
18240 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18241 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18243 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18245 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18247 switch ((int) INTVAL (otherops
[2]))
18251 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18257 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18263 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18267 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18268 operands
[1] = otherops
[0];
18270 && (REG_P (otherops
[2])
18272 || (CONST_INT_P (otherops
[2])
18273 && INTVAL (otherops
[2]) > -256
18274 && INTVAL (otherops
[2]) < 256)))
18276 if (reg_overlap_mentioned_p (operands
[0],
18279 /* Swap base and index registers over to
18280 avoid a conflict. */
18281 std::swap (otherops
[1], otherops
[2]);
18283 /* If both registers conflict, it will usually
18284 have been fixed by a splitter. */
18285 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18286 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18290 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18291 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18298 otherops
[0] = operands
[0];
18300 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18305 if (CONST_INT_P (otherops
[2]))
18309 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18310 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18312 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18318 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18324 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18331 return "ldrd%?\t%0, [%1]";
18333 return "ldmia%?\t%1, %M0";
18337 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18338 /* Take care of overlapping base/data reg. */
18339 if (reg_mentioned_p (operands
[0], operands
[1]))
18343 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18344 output_asm_insn ("ldr%?\t%0, %1", operands
);
18354 output_asm_insn ("ldr%?\t%0, %1", operands
);
18355 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18365 /* Constraints should ensure this. */
18366 gcc_assert (code0
== MEM
&& code1
== REG
);
18367 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18368 || (TARGET_ARM
&& TARGET_LDRD
));
18370 switch (GET_CODE (XEXP (operands
[0], 0)))
18376 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18378 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18383 gcc_assert (TARGET_LDRD
);
18385 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18392 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18394 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18402 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18404 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18409 gcc_assert (TARGET_LDRD
);
18411 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18416 otherops
[0] = operands
[1];
18417 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18418 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18420 /* IWMMXT allows offsets larger than ldrd can handle,
18421 fix these up with a pair of ldr. */
18423 && CONST_INT_P (otherops
[2])
18424 && (INTVAL(otherops
[2]) <= -256
18425 || INTVAL(otherops
[2]) >= 256))
18427 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18431 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18432 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18441 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18442 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18448 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18451 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18456 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18461 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18462 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18464 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18468 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18475 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18482 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18487 && (REG_P (otherops
[2])
18489 || (CONST_INT_P (otherops
[2])
18490 && INTVAL (otherops
[2]) > -256
18491 && INTVAL (otherops
[2]) < 256)))
18493 otherops
[0] = operands
[1];
18494 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18496 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18502 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18503 otherops
[1] = operands
[1];
18506 output_asm_insn ("str%?\t%1, %0", operands
);
18507 output_asm_insn ("str%?\t%H1, %0", otherops
);
18517 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18518 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18521 output_move_quad (rtx
*operands
)
18523 if (REG_P (operands
[0]))
18525 /* Load, or reg->reg move. */
18527 if (MEM_P (operands
[1]))
18529 switch (GET_CODE (XEXP (operands
[1], 0)))
18532 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18537 output_asm_insn ("adr%?\t%0, %1", operands
);
18538 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18542 gcc_unreachable ();
18550 gcc_assert (REG_P (operands
[1]));
18552 dest
= REGNO (operands
[0]);
18553 src
= REGNO (operands
[1]);
18555 /* This seems pretty dumb, but hopefully GCC won't try to do it
18558 for (i
= 0; i
< 4; i
++)
18560 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18561 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18562 output_asm_insn ("mov%?\t%0, %1", ops
);
18565 for (i
= 3; i
>= 0; i
--)
18567 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18568 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18569 output_asm_insn ("mov%?\t%0, %1", ops
);
18575 gcc_assert (MEM_P (operands
[0]));
18576 gcc_assert (REG_P (operands
[1]));
18577 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18579 switch (GET_CODE (XEXP (operands
[0], 0)))
18582 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18586 gcc_unreachable ();
18593 /* Output a VFP load or store instruction. */
18596 output_move_vfp (rtx
*operands
)
18598 rtx reg
, mem
, addr
, ops
[2];
18599 int load
= REG_P (operands
[0]);
18600 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18601 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18606 reg
= operands
[!load
];
18607 mem
= operands
[load
];
18609 mode
= GET_MODE (reg
);
18611 gcc_assert (REG_P (reg
));
18612 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18613 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
18618 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18619 gcc_assert (MEM_P (mem
));
18621 addr
= XEXP (mem
, 0);
18623 switch (GET_CODE (addr
))
18626 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18627 ops
[0] = XEXP (addr
, 0);
18632 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18633 ops
[0] = XEXP (addr
, 0);
18638 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18644 sprintf (buff
, templ
,
18645 load
? "ld" : "st",
18648 integer_p
? "\t%@ int" : "");
18649 output_asm_insn (buff
, ops
);
18654 /* Output a Neon double-word or quad-word load or store, or a load
18655 or store for larger structure modes.
18657 WARNING: The ordering of elements is weird in big-endian mode,
18658 because the EABI requires that vectors stored in memory appear
18659 as though they were stored by a VSTM, as required by the EABI.
18660 GCC RTL defines element ordering based on in-memory order.
18661 This can be different from the architectural ordering of elements
18662 within a NEON register. The intrinsics defined in arm_neon.h use the
18663 NEON register element ordering, not the GCC RTL element ordering.
18665 For example, the in-memory ordering of a big-endian a quadword
18666 vector with 16-bit elements when stored from register pair {d0,d1}
18667 will be (lowest address first, d0[N] is NEON register element N):
18669 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18671 When necessary, quadword registers (dN, dN+1) are moved to ARM
18672 registers from rN in the order:
18674 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18676 So that STM/LDM can be used on vectors in ARM registers, and the
18677 same memory layout will result as if VSTM/VLDM were used.
18679 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18680 possible, which allows use of appropriate alignment tags.
18681 Note that the choice of "64" is independent of the actual vector
18682 element size; this size simply ensures that the behavior is
18683 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18685 Due to limitations of those instructions, use of VST1.64/VLD1.64
18686 is not possible if:
18687 - the address contains PRE_DEC, or
18688 - the mode refers to more than 4 double-word registers
18690 In those cases, it would be possible to replace VSTM/VLDM by a
18691 sequence of instructions; this is not currently implemented since
18692 this is not certain to actually improve performance. */
18695 output_move_neon (rtx
*operands
)
18697 rtx reg
, mem
, addr
, ops
[2];
18698 int regno
, nregs
, load
= REG_P (operands
[0]);
18703 reg
= operands
[!load
];
18704 mem
= operands
[load
];
18706 mode
= GET_MODE (reg
);
18708 gcc_assert (REG_P (reg
));
18709 regno
= REGNO (reg
);
18710 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18711 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18712 || NEON_REGNO_OK_FOR_QUAD (regno
));
18713 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18714 || VALID_NEON_QREG_MODE (mode
)
18715 || VALID_NEON_STRUCT_MODE (mode
));
18716 gcc_assert (MEM_P (mem
));
18718 addr
= XEXP (mem
, 0);
18720 /* Strip off const from addresses like (const (plus (...))). */
18721 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18722 addr
= XEXP (addr
, 0);
18724 switch (GET_CODE (addr
))
18727 /* We have to use vldm / vstm for too-large modes. */
18730 templ
= "v%smia%%?\t%%0!, %%h1";
18731 ops
[0] = XEXP (addr
, 0);
18735 templ
= "v%s1.64\t%%h1, %%A0";
18742 /* We have to use vldm / vstm in this case, since there is no
18743 pre-decrement form of the vld1 / vst1 instructions. */
18744 templ
= "v%smdb%%?\t%%0!, %%h1";
18745 ops
[0] = XEXP (addr
, 0);
18750 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18751 gcc_unreachable ();
18754 /* We have to use vldm / vstm for too-large modes. */
18758 templ
= "v%smia%%?\t%%m0, %%h1";
18760 templ
= "v%s1.64\t%%h1, %%A0";
18766 /* Fall through. */
18772 for (i
= 0; i
< nregs
; i
++)
18774 /* We're only using DImode here because it's a convenient size. */
18775 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18776 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18777 if (reg_overlap_mentioned_p (ops
[0], mem
))
18779 gcc_assert (overlap
== -1);
18784 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18785 output_asm_insn (buff
, ops
);
18790 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18791 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18792 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18793 output_asm_insn (buff
, ops
);
18800 gcc_unreachable ();
18803 sprintf (buff
, templ
, load
? "ld" : "st");
18804 output_asm_insn (buff
, ops
);
18809 /* Compute and return the length of neon_mov<mode>, where <mode> is
18810 one of VSTRUCT modes: EI, OI, CI or XI. */
18812 arm_attr_length_move_neon (rtx_insn
*insn
)
18814 rtx reg
, mem
, addr
;
18818 extract_insn_cached (insn
);
18820 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18822 mode
= GET_MODE (recog_data
.operand
[0]);
18833 gcc_unreachable ();
18837 load
= REG_P (recog_data
.operand
[0]);
18838 reg
= recog_data
.operand
[!load
];
18839 mem
= recog_data
.operand
[load
];
18841 gcc_assert (MEM_P (mem
));
18843 mode
= GET_MODE (reg
);
18844 addr
= XEXP (mem
, 0);
18846 /* Strip off const from addresses like (const (plus (...))). */
18847 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18848 addr
= XEXP (addr
, 0);
18850 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18852 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18859 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18863 arm_address_offset_is_imm (rtx_insn
*insn
)
18867 extract_insn_cached (insn
);
18869 if (REG_P (recog_data
.operand
[0]))
18872 mem
= recog_data
.operand
[0];
18874 gcc_assert (MEM_P (mem
));
18876 addr
= XEXP (mem
, 0);
18879 || (GET_CODE (addr
) == PLUS
18880 && REG_P (XEXP (addr
, 0))
18881 && CONST_INT_P (XEXP (addr
, 1))))
18887 /* Output an ADD r, s, #n where n may be too big for one instruction.
18888 If adding zero to one register, output nothing. */
18890 output_add_immediate (rtx
*operands
)
18892 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18894 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18897 output_multi_immediate (operands
,
18898 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18901 output_multi_immediate (operands
,
18902 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18909 /* Output a multiple immediate operation.
18910 OPERANDS is the vector of operands referred to in the output patterns.
18911 INSTR1 is the output pattern to use for the first constant.
18912 INSTR2 is the output pattern to use for subsequent constants.
18913 IMMED_OP is the index of the constant slot in OPERANDS.
18914 N is the constant value. */
18915 static const char *
18916 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18917 int immed_op
, HOST_WIDE_INT n
)
18919 #if HOST_BITS_PER_WIDE_INT > 32
18925 /* Quick and easy output. */
18926 operands
[immed_op
] = const0_rtx
;
18927 output_asm_insn (instr1
, operands
);
18932 const char * instr
= instr1
;
18934 /* Note that n is never zero here (which would give no output). */
18935 for (i
= 0; i
< 32; i
+= 2)
18939 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18940 output_asm_insn (instr
, operands
);
18950 /* Return the name of a shifter operation. */
18951 static const char *
18952 arm_shift_nmem(enum rtx_code code
)
18957 return ARM_LSL_NAME
;
18973 /* Return the appropriate ARM instruction for the operation code.
18974 The returned result should not be overwritten. OP is the rtx of the
18975 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18978 arithmetic_instr (rtx op
, int shift_first_arg
)
18980 switch (GET_CODE (op
))
18986 return shift_first_arg
? "rsb" : "sub";
19001 return arm_shift_nmem(GET_CODE(op
));
19004 gcc_unreachable ();
19008 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19009 for the operation code. The returned result should not be overwritten.
19010 OP is the rtx code of the shift.
19011 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
19013 static const char *
19014 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
19017 enum rtx_code code
= GET_CODE (op
);
19022 if (!CONST_INT_P (XEXP (op
, 1)))
19024 output_operand_lossage ("invalid shift operand");
19029 *amountp
= 32 - INTVAL (XEXP (op
, 1));
19037 mnem
= arm_shift_nmem(code
);
19038 if (CONST_INT_P (XEXP (op
, 1)))
19040 *amountp
= INTVAL (XEXP (op
, 1));
19042 else if (REG_P (XEXP (op
, 1)))
19049 output_operand_lossage ("invalid shift operand");
19055 /* We never have to worry about the amount being other than a
19056 power of 2, since this case can never be reloaded from a reg. */
19057 if (!CONST_INT_P (XEXP (op
, 1)))
19059 output_operand_lossage ("invalid shift operand");
19063 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
19065 /* Amount must be a power of two. */
19066 if (*amountp
& (*amountp
- 1))
19068 output_operand_lossage ("invalid shift operand");
19072 *amountp
= int_log2 (*amountp
);
19073 return ARM_LSL_NAME
;
19076 output_operand_lossage ("invalid shift operand");
19080 /* This is not 100% correct, but follows from the desire to merge
19081 multiplication by a power of 2 with the recognizer for a
19082 shift. >=32 is not a valid shift for "lsl", so we must try and
19083 output a shift that produces the correct arithmetical result.
19084 Using lsr #32 is identical except for the fact that the carry bit
19085 is not set correctly if we set the flags; but we never use the
19086 carry bit from such an operation, so we can ignore that. */
19087 if (code
== ROTATERT
)
19088 /* Rotate is just modulo 32. */
19090 else if (*amountp
!= (*amountp
& 31))
19092 if (code
== ASHIFT
)
19097 /* Shifts of 0 are no-ops. */
19104 /* Obtain the shift from the POWER of two. */
19106 static HOST_WIDE_INT
19107 int_log2 (HOST_WIDE_INT power
)
19109 HOST_WIDE_INT shift
= 0;
19111 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
19113 gcc_assert (shift
<= 31);
19120 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19121 because /bin/as is horribly restrictive. The judgement about
19122 whether or not each character is 'printable' (and can be output as
19123 is) or not (and must be printed with an octal escape) must be made
19124 with reference to the *host* character set -- the situation is
19125 similar to that discussed in the comments above pp_c_char in
19126 c-pretty-print.c. */
19128 #define MAX_ASCII_LEN 51
19131 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
19134 int len_so_far
= 0;
19136 fputs ("\t.ascii\t\"", stream
);
19138 for (i
= 0; i
< len
; i
++)
19142 if (len_so_far
>= MAX_ASCII_LEN
)
19144 fputs ("\"\n\t.ascii\t\"", stream
);
19150 if (c
== '\\' || c
== '\"')
19152 putc ('\\', stream
);
19160 fprintf (stream
, "\\%03o", c
);
19165 fputs ("\"\n", stream
);
19168 /* Whether a register is callee saved or not. This is necessary because high
19169 registers are marked as caller saved when optimizing for size on Thumb-1
19170 targets despite being callee saved in order to avoid using them. */
19171 #define callee_saved_reg_p(reg) \
19172 (!call_used_regs[reg] \
19173 || (TARGET_THUMB1 && optimize_size \
19174 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19176 /* Compute the register save mask for registers 0 through 12
19177 inclusive. This code is used by arm_compute_save_reg_mask. */
19179 static unsigned long
19180 arm_compute_save_reg0_reg12_mask (void)
19182 unsigned long func_type
= arm_current_func_type ();
19183 unsigned long save_reg_mask
= 0;
19186 if (IS_INTERRUPT (func_type
))
19188 unsigned int max_reg
;
19189 /* Interrupt functions must not corrupt any registers,
19190 even call clobbered ones. If this is a leaf function
19191 we can just examine the registers used by the RTL, but
19192 otherwise we have to assume that whatever function is
19193 called might clobber anything, and so we have to save
19194 all the call-clobbered registers as well. */
19195 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19196 /* FIQ handlers have registers r8 - r12 banked, so
19197 we only need to check r0 - r7, Normal ISRs only
19198 bank r14 and r15, so we must check up to r12.
19199 r13 is the stack pointer which is always preserved,
19200 so we do not need to consider it here. */
19205 for (reg
= 0; reg
<= max_reg
; reg
++)
19206 if (df_regs_ever_live_p (reg
)
19207 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19208 save_reg_mask
|= (1 << reg
);
19210 /* Also save the pic base register if necessary. */
19212 && !TARGET_SINGLE_PIC_BASE
19213 && arm_pic_register
!= INVALID_REGNUM
19214 && crtl
->uses_pic_offset_table
)
19215 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19217 else if (IS_VOLATILE(func_type
))
19219 /* For noreturn functions we historically omitted register saves
19220 altogether. However this really messes up debugging. As a
19221 compromise save just the frame pointers. Combined with the link
19222 register saved elsewhere this should be sufficient to get
19224 if (frame_pointer_needed
)
19225 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19226 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19227 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19228 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19229 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19233 /* In the normal case we only need to save those registers
19234 which are call saved and which are used by this function. */
19235 for (reg
= 0; reg
<= 11; reg
++)
19236 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19237 save_reg_mask
|= (1 << reg
);
19239 /* Handle the frame pointer as a special case. */
19240 if (frame_pointer_needed
)
19241 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19243 /* If we aren't loading the PIC register,
19244 don't stack it even though it may be live. */
19246 && !TARGET_SINGLE_PIC_BASE
19247 && arm_pic_register
!= INVALID_REGNUM
19248 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19249 || crtl
->uses_pic_offset_table
))
19250 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19252 /* The prologue will copy SP into R0, so save it. */
19253 if (IS_STACKALIGN (func_type
))
19254 save_reg_mask
|= 1;
19257 /* Save registers so the exception handler can modify them. */
19258 if (crtl
->calls_eh_return
)
19264 reg
= EH_RETURN_DATA_REGNO (i
);
19265 if (reg
== INVALID_REGNUM
)
19267 save_reg_mask
|= 1 << reg
;
19271 return save_reg_mask
;
19274 /* Return true if r3 is live at the start of the function. */
19277 arm_r3_live_at_start_p (void)
19279 /* Just look at cfg info, which is still close enough to correct at this
19280 point. This gives false positives for broken functions that might use
19281 uninitialized data that happens to be allocated in r3, but who cares? */
19282 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19285 /* Compute the number of bytes used to store the static chain register on the
19286 stack, above the stack frame. We need to know this accurately to get the
19287 alignment of the rest of the stack frame correct. */
19290 arm_compute_static_chain_stack_bytes (void)
19292 /* See the defining assertion in arm_expand_prologue. */
19293 if (IS_NESTED (arm_current_func_type ())
19294 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19295 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19296 && !df_regs_ever_live_p (LR_REGNUM
)))
19297 && arm_r3_live_at_start_p ()
19298 && crtl
->args
.pretend_args_size
== 0)
19304 /* Compute a bit mask of which registers need to be
19305 saved on the stack for the current function.
19306 This is used by arm_get_frame_offsets, which may add extra registers. */
19308 static unsigned long
19309 arm_compute_save_reg_mask (void)
19311 unsigned int save_reg_mask
= 0;
19312 unsigned long func_type
= arm_current_func_type ();
19315 if (IS_NAKED (func_type
))
19316 /* This should never really happen. */
19319 /* If we are creating a stack frame, then we must save the frame pointer,
19320 IP (which will hold the old stack pointer), LR and the PC. */
19321 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19323 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19326 | (1 << PC_REGNUM
);
19328 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19330 /* Decide if we need to save the link register.
19331 Interrupt routines have their own banked link register,
19332 so they never need to save it.
19333 Otherwise if we do not use the link register we do not need to save
19334 it. If we are pushing other registers onto the stack however, we
19335 can save an instruction in the epilogue by pushing the link register
19336 now and then popping it back into the PC. This incurs extra memory
19337 accesses though, so we only do it when optimizing for size, and only
19338 if we know that we will not need a fancy return sequence. */
19339 if (df_regs_ever_live_p (LR_REGNUM
)
19342 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19343 && !crtl
->tail_call_emit
19344 && !crtl
->calls_eh_return
))
19345 save_reg_mask
|= 1 << LR_REGNUM
;
19347 if (cfun
->machine
->lr_save_eliminated
)
19348 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19350 if (TARGET_REALLY_IWMMXT
19351 && ((bit_count (save_reg_mask
)
19352 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19353 arm_compute_static_chain_stack_bytes())
19356 /* The total number of registers that are going to be pushed
19357 onto the stack is odd. We need to ensure that the stack
19358 is 64-bit aligned before we start to save iWMMXt registers,
19359 and also before we start to create locals. (A local variable
19360 might be a double or long long which we will load/store using
19361 an iWMMXt instruction). Therefore we need to push another
19362 ARM register, so that the stack will be 64-bit aligned. We
19363 try to avoid using the arg registers (r0 -r3) as they might be
19364 used to pass values in a tail call. */
19365 for (reg
= 4; reg
<= 12; reg
++)
19366 if ((save_reg_mask
& (1 << reg
)) == 0)
19370 save_reg_mask
|= (1 << reg
);
19373 cfun
->machine
->sibcall_blocked
= 1;
19374 save_reg_mask
|= (1 << 3);
19378 /* We may need to push an additional register for use initializing the
19379 PIC base register. */
19380 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19381 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19383 reg
= thumb_find_work_register (1 << 4);
19384 if (!call_used_regs
[reg
])
19385 save_reg_mask
|= (1 << reg
);
19388 return save_reg_mask
;
19391 /* Compute a bit mask of which registers need to be
19392 saved on the stack for the current function. */
19393 static unsigned long
19394 thumb1_compute_save_reg_mask (void)
19396 unsigned long mask
;
19400 for (reg
= 0; reg
< 12; reg
++)
19401 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19405 && !TARGET_SINGLE_PIC_BASE
19406 && arm_pic_register
!= INVALID_REGNUM
19407 && crtl
->uses_pic_offset_table
)
19408 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19410 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19411 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19412 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19414 /* LR will also be pushed if any lo regs are pushed. */
19415 if (mask
& 0xff || thumb_force_lr_save ())
19416 mask
|= (1 << LR_REGNUM
);
19418 /* Make sure we have a low work register if we need one.
19419 We will need one if we are going to push a high register,
19420 but we are not currently intending to push a low register. */
19421 if ((mask
& 0xff) == 0
19422 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19424 /* Use thumb_find_work_register to choose which register
19425 we will use. If the register is live then we will
19426 have to push it. Use LAST_LO_REGNUM as our fallback
19427 choice for the register to select. */
19428 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19429 /* Make sure the register returned by thumb_find_work_register is
19430 not part of the return value. */
19431 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19432 reg
= LAST_LO_REGNUM
;
19434 if (callee_saved_reg_p (reg
))
19438 /* The 504 below is 8 bytes less than 512 because there are two possible
19439 alignment words. We can't tell here if they will be present or not so we
19440 have to play it safe and assume that they are. */
19441 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19442 ROUND_UP_WORD (get_frame_size ()) +
19443 crtl
->outgoing_args_size
) >= 504)
19445 /* This is the same as the code in thumb1_expand_prologue() which
19446 determines which register to use for stack decrement. */
19447 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19448 if (mask
& (1 << reg
))
19451 if (reg
> LAST_LO_REGNUM
)
19453 /* Make sure we have a register available for stack decrement. */
19454 mask
|= 1 << LAST_LO_REGNUM
;
19462 /* Return the number of bytes required to save VFP registers. */
19464 arm_get_vfp_saved_size (void)
19466 unsigned int regno
;
19471 /* Space for saved VFP registers. */
19472 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19475 for (regno
= FIRST_VFP_REGNUM
;
19476 regno
< LAST_VFP_REGNUM
;
19479 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19480 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19484 /* Workaround ARM10 VFPr1 bug. */
19485 if (count
== 2 && !arm_arch6
)
19487 saved
+= count
* 8;
19496 if (count
== 2 && !arm_arch6
)
19498 saved
+= count
* 8;
19505 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19506 everything bar the final return instruction. If simple_return is true,
19507 then do not output epilogue, because it has already been emitted in RTL. */
19509 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19510 bool simple_return
)
19512 char conditional
[10];
19515 unsigned long live_regs_mask
;
19516 unsigned long func_type
;
19517 arm_stack_offsets
*offsets
;
19519 func_type
= arm_current_func_type ();
19521 if (IS_NAKED (func_type
))
19524 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19526 /* If this function was declared non-returning, and we have
19527 found a tail call, then we have to trust that the called
19528 function won't return. */
19533 /* Otherwise, trap an attempted return by aborting. */
19535 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19537 assemble_external_libcall (ops
[1]);
19538 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19544 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19546 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19548 cfun
->machine
->return_used_this_function
= 1;
19550 offsets
= arm_get_frame_offsets ();
19551 live_regs_mask
= offsets
->saved_regs_mask
;
19553 if (!simple_return
&& live_regs_mask
)
19555 const char * return_reg
;
19557 /* If we do not have any special requirements for function exit
19558 (e.g. interworking) then we can load the return address
19559 directly into the PC. Otherwise we must load it into LR. */
19561 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19562 return_reg
= reg_names
[PC_REGNUM
];
19564 return_reg
= reg_names
[LR_REGNUM
];
19566 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19568 /* There are three possible reasons for the IP register
19569 being saved. 1) a stack frame was created, in which case
19570 IP contains the old stack pointer, or 2) an ISR routine
19571 corrupted it, or 3) it was saved to align the stack on
19572 iWMMXt. In case 1, restore IP into SP, otherwise just
19574 if (frame_pointer_needed
)
19576 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19577 live_regs_mask
|= (1 << SP_REGNUM
);
19580 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19583 /* On some ARM architectures it is faster to use LDR rather than
19584 LDM to load a single register. On other architectures, the
19585 cost is the same. In 26 bit mode, or for exception handlers,
19586 we have to use LDM to load the PC so that the CPSR is also
19588 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19589 if (live_regs_mask
== (1U << reg
))
19592 if (reg
<= LAST_ARM_REGNUM
19593 && (reg
!= LR_REGNUM
19595 || ! IS_INTERRUPT (func_type
)))
19597 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19598 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19605 /* Generate the load multiple instruction to restore the
19606 registers. Note we can get here, even if
19607 frame_pointer_needed is true, but only if sp already
19608 points to the base of the saved core registers. */
19609 if (live_regs_mask
& (1 << SP_REGNUM
))
19611 unsigned HOST_WIDE_INT stack_adjust
;
19613 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19614 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19616 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19617 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19620 /* If we can't use ldmib (SA110 bug),
19621 then try to pop r3 instead. */
19623 live_regs_mask
|= 1 << 3;
19625 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19628 /* For interrupt returns we have to use an LDM rather than
19629 a POP so that we can use the exception return variant. */
19630 else if (IS_INTERRUPT (func_type
))
19631 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19633 sprintf (instr
, "pop%s\t{", conditional
);
19635 p
= instr
+ strlen (instr
);
19637 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19638 if (live_regs_mask
& (1 << reg
))
19640 int l
= strlen (reg_names
[reg
]);
19646 memcpy (p
, ", ", 2);
19650 memcpy (p
, "%|", 2);
19651 memcpy (p
+ 2, reg_names
[reg
], l
);
19655 if (live_regs_mask
& (1 << LR_REGNUM
))
19657 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19658 /* If returning from an interrupt, restore the CPSR. */
19659 if (IS_INTERRUPT (func_type
))
19666 output_asm_insn (instr
, & operand
);
19668 /* See if we need to generate an extra instruction to
19669 perform the actual function return. */
19671 && func_type
!= ARM_FT_INTERWORKED
19672 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19674 /* The return has already been handled
19675 by loading the LR into the PC. */
19682 switch ((int) ARM_FUNC_TYPE (func_type
))
19686 /* ??? This is wrong for unified assembly syntax. */
19687 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19690 case ARM_FT_INTERWORKED
:
19691 gcc_assert (arm_arch5
|| arm_arch4t
);
19692 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19695 case ARM_FT_EXCEPTION
:
19696 /* ??? This is wrong for unified assembly syntax. */
19697 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19701 /* Use bx if it's available. */
19702 if (arm_arch5
|| arm_arch4t
)
19703 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19705 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19709 output_asm_insn (instr
, & operand
);
19715 /* Write the function name into the code section, directly preceding
19716 the function prologue.
19718 Code will be output similar to this:
19720 .ascii "arm_poke_function_name", 0
19723 .word 0xff000000 + (t1 - t0)
19724 arm_poke_function_name
19726 stmfd sp!, {fp, ip, lr, pc}
19729 When performing a stack backtrace, code can inspect the value
19730 of 'pc' stored at 'fp' + 0. If the trace function then looks
19731 at location pc - 12 and the top 8 bits are set, then we know
19732 that there is a function name embedded immediately preceding this
19733 location and has length ((pc[-3]) & 0xff000000).
19735 We assume that pc is declared as a pointer to an unsigned long.
19737 It is of no benefit to output the function name if we are assembling
19738 a leaf function. These function types will not contain a stack
19739 backtrace structure, therefore it is not possible to determine the
19742 arm_poke_function_name (FILE *stream
, const char *name
)
19744 unsigned long alignlength
;
19745 unsigned long length
;
19748 length
= strlen (name
) + 1;
19749 alignlength
= ROUND_UP_WORD (length
);
19751 ASM_OUTPUT_ASCII (stream
, name
, length
);
19752 ASM_OUTPUT_ALIGN (stream
, 2);
19753 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19754 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19757 /* Place some comments into the assembler stream
19758 describing the current function. */
19760 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19762 unsigned long func_type
;
19764 /* ??? Do we want to print some of the below anyway? */
19768 /* Sanity check. */
19769 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19771 func_type
= arm_current_func_type ();
19773 switch ((int) ARM_FUNC_TYPE (func_type
))
19776 case ARM_FT_NORMAL
:
19778 case ARM_FT_INTERWORKED
:
19779 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19782 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19785 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19787 case ARM_FT_EXCEPTION
:
19788 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19792 if (IS_NAKED (func_type
))
19793 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19795 if (IS_VOLATILE (func_type
))
19796 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19798 if (IS_NESTED (func_type
))
19799 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19800 if (IS_STACKALIGN (func_type
))
19801 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19803 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19805 crtl
->args
.pretend_args_size
, frame_size
);
19807 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19808 frame_pointer_needed
,
19809 cfun
->machine
->uses_anonymous_args
);
19811 if (cfun
->machine
->lr_save_eliminated
)
19812 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19814 if (crtl
->calls_eh_return
)
19815 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19820 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19821 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19823 arm_stack_offsets
*offsets
;
19829 /* Emit any call-via-reg trampolines that are needed for v4t support
19830 of call_reg and call_value_reg type insns. */
19831 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19833 rtx label
= cfun
->machine
->call_via
[regno
];
19837 switch_to_section (function_section (current_function_decl
));
19838 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19839 CODE_LABEL_NUMBER (label
));
19840 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19844 /* ??? Probably not safe to set this here, since it assumes that a
19845 function will be emitted as assembly immediately after we generate
19846 RTL for it. This does not happen for inline functions. */
19847 cfun
->machine
->return_used_this_function
= 0;
19849 else /* TARGET_32BIT */
19851 /* We need to take into account any stack-frame rounding. */
19852 offsets
= arm_get_frame_offsets ();
19854 gcc_assert (!use_return_insn (FALSE
, NULL
)
19855 || (cfun
->machine
->return_used_this_function
!= 0)
19856 || offsets
->saved_regs
== offsets
->outgoing_args
19857 || frame_pointer_needed
);
19861 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19862 STR and STRD. If an even number of registers are being pushed, one
19863 or more STRD patterns are created for each register pair. If an
19864 odd number of registers are pushed, emit an initial STR followed by
19865 as many STRD instructions as are needed. This works best when the
19866 stack is initially 64-bit aligned (the normal case), since it
19867 ensures that each STRD is also 64-bit aligned. */
19869 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19874 rtx par
= NULL_RTX
;
19875 rtx dwarf
= NULL_RTX
;
19879 num_regs
= bit_count (saved_regs_mask
);
19881 /* Must be at least one register to save, and can't save SP or PC. */
19882 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19883 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19884 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19886 /* Create sequence for DWARF info. All the frame-related data for
19887 debugging is held in this wrapper. */
19888 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19890 /* Describe the stack adjustment. */
19891 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19892 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19893 RTX_FRAME_RELATED_P (tmp
) = 1;
19894 XVECEXP (dwarf
, 0, 0) = tmp
;
19896 /* Find the first register. */
19897 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19902 /* If there's an odd number of registers to push. Start off by
19903 pushing a single register. This ensures that subsequent strd
19904 operations are dword aligned (assuming that SP was originally
19905 64-bit aligned). */
19906 if ((num_regs
& 1) != 0)
19908 rtx reg
, mem
, insn
;
19910 reg
= gen_rtx_REG (SImode
, regno
);
19912 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19913 stack_pointer_rtx
));
19915 mem
= gen_frame_mem (Pmode
,
19917 (Pmode
, stack_pointer_rtx
,
19918 plus_constant (Pmode
, stack_pointer_rtx
,
19921 tmp
= gen_rtx_SET (mem
, reg
);
19922 RTX_FRAME_RELATED_P (tmp
) = 1;
19923 insn
= emit_insn (tmp
);
19924 RTX_FRAME_RELATED_P (insn
) = 1;
19925 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19926 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19927 RTX_FRAME_RELATED_P (tmp
) = 1;
19930 XVECEXP (dwarf
, 0, i
) = tmp
;
19934 while (i
< num_regs
)
19935 if (saved_regs_mask
& (1 << regno
))
19937 rtx reg1
, reg2
, mem1
, mem2
;
19938 rtx tmp0
, tmp1
, tmp2
;
19941 /* Find the register to pair with this one. */
19942 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19946 reg1
= gen_rtx_REG (SImode
, regno
);
19947 reg2
= gen_rtx_REG (SImode
, regno2
);
19954 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19957 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19959 -4 * (num_regs
- 1)));
19960 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19961 plus_constant (Pmode
, stack_pointer_rtx
,
19963 tmp1
= gen_rtx_SET (mem1
, reg1
);
19964 tmp2
= gen_rtx_SET (mem2
, reg2
);
19965 RTX_FRAME_RELATED_P (tmp0
) = 1;
19966 RTX_FRAME_RELATED_P (tmp1
) = 1;
19967 RTX_FRAME_RELATED_P (tmp2
) = 1;
19968 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19969 XVECEXP (par
, 0, 0) = tmp0
;
19970 XVECEXP (par
, 0, 1) = tmp1
;
19971 XVECEXP (par
, 0, 2) = tmp2
;
19972 insn
= emit_insn (par
);
19973 RTX_FRAME_RELATED_P (insn
) = 1;
19974 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19978 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19981 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19984 tmp1
= gen_rtx_SET (mem1
, reg1
);
19985 tmp2
= gen_rtx_SET (mem2
, reg2
);
19986 RTX_FRAME_RELATED_P (tmp1
) = 1;
19987 RTX_FRAME_RELATED_P (tmp2
) = 1;
19988 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19989 XVECEXP (par
, 0, 0) = tmp1
;
19990 XVECEXP (par
, 0, 1) = tmp2
;
19994 /* Create unwind information. This is an approximation. */
19995 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
19996 plus_constant (Pmode
,
20000 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
20001 plus_constant (Pmode
,
20006 RTX_FRAME_RELATED_P (tmp1
) = 1;
20007 RTX_FRAME_RELATED_P (tmp2
) = 1;
20008 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
20009 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
20011 regno
= regno2
+ 1;
20019 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20020 whenever possible, otherwise it emits single-word stores. The first store
20021 also allocates stack space for all saved registers, using writeback with
20022 post-addressing mode. All other stores use offset addressing. If no STRD
20023 can be emitted, this function emits a sequence of single-word stores,
20024 and not an STM as before, because single-word stores provide more freedom
20025 scheduling and can be turned into an STM by peephole optimizations. */
20027 arm_emit_strd_push (unsigned long saved_regs_mask
)
20030 int i
, j
, dwarf_index
= 0;
20032 rtx dwarf
= NULL_RTX
;
20033 rtx insn
= NULL_RTX
;
20036 /* TODO: A more efficient code can be emitted by changing the
20037 layout, e.g., first push all pairs that can use STRD to keep the
20038 stack aligned, and then push all other registers. */
20039 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20040 if (saved_regs_mask
& (1 << i
))
20043 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20044 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
20045 gcc_assert (num_regs
> 0);
20047 /* Create sequence for DWARF info. */
20048 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20050 /* For dwarf info, we generate explicit stack update. */
20051 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20052 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20053 RTX_FRAME_RELATED_P (tmp
) = 1;
20054 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20056 /* Save registers. */
20057 offset
= - 4 * num_regs
;
20059 while (j
<= LAST_ARM_REGNUM
)
20060 if (saved_regs_mask
& (1 << j
))
20063 && (saved_regs_mask
& (1 << (j
+ 1))))
20065 /* Current register and previous register form register pair for
20066 which STRD can be generated. */
20069 /* Allocate stack space for all saved registers. */
20070 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20071 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20072 mem
= gen_frame_mem (DImode
, tmp
);
20075 else if (offset
> 0)
20076 mem
= gen_frame_mem (DImode
,
20077 plus_constant (Pmode
,
20081 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20083 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
20084 RTX_FRAME_RELATED_P (tmp
) = 1;
20085 tmp
= emit_insn (tmp
);
20087 /* Record the first store insn. */
20088 if (dwarf_index
== 1)
20091 /* Generate dwarf info. */
20092 mem
= gen_frame_mem (SImode
,
20093 plus_constant (Pmode
,
20096 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20097 RTX_FRAME_RELATED_P (tmp
) = 1;
20098 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20100 mem
= gen_frame_mem (SImode
,
20101 plus_constant (Pmode
,
20104 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20105 RTX_FRAME_RELATED_P (tmp
) = 1;
20106 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20113 /* Emit a single word store. */
20116 /* Allocate stack space for all saved registers. */
20117 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20118 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20119 mem
= gen_frame_mem (SImode
, tmp
);
20122 else if (offset
> 0)
20123 mem
= gen_frame_mem (SImode
,
20124 plus_constant (Pmode
,
20128 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20130 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20131 RTX_FRAME_RELATED_P (tmp
) = 1;
20132 tmp
= emit_insn (tmp
);
20134 /* Record the first store insn. */
20135 if (dwarf_index
== 1)
20138 /* Generate dwarf info. */
20139 mem
= gen_frame_mem (SImode
,
20140 plus_constant(Pmode
,
20143 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20144 RTX_FRAME_RELATED_P (tmp
) = 1;
20145 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20154 /* Attach dwarf info to the first insn we generate. */
20155 gcc_assert (insn
!= NULL_RTX
);
20156 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20157 RTX_FRAME_RELATED_P (insn
) = 1;
20160 /* Generate and emit an insn that we will recognize as a push_multi.
20161 Unfortunately, since this insn does not reflect very well the actual
20162 semantics of the operation, we need to annotate the insn for the benefit
20163 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20164 MASK for registers that should be annotated for DWARF2 frame unwind
20167 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20170 int num_dwarf_regs
= 0;
20174 int dwarf_par_index
;
20177 /* We don't record the PC in the dwarf frame information. */
20178 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20180 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20182 if (mask
& (1 << i
))
20184 if (dwarf_regs_mask
& (1 << i
))
20188 gcc_assert (num_regs
&& num_regs
<= 16);
20189 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20191 /* For the body of the insn we are going to generate an UNSPEC in
20192 parallel with several USEs. This allows the insn to be recognized
20193 by the push_multi pattern in the arm.md file.
20195 The body of the insn looks something like this:
20198 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20199 (const_int:SI <num>)))
20200 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20206 For the frame note however, we try to be more explicit and actually
20207 show each register being stored into the stack frame, plus a (single)
20208 decrement of the stack pointer. We do it this way in order to be
20209 friendly to the stack unwinding code, which only wants to see a single
20210 stack decrement per instruction. The RTL we generate for the note looks
20211 something like this:
20214 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20215 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20216 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20217 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20221 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20222 instead we'd have a parallel expression detailing all
20223 the stores to the various memory addresses so that debug
20224 information is more up-to-date. Remember however while writing
20225 this to take care of the constraints with the push instruction.
20227 Note also that this has to be taken care of for the VFP registers.
20229 For more see PR43399. */
20231 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20232 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20233 dwarf_par_index
= 1;
20235 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20237 if (mask
& (1 << i
))
20239 reg
= gen_rtx_REG (SImode
, i
);
20241 XVECEXP (par
, 0, 0)
20242 = gen_rtx_SET (gen_frame_mem
20244 gen_rtx_PRE_MODIFY (Pmode
,
20247 (Pmode
, stack_pointer_rtx
,
20250 gen_rtx_UNSPEC (BLKmode
,
20251 gen_rtvec (1, reg
),
20252 UNSPEC_PUSH_MULT
));
20254 if (dwarf_regs_mask
& (1 << i
))
20256 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20258 RTX_FRAME_RELATED_P (tmp
) = 1;
20259 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20266 for (j
= 1, i
++; j
< num_regs
; i
++)
20268 if (mask
& (1 << i
))
20270 reg
= gen_rtx_REG (SImode
, i
);
20272 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20274 if (dwarf_regs_mask
& (1 << i
))
20277 = gen_rtx_SET (gen_frame_mem
20279 plus_constant (Pmode
, stack_pointer_rtx
,
20282 RTX_FRAME_RELATED_P (tmp
) = 1;
20283 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20290 par
= emit_insn (par
);
20292 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20293 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20294 RTX_FRAME_RELATED_P (tmp
) = 1;
20295 XVECEXP (dwarf
, 0, 0) = tmp
;
20297 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20302 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20303 SIZE is the offset to be adjusted.
20304 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20306 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20310 RTX_FRAME_RELATED_P (insn
) = 1;
20311 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20312 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20315 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20316 SAVED_REGS_MASK shows which registers need to be restored.
20318 Unfortunately, since this insn does not reflect very well the actual
20319 semantics of the operation, we need to annotate the insn for the benefit
20320 of DWARF2 frame unwind information. */
20322 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20327 rtx dwarf
= NULL_RTX
;
20329 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20333 offset_adj
= return_in_pc
? 1 : 0;
20334 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20335 if (saved_regs_mask
& (1 << i
))
20338 gcc_assert (num_regs
&& num_regs
<= 16);
20340 /* If SP is in reglist, then we don't emit SP update insn. */
20341 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20343 /* The parallel needs to hold num_regs SETs
20344 and one SET for the stack update. */
20345 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20348 XVECEXP (par
, 0, 0) = ret_rtx
;
20352 /* Increment the stack pointer, based on there being
20353 num_regs 4-byte registers to restore. */
20354 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20355 plus_constant (Pmode
,
20358 RTX_FRAME_RELATED_P (tmp
) = 1;
20359 XVECEXP (par
, 0, offset_adj
) = tmp
;
20362 /* Now restore every reg, which may include PC. */
20363 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20364 if (saved_regs_mask
& (1 << i
))
20366 reg
= gen_rtx_REG (SImode
, i
);
20367 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20369 /* Emit single load with writeback. */
20370 tmp
= gen_frame_mem (SImode
,
20371 gen_rtx_POST_INC (Pmode
,
20372 stack_pointer_rtx
));
20373 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20374 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20378 tmp
= gen_rtx_SET (reg
,
20381 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20382 RTX_FRAME_RELATED_P (tmp
) = 1;
20383 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20385 /* We need to maintain a sequence for DWARF info too. As dwarf info
20386 should not have PC, skip PC. */
20387 if (i
!= PC_REGNUM
)
20388 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20394 par
= emit_jump_insn (par
);
20396 par
= emit_insn (par
);
20398 REG_NOTES (par
) = dwarf
;
20400 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20401 stack_pointer_rtx
, stack_pointer_rtx
);
20404 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20405 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20407 Unfortunately, since this insn does not reflect very well the actual
20408 semantics of the operation, we need to annotate the insn for the benefit
20409 of DWARF2 frame unwind information. */
20411 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20415 rtx dwarf
= NULL_RTX
;
20418 gcc_assert (num_regs
&& num_regs
<= 32);
20420 /* Workaround ARM10 VFPr1 bug. */
20421 if (num_regs
== 2 && !arm_arch6
)
20423 if (first_reg
== 15)
20429 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20430 there could be up to 32 D-registers to restore.
20431 If there are more than 16 D-registers, make two recursive calls,
20432 each of which emits one pop_multi instruction. */
20435 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20436 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20440 /* The parallel needs to hold num_regs SETs
20441 and one SET for the stack update. */
20442 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20444 /* Increment the stack pointer, based on there being
20445 num_regs 8-byte registers to restore. */
20446 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20447 RTX_FRAME_RELATED_P (tmp
) = 1;
20448 XVECEXP (par
, 0, 0) = tmp
;
20450 /* Now show every reg that will be restored, using a SET for each. */
20451 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20453 reg
= gen_rtx_REG (DFmode
, i
);
20455 tmp
= gen_rtx_SET (reg
,
20458 plus_constant (Pmode
, base_reg
, 8 * j
)));
20459 RTX_FRAME_RELATED_P (tmp
) = 1;
20460 XVECEXP (par
, 0, j
+ 1) = tmp
;
20462 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20467 par
= emit_insn (par
);
20468 REG_NOTES (par
) = dwarf
;
20470 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20471 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
20473 RTX_FRAME_RELATED_P (par
) = 1;
20474 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20477 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20478 base_reg
, base_reg
);
20481 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20482 number of registers are being popped, multiple LDRD patterns are created for
20483 all register pairs. If odd number of registers are popped, last register is
20484 loaded by using LDR pattern. */
20486 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20490 rtx par
= NULL_RTX
;
20491 rtx dwarf
= NULL_RTX
;
20492 rtx tmp
, reg
, tmp1
;
20493 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20495 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20496 if (saved_regs_mask
& (1 << i
))
20499 gcc_assert (num_regs
&& num_regs
<= 16);
20501 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20502 to be popped. So, if num_regs is even, now it will become odd,
20503 and we can generate pop with PC. If num_regs is odd, it will be
20504 even now, and ldr with return can be generated for PC. */
20508 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20510 /* Var j iterates over all the registers to gather all the registers in
20511 saved_regs_mask. Var i gives index of saved registers in stack frame.
20512 A PARALLEL RTX of register-pair is created here, so that pattern for
20513 LDRD can be matched. As PC is always last register to be popped, and
20514 we have already decremented num_regs if PC, we don't have to worry
20515 about PC in this loop. */
20516 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20517 if (saved_regs_mask
& (1 << j
))
20519 /* Create RTX for memory load. */
20520 reg
= gen_rtx_REG (SImode
, j
);
20521 tmp
= gen_rtx_SET (reg
,
20522 gen_frame_mem (SImode
,
20523 plus_constant (Pmode
,
20524 stack_pointer_rtx
, 4 * i
)));
20525 RTX_FRAME_RELATED_P (tmp
) = 1;
20529 /* When saved-register index (i) is even, the RTX to be emitted is
20530 yet to be created. Hence create it first. The LDRD pattern we
20531 are generating is :
20532 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20533 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20534 where target registers need not be consecutive. */
20535 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20539 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20540 added as 0th element and if i is odd, reg_i is added as 1st element
20541 of LDRD pattern shown above. */
20542 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20543 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20547 /* When saved-register index (i) is odd, RTXs for both the registers
20548 to be loaded are generated in above given LDRD pattern, and the
20549 pattern can be emitted now. */
20550 par
= emit_insn (par
);
20551 REG_NOTES (par
) = dwarf
;
20552 RTX_FRAME_RELATED_P (par
) = 1;
20558 /* If the number of registers pushed is odd AND return_in_pc is false OR
20559 number of registers are even AND return_in_pc is true, last register is
20560 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20561 then LDR with post increment. */
20563 /* Increment the stack pointer, based on there being
20564 num_regs 4-byte registers to restore. */
20565 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20566 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20567 RTX_FRAME_RELATED_P (tmp
) = 1;
20568 tmp
= emit_insn (tmp
);
20571 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20572 stack_pointer_rtx
, stack_pointer_rtx
);
20577 if (((num_regs
% 2) == 1 && !return_in_pc
)
20578 || ((num_regs
% 2) == 0 && return_in_pc
))
20580 /* Scan for the single register to be popped. Skip until the saved
20581 register is found. */
20582 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20584 /* Gen LDR with post increment here. */
20585 tmp1
= gen_rtx_MEM (SImode
,
20586 gen_rtx_POST_INC (SImode
,
20587 stack_pointer_rtx
));
20588 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20590 reg
= gen_rtx_REG (SImode
, j
);
20591 tmp
= gen_rtx_SET (reg
, tmp1
);
20592 RTX_FRAME_RELATED_P (tmp
) = 1;
20593 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20597 /* If return_in_pc, j must be PC_REGNUM. */
20598 gcc_assert (j
== PC_REGNUM
);
20599 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20600 XVECEXP (par
, 0, 0) = ret_rtx
;
20601 XVECEXP (par
, 0, 1) = tmp
;
20602 par
= emit_jump_insn (par
);
20606 par
= emit_insn (tmp
);
20607 REG_NOTES (par
) = dwarf
;
20608 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20609 stack_pointer_rtx
, stack_pointer_rtx
);
20613 else if ((num_regs
% 2) == 1 && return_in_pc
)
20615 /* There are 2 registers to be popped. So, generate the pattern
20616 pop_multiple_with_stack_update_and_return to pop in PC. */
20617 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20623 /* LDRD in ARM mode needs consecutive registers as operands. This function
20624 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20625 offset addressing and then generates one separate stack udpate. This provides
20626 more scheduling freedom, compared to writeback on every load. However,
20627 if the function returns using load into PC directly
20628 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20629 before the last load. TODO: Add a peephole optimization to recognize
20630 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20631 peephole optimization to merge the load at stack-offset zero
20632 with the stack update instruction using load with writeback
20633 in post-index addressing mode. */
20635 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20639 rtx par
= NULL_RTX
;
20640 rtx dwarf
= NULL_RTX
;
20643 /* Restore saved registers. */
20644 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20646 while (j
<= LAST_ARM_REGNUM
)
20647 if (saved_regs_mask
& (1 << j
))
20650 && (saved_regs_mask
& (1 << (j
+ 1)))
20651 && (j
+ 1) != PC_REGNUM
)
20653 /* Current register and next register form register pair for which
20654 LDRD can be generated. PC is always the last register popped, and
20655 we handle it separately. */
20657 mem
= gen_frame_mem (DImode
,
20658 plus_constant (Pmode
,
20662 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20664 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20665 tmp
= emit_insn (tmp
);
20666 RTX_FRAME_RELATED_P (tmp
) = 1;
20668 /* Generate dwarf info. */
20670 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20671 gen_rtx_REG (SImode
, j
),
20673 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20674 gen_rtx_REG (SImode
, j
+ 1),
20677 REG_NOTES (tmp
) = dwarf
;
20682 else if (j
!= PC_REGNUM
)
20684 /* Emit a single word load. */
20686 mem
= gen_frame_mem (SImode
,
20687 plus_constant (Pmode
,
20691 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20693 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20694 tmp
= emit_insn (tmp
);
20695 RTX_FRAME_RELATED_P (tmp
) = 1;
20697 /* Generate dwarf info. */
20698 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20699 gen_rtx_REG (SImode
, j
),
20705 else /* j == PC_REGNUM */
20711 /* Update the stack. */
20714 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20715 plus_constant (Pmode
,
20718 tmp
= emit_insn (tmp
);
20719 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20720 stack_pointer_rtx
, stack_pointer_rtx
);
20724 if (saved_regs_mask
& (1 << PC_REGNUM
))
20726 /* Only PC is to be popped. */
20727 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20728 XVECEXP (par
, 0, 0) = ret_rtx
;
20729 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20730 gen_frame_mem (SImode
,
20731 gen_rtx_POST_INC (SImode
,
20732 stack_pointer_rtx
)));
20733 RTX_FRAME_RELATED_P (tmp
) = 1;
20734 XVECEXP (par
, 0, 1) = tmp
;
20735 par
= emit_jump_insn (par
);
20737 /* Generate dwarf info. */
20738 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20739 gen_rtx_REG (SImode
, PC_REGNUM
),
20741 REG_NOTES (par
) = dwarf
;
20742 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20743 stack_pointer_rtx
, stack_pointer_rtx
);
20747 /* Calculate the size of the return value that is passed in registers. */
20749 arm_size_return_regs (void)
20753 if (crtl
->return_rtx
!= 0)
20754 mode
= GET_MODE (crtl
->return_rtx
);
20756 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20758 return GET_MODE_SIZE (mode
);
20761 /* Return true if the current function needs to save/restore LR. */
20763 thumb_force_lr_save (void)
20765 return !cfun
->machine
->lr_save_eliminated
20766 && (!leaf_function_p ()
20767 || thumb_far_jump_used_p ()
20768 || df_regs_ever_live_p (LR_REGNUM
));
20771 /* We do not know if r3 will be available because
20772 we do have an indirect tailcall happening in this
20773 particular case. */
20775 is_indirect_tailcall_p (rtx call
)
20777 rtx pat
= PATTERN (call
);
20779 /* Indirect tail call. */
20780 pat
= XVECEXP (pat
, 0, 0);
20781 if (GET_CODE (pat
) == SET
)
20782 pat
= SET_SRC (pat
);
20784 pat
= XEXP (XEXP (pat
, 0), 0);
20785 return REG_P (pat
);
20788 /* Return true if r3 is used by any of the tail call insns in the
20789 current function. */
20791 any_sibcall_could_use_r3 (void)
20796 if (!crtl
->tail_call_emit
)
20798 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20799 if (e
->flags
& EDGE_SIBCALL
)
20801 rtx call
= BB_END (e
->src
);
20802 if (!CALL_P (call
))
20803 call
= prev_nonnote_nondebug_insn (call
);
20804 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20805 if (find_regno_fusage (call
, USE
, 3)
20806 || is_indirect_tailcall_p (call
))
20813 /* Compute the distance from register FROM to register TO.
20814 These can be the arg pointer (26), the soft frame pointer (25),
20815 the stack pointer (13) or the hard frame pointer (11).
20816 In thumb mode r7 is used as the soft frame pointer, if needed.
20817 Typical stack layout looks like this:
20819 old stack pointer -> | |
20822 | | saved arguments for
20823 | | vararg functions
20826 hard FP & arg pointer -> | | \
20834 soft frame pointer -> | | /
20839 locals base pointer -> | | /
20844 current stack pointer -> | | /
20847 For a given function some or all of these stack components
20848 may not be needed, giving rise to the possibility of
20849 eliminating some of the registers.
20851 The values returned by this function must reflect the behavior
20852 of arm_expand_prologue() and arm_compute_save_reg_mask().
20854 The sign of the number returned reflects the direction of stack
20855 growth, so the values are positive for all eliminations except
20856 from the soft frame pointer to the hard frame pointer.
20858 SFP may point just inside the local variables block to ensure correct
20862 /* Calculate stack offsets. These are used to calculate register elimination
20863 offsets and in prologue/epilogue code. Also calculates which registers
20864 should be saved. */
20866 static arm_stack_offsets
*
20867 arm_get_frame_offsets (void)
20869 struct arm_stack_offsets
*offsets
;
20870 unsigned long func_type
;
20874 HOST_WIDE_INT frame_size
;
20877 offsets
= &cfun
->machine
->stack_offsets
;
20879 /* We need to know if we are a leaf function. Unfortunately, it
20880 is possible to be called after start_sequence has been called,
20881 which causes get_insns to return the insns for the sequence,
20882 not the function, which will cause leaf_function_p to return
20883 the incorrect result.
20885 to know about leaf functions once reload has completed, and the
20886 frame size cannot be changed after that time, so we can safely
20887 use the cached value. */
20889 if (reload_completed
)
20892 /* Initially this is the size of the local variables. It will translated
20893 into an offset once we have determined the size of preceding data. */
20894 frame_size
= ROUND_UP_WORD (get_frame_size ());
20896 leaf
= leaf_function_p ();
20898 /* Space for variadic functions. */
20899 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20901 /* In Thumb mode this is incorrect, but never used. */
20903 = (offsets
->saved_args
20904 + arm_compute_static_chain_stack_bytes ()
20905 + (frame_pointer_needed
? 4 : 0));
20909 unsigned int regno
;
20911 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20912 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20913 saved
= core_saved
;
20915 /* We know that SP will be doubleword aligned on entry, and we must
20916 preserve that condition at any subroutine call. We also require the
20917 soft frame pointer to be doubleword aligned. */
20919 if (TARGET_REALLY_IWMMXT
)
20921 /* Check for the call-saved iWMMXt registers. */
20922 for (regno
= FIRST_IWMMXT_REGNUM
;
20923 regno
<= LAST_IWMMXT_REGNUM
;
20925 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20929 func_type
= arm_current_func_type ();
20930 /* Space for saved VFP registers. */
20931 if (! IS_VOLATILE (func_type
)
20932 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20933 saved
+= arm_get_vfp_saved_size ();
20935 else /* TARGET_THUMB1 */
20937 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20938 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20939 saved
= core_saved
;
20940 if (TARGET_BACKTRACE
)
20944 /* Saved registers include the stack frame. */
20945 offsets
->saved_regs
20946 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20947 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20949 /* A leaf function does not need any stack alignment if it has nothing
20951 if (leaf
&& frame_size
== 0
20952 /* However if it calls alloca(), we have a dynamically allocated
20953 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20954 && ! cfun
->calls_alloca
)
20956 offsets
->outgoing_args
= offsets
->soft_frame
;
20957 offsets
->locals_base
= offsets
->soft_frame
;
20961 /* Ensure SFP has the correct alignment. */
20962 if (ARM_DOUBLEWORD_ALIGN
20963 && (offsets
->soft_frame
& 7))
20965 offsets
->soft_frame
+= 4;
20966 /* Try to align stack by pushing an extra reg. Don't bother doing this
20967 when there is a stack frame as the alignment will be rolled into
20968 the normal stack adjustment. */
20969 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20973 /* Register r3 is caller-saved. Normally it does not need to be
20974 saved on entry by the prologue. However if we choose to save
20975 it for padding then we may confuse the compiler into thinking
20976 a prologue sequence is required when in fact it is not. This
20977 will occur when shrink-wrapping if r3 is used as a scratch
20978 register and there are no other callee-saved writes.
20980 This situation can be avoided when other callee-saved registers
20981 are available and r3 is not mandatory if we choose a callee-saved
20982 register for padding. */
20983 bool prefer_callee_reg_p
= false;
20985 /* If it is safe to use r3, then do so. This sometimes
20986 generates better code on Thumb-2 by avoiding the need to
20987 use 32-bit push/pop instructions. */
20988 if (! any_sibcall_could_use_r3 ()
20989 && arm_size_return_regs () <= 12
20990 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20992 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20995 if (!TARGET_THUMB2
)
20996 prefer_callee_reg_p
= true;
20999 || prefer_callee_reg_p
)
21001 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
21003 /* Avoid fixed registers; they may be changed at
21004 arbitrary times so it's unsafe to restore them
21005 during the epilogue. */
21007 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
21017 offsets
->saved_regs
+= 4;
21018 offsets
->saved_regs_mask
|= (1 << reg
);
21023 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
21024 offsets
->outgoing_args
= (offsets
->locals_base
21025 + crtl
->outgoing_args_size
);
21027 if (ARM_DOUBLEWORD_ALIGN
)
21029 /* Ensure SP remains doubleword aligned. */
21030 if (offsets
->outgoing_args
& 7)
21031 offsets
->outgoing_args
+= 4;
21032 gcc_assert (!(offsets
->outgoing_args
& 7));
21039 /* Calculate the relative offsets for the different stack pointers. Positive
21040 offsets are in the direction of stack growth. */
/* Implements INITIAL_ELIMINATION_OFFSET for the ARM ELIMINABLE_REGS pairs.
   All distances are differences between fields of the cached frame layout
   (arm_get_frame_offsets).  NOTE(review): extraction is garbled — the
   enclosing switch statements, braces and return type line are missing
   from this view; only the case labels and return expressions survive.  */
21043 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
21045 arm_stack_offsets
*offsets
;
21047 offsets
= arm_get_frame_offsets ();
21049 /* OK, now we have enough information to compute the distances.
21050 There must be an entry in these switch tables for each pair
21051 of registers in ELIMINABLE_REGS, even if some of the entries
21052 seem to be redundant or useless. */
21055 case ARG_POINTER_REGNUM
:
21058 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21061 case FRAME_POINTER_REGNUM
:
21062 /* This is the reverse of the soft frame pointer
21063 to hard frame pointer elimination below. */
21064 return offsets
->soft_frame
- offsets
->saved_args
;
21066 case ARM_HARD_FRAME_POINTER_REGNUM
:
21067 /* This is only non-zero in the case where the static chain register
21068 is stored above the frame. */
21069 return offsets
->frame
- offsets
->saved_args
- 4;
21071 case STACK_POINTER_REGNUM
:
21072 /* If nothing has been pushed on the stack at all
21073 then this will return -4. This *is* correct! */
21074 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
21077 gcc_unreachable ();
21079 gcc_unreachable ();
/* Second group: eliminations from the soft frame pointer.  */
21081 case FRAME_POINTER_REGNUM
:
21084 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21087 case ARM_HARD_FRAME_POINTER_REGNUM
:
21088 /* The hard frame pointer points to the top entry in the
21089 stack frame. The soft frame pointer to the bottom entry
21090 in the stack frame. If there is no stack frame at all,
21091 then they are identical. */
21093 return offsets
->frame
- offsets
->soft_frame
;
21095 case STACK_POINTER_REGNUM
:
21096 return offsets
->outgoing_args
- offsets
->soft_frame
;
21099 gcc_unreachable ();
21101 gcc_unreachable ();
21104 /* You cannot eliminate from the stack pointer.
21105 In theory you could eliminate from the hard frame
21106 pointer to the stack pointer, but this will never
21107 happen, since if a stack frame is not needed the
21108 hard frame pointer will never be used. */
21109 gcc_unreachable ();
21113 /* Given FROM and TO register numbers, say whether this elimination is
21114 allowed. Frame pointer elimination is automatically handled.
21116 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21117 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21118 pointer, we must eliminate FRAME_POINTER_REGNUM into
21119 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21120 ARG_POINTER_REGNUM. */
/* Implements TARGET_CAN_ELIMINATE as a chain of disallowed pairs; each
   ternary arm vetoes one combination, anything not listed falls through.
   NOTE(review): the return type line and the final "true" arm of the
   chain are missing from this garbled extraction.  */
21123 arm_can_eliminate (const int from
, const int to
)
21125 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21126 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21127 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21128 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21132 /* Emit RTL to save coprocessor registers on function entry. Returns the
21133 number of bytes pushed. */
/* Visible behavior: (1) push live call-saved iWMMXt registers one at a
   time with SP pre-decrement stores, marking each frame-related;
   (2) on hard-float VFP targets, scan D-register pairs and emit fstmd
   for each maximal run of live call-saved pairs via vfp_emit_fstmd,
   accumulating the byte count in saved_size.
   NOTE(review): garbled extraction — declarations of `reg`/`insn`, some
   braces, and the iWMMXt size accounting are missing from this view.  */
21136 arm_save_coproc_regs(void)
21138 int saved_size
= 0;
21140 unsigned start_reg
;
21143 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21144 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21146 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21147 insn
= gen_rtx_MEM (V2SImode
, insn
);
21148 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21149 RTX_FRAME_RELATED_P (insn
) = 1;
21153 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
21155 start_reg
= FIRST_VFP_REGNUM
;
/* start_reg tracks the beginning of the current run of registers that
   must be saved; a dead pair flushes the run.  */
21157 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21159 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21160 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21162 if (start_reg
!= reg
)
21163 saved_size
+= vfp_emit_fstmd (start_reg
,
21164 (reg
- start_reg
) / 2);
21165 start_reg
= reg
+ 2;
/* Flush any run still open when the scan ends.  */
21168 if (start_reg
!= reg
)
21169 saved_size
+= vfp_emit_fstmd (start_reg
,
21170 (reg
- start_reg
) / 2);
21176 /* Set the Thumb frame pointer from the stack pointer. */
/* OFFSETS is the cached frame layout; amount = outgoing_args - locals_base
   is the distance from SP to where the frame pointer must point.  Several
   emission strategies are visible (direct addsi3, or movsi of the constant
   followed by an add), with a REG_FRAME_RELATED_EXPR note describing the
   net effect (FP = SP + amount) for the unwinder.
   NOTE(review): garbled extraction — the branch conditions selecting
   between the strategies and some operands are missing from this view.  */
21179 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21181 HOST_WIDE_INT amount
;
21184 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21186 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21187 stack_pointer_rtx
, GEN_INT (amount
)));
/* Large offset path: load the constant first, then add SP.  */
21190 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21191 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21192 expects the first two operands to be the same. */
21195 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21197 hard_frame_pointer_rtx
));
21201 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21202 hard_frame_pointer_rtx
,
21203 stack_pointer_rtx
));
/* Tell the DWARF machinery the net effect regardless of how it was
   materialized: FP = SP + amount.  */
21205 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21206 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21207 RTX_FRAME_RELATED_P (dwarf
) = 1;
21208 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21211 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): the body of struct scratch_reg is missing from this
   garbled extraction; later code uses members `reg` and (presumably)
   a saved-p flag — confirm against the upstream file.  */
21214 struct scratch_reg
{
21219 /* Return a short-lived scratch register for use as a 2nd scratch register on
21220 function entry after the registers are saved in the prologue. This register
21221 must be released by means of release_scratch_register_on_entry. IP is not
21222 considered since it is always used as the 1st scratch register if available.
21224 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21225 mask of live registers. */
/* Visible selection order: prefer LR if saved and distinct from REGNO1;
   else the first saved callee register r4..r10 other than REGNO1; for a
   nested function whose 1st scratch is IP, fall back to r2/r3.  If the
   chosen register is live at function entry it is spilled with an SP
   pre-decrement store annotated for the unwinder.
   NOTE(review): garbled extraction — several assignments and the braces
   around the spill sequence are missing from this view.  */
21228 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21229 unsigned long live_regs
)
21235 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21241 for (i
= 4; i
< 11; i
++)
21242 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21250 /* If IP is used as the 1st scratch register for a nested function,
21251 then either r3 wasn't available or is used to preserve IP. */
21252 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21254 regno
= (regno1
== 3 ? 2 : 3);
/* Record whether the chosen register is live-in, i.e. must be spilled.  */
21256 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21261 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21264 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21265 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21266 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21267 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21268 RTX_FRAME_RELATED_P (insn
) = 1;
21269 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21273 /* Release a scratch register obtained from the preceding function. */
/* Mirror of the spill in get_scratch_register_on_entry: reload SR->reg
   from the stack with an SP post-increment load and annotate the SP
   adjustment (+4) for the unwinder.  NOTE(review): the guard that makes
   this conditional on the register having been saved is missing from
   this garbled extraction.  */
21276 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21280 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21281 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21282 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21283 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21284 RTX_FRAME_RELATED_P (insn
) = 1;
21285 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
/* Stack-probe interval for -fstack-check; bounded to 4096 so a probe can
   use an immediate-offset addressing mode.  */
21289 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21291 #if PROBE_INTERVAL > 4096
21292 #error Cannot use indexed addressing mode for stack probing
21295 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21296 inclusive. These are offsets from the current stack pointer. REGNO1
21297 is the index number of the 1st scratch register and LIVE_REGS is the
21298 mask of live registers. */
/* Three strategies, selected by SIZE: a single probe, an unrolled
   sequence of up to 5 probes, or a run-time loop (gen_probe_stack_range)
   using a second scratch register from get_scratch_register_on_entry.
   The 4095/255 thresholds match the maximum immediate offset of ARM
   resp. Thumb-2 store instructions.
   NOTE(review): garbled extraction — braces and a few statements are
   missing; the structure below is as visible.  */
21301 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21302 unsigned int regno1
, unsigned long live_regs
)
21304 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21306 /* See if we have a constant small number of probes to generate. If so,
21307 that's the easy case. */
21308 if (size
<= PROBE_INTERVAL
)
21310 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21311 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21312 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21315 /* The run-time loop is made up of 10 insns in the generic case while the
21316 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21317 else if (size
<= 5 * PROBE_INTERVAL
)
21319 HOST_WIDE_INT i
, rem
;
21321 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21322 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21323 emit_stack_probe (reg1
);
21325 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21326 it exceeds SIZE. If only two probes are needed, this will not
21327 generate any code. Then probe at FIRST + SIZE. */
21328 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21330 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21331 emit_stack_probe (reg1
);
21334 rem
= size
- (i
- PROBE_INTERVAL
);
21335 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21337 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21338 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21341 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21344 /* Otherwise, do the same as above, but in a loop. Note that we must be
21345 extra careful with variables wrapping around because we might be at
21346 the very top (or the very bottom) of the address space and we have
21347 to be able to handle this case properly; in particular, we use an
21348 equality test for the loop condition. */
21351 HOST_WIDE_INT rounded_size
;
21352 struct scratch_reg sr
;
21354 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21356 emit_move_insn (reg1
, GEN_INT (first
));
21359 /* Step 1: round SIZE to the previous multiple of the interval. */
21361 rounded_size
= size
& -PROBE_INTERVAL
;
21362 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21365 /* Step 2: compute initial and final value of the loop counter. */
21367 /* TEST_ADDR = SP + FIRST. */
21368 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21370 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21371 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21374 /* Step 3: the loop
21378 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21381 while (TEST_ADDR != LAST_ADDR)
21383 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21384 until it is equal to ROUNDED_SIZE. */
21386 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21389 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21390 that SIZE is equal to ROUNDED_SIZE. */
21392 if (size
!= rounded_size
)
21394 HOST_WIDE_INT rem
= size
- rounded_size
;
21396 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21398 emit_set_insn (sr
.reg
,
21399 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21400 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21401 PROBE_INTERVAL
- rem
));
21404 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21407 release_scratch_register_on_entry (&sr
);
21410 /* Make sure nothing is scheduled before we are done. */
21411 emit_insn (gen_blockage ());
21414 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21415 absolute addresses. */
/* Assembly-time companion of gen_probe_stack_range: prints the probe loop
   (label; sub; str; cmp; bne label) directly to asm_out_file.  labelno is
   static so each emitted loop gets a unique internal label.
   NOTE(review): garbled extraction — the xops[] declaration, the return
   statement, and xops[0]/xops[1] setup lines are partly missing.  */
21418 output_probe_stack_range (rtx reg1
, rtx reg2
)
21420 static int labelno
= 0;
21424 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21427 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21429 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21431 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21432 output_asm_insn ("sub\t%0, %0, %1", xops
);
21434 /* Probe at TEST_ADDR. */
21435 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21437 /* Test if TEST_ADDR == LAST_ADDR. */
21439 output_asm_insn ("cmp\t%0, %1", xops
);
21442 fputs ("\tbne\t", asm_out_file
);
21443 assemble_name_raw (asm_out_file
, loop_lab
);
21444 fputc ('\n', asm_out_file
);
21449 /* Generate the prologue instructions for entry into an ARM or Thumb-2
/* Expands the function prologue to RTL.  Visible phases: naked-function
   early exit; stack realignment for IS_STACKALIGN functions; saving the
   static chain (IP) when frame creation clobbers it; pushing pretend
   (varargs) args; the core multi-register push; coprocessor saves;
   frame-pointer creation; -fstack-check probing; the final SP drop to
   outgoing_args; PIC register load; and scheduling barriers.
   NOTE(review): this block is a garbled extraction — declarations of
   insn/ip_rtx/clobber_ip/fp_offset/r0/r1/amount, many braces, and whole
   statements are missing (jumps in the fused original line numbers).
   Comments describe only what survives; consult the upstream file.  */
21452 arm_expand_prologue (void)
21457 unsigned long live_regs_mask
;
21458 unsigned long func_type
;
21460 int saved_pretend_args
= 0;
21461 int saved_regs
= 0;
21462 unsigned HOST_WIDE_INT args_to_push
;
21463 HOST_WIDE_INT size
;
21464 arm_stack_offsets
*offsets
;
21467 func_type
= arm_current_func_type ();
21469 /* Naked functions don't have prologues. */
21470 if (IS_NAKED (func_type
))
21472 if (flag_stack_usage_info
)
21473 current_function_static_stack_size
= 0;
21477 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21478 args_to_push
= crtl
->args
.pretend_args_size
;
21480 /* Compute which register we will have to save onto the stack. */
21481 offsets
= arm_get_frame_offsets ();
21482 live_regs_mask
= offsets
->saved_regs_mask
;
21484 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21486 if (IS_STACKALIGN (func_type
))
21490 /* Handle a word-aligned stack pointer. We generate the following:
21495 <save and restore r0 in normal prologue/epilogue>
21499 The unwinder doesn't need to know about the stack realignment.
21500 Just tell it we saved SP in r0. */
21501 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21503 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21504 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21506 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21507 RTX_FRAME_RELATED_P (insn
) = 1;
21508 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
/* Round SP down to a doubleword boundary: r1 = r0 & ~7.  */
21510 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21512 /* ??? The CFA changes here, which may cause GDB to conclude that it
21513 has entered a different function. That said, the unwind info is
21514 correct, individually, before and after this instruction because
21515 we've described the save of SP, which will override the default
21516 handling of SP as restoring from the CFA. */
21517 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21520 /* The static chain register is the same as the IP register. If it is
21521 clobbered when creating the frame, we need to save and restore it. */
21522 clobber_ip
= IS_NESTED (func_type
)
21523 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21524 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21525 && !df_regs_ever_live_p (LR_REGNUM
)
21526 && arm_r3_live_at_start_p ()));
21528 /* Find somewhere to store IP whilst the frame is being created.
21529 We try the following places in order:
21531 1. The last argument register r3 if it is available.
21532 2. A slot on the stack above the frame if there are no
21533 arguments to push onto the stack.
21534 3. Register r3 again, after pushing the argument registers
21535 onto the stack, if this is a varargs function.
21536 4. The last slot on the stack created for the arguments to
21537 push, if this isn't a varargs function.
21539 Note - we only need to tell the dwarf2 backend about the SP
21540 adjustment in the second variant; the static chain register
21541 doesn't need to be unwound, as it doesn't contain a value
21542 inherited from the caller. */
21545 if (!arm_r3_live_at_start_p ())
21546 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21547 else if (args_to_push
== 0)
21551 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21554 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21555 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21558 /* Just tell the dwarf backend that we adjusted SP. */
21559 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21560 plus_constant (Pmode
, stack_pointer_rtx
,
21562 RTX_FRAME_RELATED_P (insn
) = 1;
21563 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21567 /* Store the args on the stack. */
21568 if (cfun
->machine
->uses_anonymous_args
)
21570 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21571 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21572 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21573 saved_pretend_args
= 1;
21579 if (args_to_push
== 4)
21580 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21582 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21583 plus_constant (Pmode
,
21587 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21589 /* Just tell the dwarf backend that we adjusted SP. */
21590 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21591 plus_constant (Pmode
, stack_pointer_rtx
,
21593 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21596 RTX_FRAME_RELATED_P (insn
) = 1;
21597 fp_offset
= args_to_push
;
21602 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21604 if (IS_INTERRUPT (func_type
))
21606 /* Interrupt functions must not corrupt any registers.
21607 Creating a frame pointer however, corrupts the IP
21608 register, so we must push it first. */
21609 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21611 /* Do not set RTX_FRAME_RELATED_P on this insn.
21612 The dwarf stack unwinding code only wants to see one
21613 stack decrement per function, and this is not it. If
21614 this instruction is labeled as being part of the frame
21615 creation sequence then dwarf2out_frame_debug_expr will
21616 die when it encounters the assignment of IP to FP
21617 later on, since the use of SP here establishes SP as
21618 the CFA register and not IP.
21620 Anyway this instruction is not really part of the stack
21621 frame creation although it is part of the prologue. */
21624 insn
= emit_set_insn (ip_rtx
,
21625 plus_constant (Pmode
, stack_pointer_rtx
,
21627 RTX_FRAME_RELATED_P (insn
) = 1;
21632 /* Push the argument registers, or reserve space for them. */
21633 if (cfun
->machine
->uses_anonymous_args
)
21634 insn
= emit_multi_reg_push
21635 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21636 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21639 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21640 GEN_INT (- args_to_push
)));
21641 RTX_FRAME_RELATED_P (insn
) = 1;
21644 /* If this is an interrupt service routine, and the link register
21645 is going to be pushed, and we're not generating extra
21646 push of IP (needed when frame is needed and frame layout if apcs),
21647 subtracting four from LR now will mean that the function return
21648 can be done with a single instruction. */
21649 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21650 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21651 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21654 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21656 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21659 if (live_regs_mask
)
21661 unsigned long dwarf_regs_mask
= live_regs_mask
;
21663 saved_regs
+= bit_count (live_regs_mask
) * 4;
21664 if (optimize_size
&& !frame_pointer_needed
21665 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21667 /* If no coprocessor registers are being pushed and we don't have
21668 to worry about a frame pointer then push extra registers to
21669 create the stack frame. This is done is a way that does not
21670 alter the frame layout, so is independent of the epilogue. */
21674 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21676 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21677 if (frame
&& n
* 4 >= frame
)
21680 live_regs_mask
|= (1 << n
) - 1;
21681 saved_regs
+= frame
;
21686 && current_tune
->prefer_ldrd_strd
21687 && !optimize_function_for_size_p (cfun
))
21689 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21691 thumb2_emit_strd_push (live_regs_mask
);
21692 else if (TARGET_ARM
21693 && !TARGET_APCS_FRAME
21694 && !IS_INTERRUPT (func_type
))
21695 arm_emit_strd_push (live_regs_mask
);
21698 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21699 RTX_FRAME_RELATED_P (insn
) = 1;
21704 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21705 RTX_FRAME_RELATED_P (insn
) = 1;
21709 if (! IS_VOLATILE (func_type
))
21710 saved_regs
+= arm_save_coproc_regs ();
21712 if (frame_pointer_needed
&& TARGET_ARM
)
21714 /* Create the new frame pointer. */
21715 if (TARGET_APCS_FRAME
)
21717 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21718 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21719 RTX_FRAME_RELATED_P (insn
) = 1;
21723 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21724 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21725 stack_pointer_rtx
, insn
));
21726 RTX_FRAME_RELATED_P (insn
) = 1;
21730 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21731 if (flag_stack_usage_info
)
21732 current_function_static_stack_size
= size
;
21734 /* If this isn't an interrupt service routine and we have a frame, then do
21735 stack checking. We use IP as the first scratch register, except for the
21736 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21737 if (!IS_INTERRUPT (func_type
)
21738 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
21740 unsigned int regno
;
21742 if (!IS_NESTED (func_type
) || clobber_ip
)
21744 else if (df_regs_ever_live_p (LR_REGNUM
))
21749 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21751 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
21752 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
21753 size
- STACK_CHECK_PROTECT
,
21754 regno
, live_regs_mask
);
21757 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
21758 regno
, live_regs_mask
);
21761 /* Recover the static chain register. */
21764 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21765 insn
= gen_rtx_REG (SImode
, 3);
21768 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21769 insn
= gen_frame_mem (SImode
, insn
);
21771 emit_set_insn (ip_rtx
, insn
);
/* Keep IP live so the restore is not deleted as dead.  */
21772 emit_insn (gen_force_register_use (ip_rtx
));
21775 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21777 /* This add can produce multiple insns for a large constant, so we
21778 need to get tricky. */
21779 rtx_insn
*last
= get_last_insn ();
21781 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21782 - offsets
->outgoing_args
);
21784 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
/* Mark every insn emitted for the (possibly multi-insn) SP drop.  */
21788 last
= last
? NEXT_INSN (last
) : get_insns ();
21789 RTX_FRAME_RELATED_P (last
) = 1;
21791 while (last
!= insn
);
21793 /* If the frame pointer is needed, emit a special barrier that
21794 will prevent the scheduler from moving stores to the frame
21795 before the stack adjustment. */
21796 if (frame_pointer_needed
)
21797 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21798 hard_frame_pointer_rtx
));
21802 if (frame_pointer_needed
&& TARGET_THUMB2
)
21803 thumb_set_frame_pointer (offsets
);
21805 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21807 unsigned long mask
;
21809 mask
= live_regs_mask
;
21810 mask
&= THUMB2_WORK_REGS
;
21811 if (!IS_NESTED (func_type
))
21812 mask
|= (1 << IP_REGNUM
);
21813 arm_load_pic_register (mask
);
21816 /* If we are profiling, make sure no instructions are scheduled before
21817 the call to mcount. Similarly if the user has requested no
21818 scheduling in the prolog. Similarly if we want non-call exceptions
21819 using the EABI unwinder, to prevent faulting instructions from being
21820 swapped with a stack adjustment. */
21821 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21822 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21823 && cfun
->can_throw_non_call_exceptions
))
21824 emit_insn (gen_blockage ());
21826 /* If the link register is being kept alive, with the return address in it,
21827 then make sure that it does not get reused by the ce2 pass. */
21828 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21829 cfun
->machine
->lr_save_eliminated
= 1;
21832 /* Print condition code to STREAM. Helper function for arm_print_operand. */
/* Two visible sources of the condition: the ccfsm state machine
   (arm_ccfsm_state 3 or 4 -> arm_current_cc) and an explicit
   current_insn_predicate.  Both reject predicated Thumb output with
   output_operand_lossage.  NOTE(review): garbled extraction — the
   return type, braces, and the TARGET_THUMB guards are missing.  */
21834 arm_print_condition (FILE *stream
)
21836 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21838 /* Branch conversion is not implemented for Thumb-2. */
21841 output_operand_lossage ("predicated Thumb instruction");
21844 if (current_insn_predicate
!= NULL
)
21846 output_operand_lossage
21847 ("predicated instruction in conditional sequence");
21851 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21853 else if (current_insn_predicate
)
21855 enum arm_cond_code code
;
21859 output_operand_lossage ("predicated Thumb instruction");
21863 code
= get_arm_condition_code (current_insn_predicate
);
21864 fputs (arm_condition_codes
[code
], stream
);
21869 /* Globally reserved letters: acln
21870 Puncutation letters currently used: @_|?().!#
21871 Lower case letters currently used: bcdefhimpqtvwxyz
21872 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21873 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21875 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21877 If CODE is 'd', then the X is a condition operand and the instruction
21878 should only be executed if the condition is true.
21879 if CODE is 'D', then the X is a condition operand and the instruction
21880 should only be executed if the condition is false: however, if the mode
21881 of the comparison is CCFPEmode, then always execute the instruction -- we
21882 do this because in these circumstances !GE does not necessarily imply LT;
21883 in these cases the instruction pattern will take care to make sure that
21884 an instruction containing %d will follow, thereby undoing the effects of
21885 doing this instruction unconditionally.
21886 If CODE is 'N' then X is a floating point operand that must be negated
21888 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21889 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21891 arm_print_operand (FILE *stream
, rtx x
, int code
)
21896 fputs (ASM_COMMENT_START
, stream
);
21900 fputs (user_label_prefix
, stream
);
21904 fputs (REGISTER_PREFIX
, stream
);
21908 arm_print_condition (stream
);
21912 /* The current condition code for a condition code setting instruction.
21913 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21914 fputc('s', stream
);
21915 arm_print_condition (stream
);
21919 /* If the instruction is conditionally executed then print
21920 the current condition code, otherwise print 's'. */
21921 gcc_assert (TARGET_THUMB2
);
21922 if (current_insn_predicate
)
21923 arm_print_condition (stream
);
21925 fputc('s', stream
);
21928 /* %# is a "break" sequence. It doesn't output anything, but is used to
21929 separate e.g. operand numbers from following text, if that text consists
21930 of further digits which we don't want to be part of the operand
21938 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
21939 fprintf (stream
, "%s", fp_const_from_val (&r
));
21943 /* An integer or symbol address without a preceding # sign. */
21945 switch (GET_CODE (x
))
21948 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21952 output_addr_const (stream
, x
);
21956 if (GET_CODE (XEXP (x
, 0)) == PLUS
21957 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21959 output_addr_const (stream
, x
);
21962 /* Fall through. */
21965 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21969 /* An integer that we want to print in HEX. */
21971 switch (GET_CODE (x
))
21974 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21978 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21983 if (CONST_INT_P (x
))
21986 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21987 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21991 putc ('~', stream
);
21992 output_addr_const (stream
, x
);
21997 /* Print the log2 of a CONST_INT. */
22001 if (!CONST_INT_P (x
)
22002 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
22003 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22005 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22010 /* The low 16 bits of an immediate constant. */
22011 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
22015 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
22019 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
22027 shift
= shift_op (x
, &val
);
22031 fprintf (stream
, ", %s ", shift
);
22033 arm_print_operand (stream
, XEXP (x
, 1), 0);
22035 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22040 /* An explanation of the 'Q', 'R' and 'H' register operands:
22042 In a pair of registers containing a DI or DF value the 'Q'
22043 operand returns the register number of the register containing
22044 the least significant part of the value. The 'R' operand returns
22045 the register number of the register containing the most
22046 significant part of the value.
22048 The 'H' operand returns the higher of the two register numbers.
22049 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22050 same as the 'Q' operand, since the most significant part of the
22051 value is held in the lower number register. The reverse is true
22052 on systems where WORDS_BIG_ENDIAN is false.
22054 The purpose of these operands is to distinguish between cases
22055 where the endian-ness of the values is important (for example
22056 when they are added together), and cases where the endian-ness
22057 is irrelevant, but the order of register operations is important.
22058 For example when loading a value from memory into a register
22059 pair, the endian-ness does not matter. Provided that the value
22060 from the lower memory address is put into the lower numbered
22061 register, and the value from the higher address is put into the
22062 higher numbered register, the load will work regardless of whether
22063 the value being loaded is big-wordian or little-wordian. The
22064 order of the two register loads can matter however, if the address
22065 of the memory location is actually held in one of the registers
22066 being overwritten by the load.
22068 The 'Q' and 'R' constraints are also available for 64-bit
22071 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22073 rtx part
= gen_lowpart (SImode
, x
);
22074 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22078 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22080 output_operand_lossage ("invalid operand for code '%c'", code
);
22084 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
22088 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22090 machine_mode mode
= GET_MODE (x
);
22093 if (mode
== VOIDmode
)
22095 part
= gen_highpart_mode (SImode
, mode
, x
);
22096 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22100 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22102 output_operand_lossage ("invalid operand for code '%c'", code
);
22106 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
22110 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22112 output_operand_lossage ("invalid operand for code '%c'", code
);
22116 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22120 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22122 output_operand_lossage ("invalid operand for code '%c'", code
);
22126 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22130 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22132 output_operand_lossage ("invalid operand for code '%c'", code
);
22136 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
22140 asm_fprintf (stream
, "%r",
22141 REG_P (XEXP (x
, 0))
22142 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
22146 asm_fprintf (stream
, "{%r-%r}",
22148 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22151 /* Like 'M', but writing doubleword vector registers, for use by Neon
22155 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22156 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22158 asm_fprintf (stream
, "{d%d}", regno
);
22160 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
22165 /* CONST_TRUE_RTX means always -- that's the default. */
22166 if (x
== const_true_rtx
)
22169 if (!COMPARISON_P (x
))
22171 output_operand_lossage ("invalid operand for code '%c'", code
);
22175 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22180 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22181 want to do that. */
22182 if (x
== const_true_rtx
)
22184 output_operand_lossage ("instruction never executed");
22187 if (!COMPARISON_P (x
))
22189 output_operand_lossage ("invalid operand for code '%c'", code
);
22193 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22194 (get_arm_condition_code (x
))],
22204 /* Former Maverick support, removed after GCC-4.7. */
22205 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22210 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22211 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22212 /* Bad value for wCG register number. */
22214 output_operand_lossage ("invalid operand for code '%c'", code
);
22219 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22222 /* Print an iWMMXt control register name. */
22224 if (!CONST_INT_P (x
)
22226 || INTVAL (x
) >= 16)
22227 /* Bad value for wC register number. */
22229 output_operand_lossage ("invalid operand for code '%c'", code
);
22235 static const char * wc_reg_names
[16] =
22237 "wCID", "wCon", "wCSSF", "wCASF",
22238 "wC4", "wC5", "wC6", "wC7",
22239 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22240 "wC12", "wC13", "wC14", "wC15"
22243 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22247 /* Print the high single-precision register of a VFP double-precision
22251 machine_mode mode
= GET_MODE (x
);
22254 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22256 output_operand_lossage ("invalid operand for code '%c'", code
);
22261 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22263 output_operand_lossage ("invalid operand for code '%c'", code
);
22267 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22271 /* Print a VFP/Neon double precision or quad precision register name. */
22275 machine_mode mode
= GET_MODE (x
);
22276 int is_quad
= (code
== 'q');
22279 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22281 output_operand_lossage ("invalid operand for code '%c'", code
);
22286 || !IS_VFP_REGNUM (REGNO (x
)))
22288 output_operand_lossage ("invalid operand for code '%c'", code
);
22293 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22294 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22296 output_operand_lossage ("invalid operand for code '%c'", code
);
22300 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22301 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22305 /* These two codes print the low/high doubleword register of a Neon quad
22306 register, respectively. For pair-structure types, can also print
22307 low/high quadword registers. */
22311 machine_mode mode
= GET_MODE (x
);
22314 if ((GET_MODE_SIZE (mode
) != 16
22315 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22317 output_operand_lossage ("invalid operand for code '%c'", code
);
22322 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22324 output_operand_lossage ("invalid operand for code '%c'", code
);
22328 if (GET_MODE_SIZE (mode
) == 16)
22329 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22330 + (code
== 'f' ? 1 : 0));
22332 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22333 + (code
== 'f' ? 1 : 0));
22337 /* Print a VFPv3 floating-point constant, represented as an integer
22341 int index
= vfp3_const_double_index (x
);
22342 gcc_assert (index
!= -1);
22343 fprintf (stream
, "%d", index
);
22347 /* Print bits representing opcode features for Neon.
22349 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22350 and polynomials as unsigned.
22352 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22354 Bit 2 is 1 for rounding functions, 0 otherwise. */
22356 /* Identify the type as 's', 'u', 'p' or 'f'. */
22359 HOST_WIDE_INT bits
= INTVAL (x
);
22360 fputc ("uspf"[bits
& 3], stream
);
22364 /* Likewise, but signed and unsigned integers are both 'i'. */
22367 HOST_WIDE_INT bits
= INTVAL (x
);
22368 fputc ("iipf"[bits
& 3], stream
);
22372 /* As for 'T', but emit 'u' instead of 'p'. */
22375 HOST_WIDE_INT bits
= INTVAL (x
);
22376 fputc ("usuf"[bits
& 3], stream
);
22380 /* Bit 2: rounding (vs none). */
22383 HOST_WIDE_INT bits
= INTVAL (x
);
22384 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22388 /* Memory operand for vld1/vst1 instruction. */
22392 bool postinc
= FALSE
;
22393 rtx postinc_reg
= NULL
;
22394 unsigned align
, memsize
, align_bits
;
22396 gcc_assert (MEM_P (x
));
22397 addr
= XEXP (x
, 0);
22398 if (GET_CODE (addr
) == POST_INC
)
22401 addr
= XEXP (addr
, 0);
22403 if (GET_CODE (addr
) == POST_MODIFY
)
22405 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22406 addr
= XEXP (addr
, 0);
22408 asm_fprintf (stream
, "[%r", REGNO (addr
));
22410 /* We know the alignment of this access, so we can emit a hint in the
22411 instruction (for some alignments) as an aid to the memory subsystem
22413 align
= MEM_ALIGN (x
) >> 3;
22414 memsize
= MEM_SIZE (x
);
22416 /* Only certain alignment specifiers are supported by the hardware. */
22417 if (memsize
== 32 && (align
% 32) == 0)
22419 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22421 else if (memsize
>= 8 && (align
% 8) == 0)
22426 if (align_bits
!= 0)
22427 asm_fprintf (stream
, ":%d", align_bits
);
22429 asm_fprintf (stream
, "]");
22432 fputs("!", stream
);
22434 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22442 gcc_assert (MEM_P (x
));
22443 addr
= XEXP (x
, 0);
22444 gcc_assert (REG_P (addr
));
22445 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22449 /* Translate an S register number into a D register number and element index. */
22452 machine_mode mode
= GET_MODE (x
);
22455 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22457 output_operand_lossage ("invalid operand for code '%c'", code
);
22462 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22464 output_operand_lossage ("invalid operand for code '%c'", code
);
22468 regno
= regno
- FIRST_VFP_REGNUM
;
22469 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22474 gcc_assert (CONST_DOUBLE_P (x
));
22476 result
= vfp3_const_double_for_fract_bits (x
);
22478 result
= vfp3_const_double_for_bits (x
);
22479 fprintf (stream
, "#%d", result
);
22482 /* Register specifier for vld1.16/vst1.16. Translate the S register
22483 number into a D register number and element index. */
22486 machine_mode mode
= GET_MODE (x
);
22489 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22491 output_operand_lossage ("invalid operand for code '%c'", code
);
22496 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22498 output_operand_lossage ("invalid operand for code '%c'", code
);
22502 regno
= regno
- FIRST_VFP_REGNUM
;
22503 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22510 output_operand_lossage ("missing operand");
22514 switch (GET_CODE (x
))
22517 asm_fprintf (stream
, "%r", REGNO (x
));
22521 output_address (GET_MODE (x
), XEXP (x
, 0));
22527 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22528 sizeof (fpstr
), 0, 1);
22529 fprintf (stream
, "#%s", fpstr
);
22534 gcc_assert (GET_CODE (x
) != NEG
);
22535 fputc ('#', stream
);
22536 if (GET_CODE (x
) == HIGH
)
22538 fputs (":lower16:", stream
);
22542 output_addr_const (stream
, x
);
22548 /* Target hook for printing a memory address. */
22550 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22554 int is_minus
= GET_CODE (x
) == MINUS
;
22557 asm_fprintf (stream
, "[%r]", REGNO (x
));
22558 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22560 rtx base
= XEXP (x
, 0);
22561 rtx index
= XEXP (x
, 1);
22562 HOST_WIDE_INT offset
= 0;
22564 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22566 /* Ensure that BASE is a register. */
22567 /* (one of them must be). */
22568 /* Also ensure the SP is not used as in index register. */
22569 std::swap (base
, index
);
22571 switch (GET_CODE (index
))
22574 offset
= INTVAL (index
);
22577 asm_fprintf (stream
, "[%r, #%wd]",
22578 REGNO (base
), offset
);
22582 asm_fprintf (stream
, "[%r, %s%r]",
22583 REGNO (base
), is_minus
? "-" : "",
22593 asm_fprintf (stream
, "[%r, %s%r",
22594 REGNO (base
), is_minus
? "-" : "",
22595 REGNO (XEXP (index
, 0)));
22596 arm_print_operand (stream
, index
, 'S');
22597 fputs ("]", stream
);
22602 gcc_unreachable ();
22605 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22606 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22608 gcc_assert (REG_P (XEXP (x
, 0)));
22610 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22611 asm_fprintf (stream
, "[%r, #%s%d]!",
22612 REGNO (XEXP (x
, 0)),
22613 GET_CODE (x
) == PRE_DEC
? "-" : "",
22614 GET_MODE_SIZE (mode
));
22616 asm_fprintf (stream
, "[%r], #%s%d",
22617 REGNO (XEXP (x
, 0)),
22618 GET_CODE (x
) == POST_DEC
? "-" : "",
22619 GET_MODE_SIZE (mode
));
22621 else if (GET_CODE (x
) == PRE_MODIFY
)
22623 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22624 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22625 asm_fprintf (stream
, "#%wd]!",
22626 INTVAL (XEXP (XEXP (x
, 1), 1)));
22628 asm_fprintf (stream
, "%r]!",
22629 REGNO (XEXP (XEXP (x
, 1), 1)));
22631 else if (GET_CODE (x
) == POST_MODIFY
)
22633 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22634 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22635 asm_fprintf (stream
, "#%wd",
22636 INTVAL (XEXP (XEXP (x
, 1), 1)));
22638 asm_fprintf (stream
, "%r",
22639 REGNO (XEXP (XEXP (x
, 1), 1)));
22641 else output_addr_const (stream
, x
);
22646 asm_fprintf (stream
, "[%r]", REGNO (x
));
22647 else if (GET_CODE (x
) == POST_INC
)
22648 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22649 else if (GET_CODE (x
) == PLUS
)
22651 gcc_assert (REG_P (XEXP (x
, 0)));
22652 if (CONST_INT_P (XEXP (x
, 1)))
22653 asm_fprintf (stream
, "[%r, #%wd]",
22654 REGNO (XEXP (x
, 0)),
22655 INTVAL (XEXP (x
, 1)));
22657 asm_fprintf (stream
, "[%r, %r]",
22658 REGNO (XEXP (x
, 0)),
22659 REGNO (XEXP (x
, 1)));
22662 output_addr_const (stream
, x
);
22666 /* Target hook for indicating whether a punctuation character for
22667 TARGET_PRINT_OPERAND is valid. */
22669 arm_print_operand_punct_valid_p (unsigned char code
)
22671 return (code
== '@' || code
== '|' || code
== '.'
22672 || code
== '(' || code
== ')' || code
== '#'
22673 || (TARGET_32BIT
&& (code
== '?'))
22674 || (TARGET_THUMB2
&& (code
== '!'))
22675 || (TARGET_THUMB
&& (code
== '_')));
22678 /* Target hook for assembling integer objects. The ARM version needs to
22679 handle word-sized values specially. */
22681 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22685 if (size
== UNITS_PER_WORD
&& aligned_p
)
22687 fputs ("\t.word\t", asm_out_file
);
22688 output_addr_const (asm_out_file
, x
);
22690 /* Mark symbols as position independent. We only do this in the
22691 .text segment, not in the .data segment. */
22692 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22693 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22695 /* See legitimize_pic_address for an explanation of the
22696 TARGET_VXWORKS_RTP check. */
22697 if (!arm_pic_data_is_text_relative
22698 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22699 fputs ("(GOT)", asm_out_file
);
22701 fputs ("(GOTOFF)", asm_out_file
);
22703 fputc ('\n', asm_out_file
);
22707 mode
= GET_MODE (x
);
22709 if (arm_vector_mode_supported_p (mode
))
22713 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22715 units
= CONST_VECTOR_NUNITS (x
);
22716 size
= GET_MODE_UNIT_SIZE (mode
);
22718 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22719 for (i
= 0; i
< units
; i
++)
22721 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22723 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22726 for (i
= 0; i
< units
; i
++)
22728 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22730 (*CONST_DOUBLE_REAL_VALUE (elt
), GET_MODE_INNER (mode
),
22731 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22737 return default_assemble_integer (x
, size
, aligned_p
);
22741 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22745 if (!TARGET_AAPCS_BASED
)
22748 default_named_section_asm_out_constructor
22749 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22753 /* Put these in the .init_array section, using a special relocation. */
22754 if (priority
!= DEFAULT_INIT_PRIORITY
)
22757 sprintf (buf
, "%s.%.5u",
22758 is_ctor
? ".init_array" : ".fini_array",
22760 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22767 switch_to_section (s
);
22768 assemble_align (POINTER_SIZE
);
22769 fputs ("\t.word\t", asm_out_file
);
22770 output_addr_const (asm_out_file
, symbol
);
22771 fputs ("(target1)\n", asm_out_file
);
22774 /* Add a function to the list of static constructors. */
22777 arm_elf_asm_constructor (rtx symbol
, int priority
)
22779 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22782 /* Add a function to the list of static destructors. */
22785 arm_elf_asm_destructor (rtx symbol
, int priority
)
22787 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
22828 /* Returns the index of the ARM condition code string in
22829 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22830 COMPARISON should be an rtx like `(eq (...) (...))'. */
22833 maybe_get_arm_condition_code (rtx comparison
)
22835 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22836 enum arm_cond_code code
;
22837 enum rtx_code comp_code
= GET_CODE (comparison
);
22839 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22840 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22841 XEXP (comparison
, 1));
22845 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22846 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22847 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22848 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22849 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22850 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22851 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22852 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22853 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22854 case CC_DLTUmode
: code
= ARM_CC
;
22857 if (comp_code
== EQ
)
22858 return ARM_INVERSE_CONDITION_CODE (code
);
22859 if (comp_code
== NE
)
22866 case NE
: return ARM_NE
;
22867 case EQ
: return ARM_EQ
;
22868 case GE
: return ARM_PL
;
22869 case LT
: return ARM_MI
;
22870 default: return ARM_NV
;
22876 case NE
: return ARM_NE
;
22877 case EQ
: return ARM_EQ
;
22878 default: return ARM_NV
;
22884 case NE
: return ARM_MI
;
22885 case EQ
: return ARM_PL
;
22886 default: return ARM_NV
;
22891 /* We can handle all cases except UNEQ and LTGT. */
22894 case GE
: return ARM_GE
;
22895 case GT
: return ARM_GT
;
22896 case LE
: return ARM_LS
;
22897 case LT
: return ARM_MI
;
22898 case NE
: return ARM_NE
;
22899 case EQ
: return ARM_EQ
;
22900 case ORDERED
: return ARM_VC
;
22901 case UNORDERED
: return ARM_VS
;
22902 case UNLT
: return ARM_LT
;
22903 case UNLE
: return ARM_LE
;
22904 case UNGT
: return ARM_HI
;
22905 case UNGE
: return ARM_PL
;
22906 /* UNEQ and LTGT do not have a representation. */
22907 case UNEQ
: /* Fall through. */
22908 case LTGT
: /* Fall through. */
22909 default: return ARM_NV
;
22915 case NE
: return ARM_NE
;
22916 case EQ
: return ARM_EQ
;
22917 case GE
: return ARM_LE
;
22918 case GT
: return ARM_LT
;
22919 case LE
: return ARM_GE
;
22920 case LT
: return ARM_GT
;
22921 case GEU
: return ARM_LS
;
22922 case GTU
: return ARM_CC
;
22923 case LEU
: return ARM_CS
;
22924 case LTU
: return ARM_HI
;
22925 default: return ARM_NV
;
22931 case LTU
: return ARM_CS
;
22932 case GEU
: return ARM_CC
;
22933 default: return ARM_NV
;
22939 case NE
: return ARM_NE
;
22940 case EQ
: return ARM_EQ
;
22941 case GEU
: return ARM_CS
;
22942 case GTU
: return ARM_HI
;
22943 case LEU
: return ARM_LS
;
22944 case LTU
: return ARM_CC
;
22945 default: return ARM_NV
;
22951 case GE
: return ARM_GE
;
22952 case LT
: return ARM_LT
;
22953 case GEU
: return ARM_CS
;
22954 case LTU
: return ARM_CC
;
22955 default: return ARM_NV
;
22961 case NE
: return ARM_NE
;
22962 case EQ
: return ARM_EQ
;
22963 case GE
: return ARM_GE
;
22964 case GT
: return ARM_GT
;
22965 case LE
: return ARM_LE
;
22966 case LT
: return ARM_LT
;
22967 case GEU
: return ARM_CS
;
22968 case GTU
: return ARM_HI
;
22969 case LEU
: return ARM_LS
;
22970 case LTU
: return ARM_CC
;
22971 default: return ARM_NV
;
22974 default: gcc_unreachable ();
22978 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22979 static enum arm_cond_code
22980 get_arm_condition_code (rtx comparison
)
22982 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22983 gcc_assert (code
!= ARM_NV
);
22987 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22990 thumb2_final_prescan_insn (rtx_insn
*insn
)
22992 rtx_insn
*first_insn
= insn
;
22993 rtx body
= PATTERN (insn
);
22995 enum arm_cond_code code
;
23000 /* max_insns_skipped in the tune was already taken into account in the
23001 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23002 just emit the IT blocks as we can. It does not make sense to split
23004 max
= MAX_INSN_PER_IT_BLOCK
;
23006 /* Remove the previous insn from the count of insns to be output. */
23007 if (arm_condexec_count
)
23008 arm_condexec_count
--;
23010 /* Nothing to do if we are already inside a conditional block. */
23011 if (arm_condexec_count
)
23014 if (GET_CODE (body
) != COND_EXEC
)
23017 /* Conditional jumps are implemented directly. */
23021 predicate
= COND_EXEC_TEST (body
);
23022 arm_current_cc
= get_arm_condition_code (predicate
);
23024 n
= get_attr_ce_count (insn
);
23025 arm_condexec_count
= 1;
23026 arm_condexec_mask
= (1 << n
) - 1;
23027 arm_condexec_masklen
= n
;
23028 /* See if subsequent instructions can be combined into the same block. */
23031 insn
= next_nonnote_insn (insn
);
23033 /* Jumping into the middle of an IT block is illegal, so a label or
23034 barrier terminates the block. */
23035 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
23038 body
= PATTERN (insn
);
23039 /* USE and CLOBBER aren't really insns, so just skip them. */
23040 if (GET_CODE (body
) == USE
23041 || GET_CODE (body
) == CLOBBER
)
23044 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23045 if (GET_CODE (body
) != COND_EXEC
)
23047 /* Maximum number of conditionally executed instructions in a block. */
23048 n
= get_attr_ce_count (insn
);
23049 if (arm_condexec_masklen
+ n
> max
)
23052 predicate
= COND_EXEC_TEST (body
);
23053 code
= get_arm_condition_code (predicate
);
23054 mask
= (1 << n
) - 1;
23055 if (arm_current_cc
== code
)
23056 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
23057 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
23060 arm_condexec_count
++;
23061 arm_condexec_masklen
+= n
;
23063 /* A jump must be the last instruction in a conditional block. */
23067 /* Restore recog_data (getting the attributes of other insns can
23068 destroy this array, but final.c assumes that it remains intact
23069 across this call). */
23070 extract_constrain_insn_cached (first_insn
);
23074 arm_final_prescan_insn (rtx_insn
*insn
)
23076 /* BODY will hold the body of INSN. */
23077 rtx body
= PATTERN (insn
);
23079 /* This will be 1 if trying to repeat the trick, and things need to be
23080 reversed if it appears to fail. */
23083 /* If we start with a return insn, we only succeed if we find another one. */
23084 int seeking_return
= 0;
23085 enum rtx_code return_code
= UNKNOWN
;
23087 /* START_INSN will hold the insn from where we start looking. This is the
23088 first insn after the following code_label if REVERSE is true. */
23089 rtx_insn
*start_insn
= insn
;
23091 /* If in state 4, check if the target branch is reached, in order to
23092 change back to state 0. */
23093 if (arm_ccfsm_state
== 4)
23095 if (insn
== arm_target_insn
)
23097 arm_target_insn
= NULL
;
23098 arm_ccfsm_state
= 0;
23103 /* If in state 3, it is possible to repeat the trick, if this insn is an
23104 unconditional branch to a label, and immediately following this branch
23105 is the previous target label which is only used once, and the label this
23106 branch jumps to is not too far off. */
23107 if (arm_ccfsm_state
== 3)
23109 if (simplejump_p (insn
))
23111 start_insn
= next_nonnote_insn (start_insn
);
23112 if (BARRIER_P (start_insn
))
23114 /* XXX Isn't this always a barrier? */
23115 start_insn
= next_nonnote_insn (start_insn
);
23117 if (LABEL_P (start_insn
)
23118 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23119 && LABEL_NUSES (start_insn
) == 1)
23124 else if (ANY_RETURN_P (body
))
23126 start_insn
= next_nonnote_insn (start_insn
);
23127 if (BARRIER_P (start_insn
))
23128 start_insn
= next_nonnote_insn (start_insn
);
23129 if (LABEL_P (start_insn
)
23130 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23131 && LABEL_NUSES (start_insn
) == 1)
23134 seeking_return
= 1;
23135 return_code
= GET_CODE (body
);
23144 gcc_assert (!arm_ccfsm_state
|| reverse
);
23145 if (!JUMP_P (insn
))
23148 /* This jump might be paralleled with a clobber of the condition codes
23149 the jump should always come first */
23150 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23151 body
= XVECEXP (body
, 0, 0);
23154 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23155 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23158 int fail
= FALSE
, succeed
= FALSE
;
23159 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23160 int then_not_else
= TRUE
;
23161 rtx_insn
*this_insn
= start_insn
;
23164 /* Register the insn jumped to. */
23167 if (!seeking_return
)
23168 label
= XEXP (SET_SRC (body
), 0);
23170 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23171 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23172 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23174 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23175 then_not_else
= FALSE
;
23177 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23179 seeking_return
= 1;
23180 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23182 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23184 seeking_return
= 1;
23185 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23186 then_not_else
= FALSE
;
23189 gcc_unreachable ();
23191 /* See how many insns this branch skips, and what kind of insns. If all
23192 insns are okay, and the label or unconditional branch to the same
23193 label is not too far away, succeed. */
23194 for (insns_skipped
= 0;
23195 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23199 this_insn
= next_nonnote_insn (this_insn
);
23203 switch (GET_CODE (this_insn
))
23206 /* Succeed if it is the target label, otherwise fail since
23207 control falls in from somewhere else. */
23208 if (this_insn
== label
)
23210 arm_ccfsm_state
= 1;
23218 /* Succeed if the following insn is the target label.
23220 If return insns are used then the last insn in a function
23221 will be a barrier. */
23222 this_insn
= next_nonnote_insn (this_insn
);
23223 if (this_insn
&& this_insn
== label
)
23225 arm_ccfsm_state
= 1;
23233 /* The AAPCS says that conditional calls should not be
23234 used since they make interworking inefficient (the
23235 linker can't transform BL<cond> into BLX). That's
23236 only a problem if the machine has BLX. */
23243 /* Succeed if the following insn is the target label, or
23244 if the following two insns are a barrier and the
23246 this_insn
= next_nonnote_insn (this_insn
);
23247 if (this_insn
&& BARRIER_P (this_insn
))
23248 this_insn
= next_nonnote_insn (this_insn
);
23250 if (this_insn
&& this_insn
== label
23251 && insns_skipped
< max_insns_skipped
)
23253 arm_ccfsm_state
= 1;
23261 /* If this is an unconditional branch to the same label, succeed.
23262 If it is to another label, do nothing. If it is conditional,
23264 /* XXX Probably, the tests for SET and the PC are
23267 scanbody
= PATTERN (this_insn
);
23268 if (GET_CODE (scanbody
) == SET
23269 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23271 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23272 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23274 arm_ccfsm_state
= 2;
23277 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23280 /* Fail if a conditional return is undesirable (e.g. on a
23281 StrongARM), but still allow this if optimizing for size. */
23282 else if (GET_CODE (scanbody
) == return_code
23283 && !use_return_insn (TRUE
, NULL
)
23286 else if (GET_CODE (scanbody
) == return_code
)
23288 arm_ccfsm_state
= 2;
23291 else if (GET_CODE (scanbody
) == PARALLEL
)
23293 switch (get_attr_conds (this_insn
))
23303 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23308 /* Instructions using or affecting the condition codes make it
23310 scanbody
= PATTERN (this_insn
);
23311 if (!(GET_CODE (scanbody
) == SET
23312 || GET_CODE (scanbody
) == PARALLEL
)
23313 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23323 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23324 arm_target_label
= CODE_LABEL_NUMBER (label
);
23327 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23329 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23331 this_insn
= next_nonnote_insn (this_insn
);
23332 gcc_assert (!this_insn
23333 || (!BARRIER_P (this_insn
)
23334 && !LABEL_P (this_insn
)));
23338 /* Oh, dear! we ran off the end.. give up. */
23339 extract_constrain_insn_cached (insn
);
23340 arm_ccfsm_state
= 0;
23341 arm_target_insn
= NULL
;
23344 arm_target_insn
= this_insn
;
23347 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23350 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23352 if (reverse
|| then_not_else
)
23353 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23356 /* Restore recog_data (getting the attributes of other insns can
23357 destroy this array, but final.c assumes that it remains intact
23358 across this call. */
23359 extract_constrain_insn_cached (insn
);
23363 /* Output IT instructions. */
23365 thumb2_asm_output_opcode (FILE * stream
)
23370 if (arm_condexec_mask
)
23372 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23373 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23375 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23376 arm_condition_codes
[arm_current_cc
]);
23377 arm_condexec_mask
= 0;
23381 /* Returns true if REGNO is a valid register
23382 for holding a quantity of type MODE. */
23384 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23386 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23387 return (regno
== CC_REGNUM
23388 || (TARGET_HARD_FLOAT
&& TARGET_VFP
23389 && regno
== VFPCC_REGNUM
));
23391 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23395 /* For the Thumb we only allow values bigger than SImode in
23396 registers 0 - 6, so that there is always a second low
23397 register available to hold the upper part of the value.
23398 We probably we ought to ensure that the register is the
23399 start of an even numbered register pair. */
23400 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23402 if (TARGET_HARD_FLOAT
&& TARGET_VFP
23403 && IS_VFP_REGNUM (regno
))
23405 if (mode
== SFmode
|| mode
== SImode
)
23406 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23408 if (mode
== DFmode
)
23409 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23411 if (mode
== HFmode
)
23412 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23415 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23416 || (VALID_NEON_QREG_MODE (mode
)
23417 && NEON_REGNO_OK_FOR_QUAD (regno
))
23418 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23419 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23420 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23421 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23422 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23427 if (TARGET_REALLY_IWMMXT
)
23429 if (IS_IWMMXT_GR_REGNUM (regno
))
23430 return mode
== SImode
;
23432 if (IS_IWMMXT_REGNUM (regno
))
23433 return VALID_IWMMXT_REG_MODE (mode
);
23436 /* We allow almost any value to be stored in the general registers.
23437 Restrict doubleword quantities to even register pairs in ARM state
23438 so that we can use ldrd. Do not allow very large Neon structure
23439 opaque modes in general registers; they would use too many. */
23440 if (regno
<= LAST_ARM_REGNUM
)
23442 if (ARM_NUM_REGS (mode
) > 4)
23448 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23451 if (regno
== FRAME_POINTER_REGNUM
23452 || regno
== ARG_POINTER_REGNUM
)
23453 /* We only allow integers in the fake hard registers. */
23454 return GET_MODE_CLASS (mode
) == MODE_INT
;
23459 /* Implement MODES_TIEABLE_P. */
23462 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23464 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23467 /* We specifically want to allow elements of "structure" modes to
23468 be tieable to the structure. This more general condition allows
23469 other rarer situations too. */
23471 && (VALID_NEON_DREG_MODE (mode1
)
23472 || VALID_NEON_QREG_MODE (mode1
)
23473 || VALID_NEON_STRUCT_MODE (mode1
))
23474 && (VALID_NEON_DREG_MODE (mode2
)
23475 || VALID_NEON_QREG_MODE (mode2
)
23476 || VALID_NEON_STRUCT_MODE (mode2
)))
23482 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23483 not used in arm mode. */
23486 arm_regno_class (int regno
)
23488 if (regno
== PC_REGNUM
)
23493 if (regno
== STACK_POINTER_REGNUM
)
23495 if (regno
== CC_REGNUM
)
23502 if (TARGET_THUMB2
&& regno
< 8)
23505 if ( regno
<= LAST_ARM_REGNUM
23506 || regno
== FRAME_POINTER_REGNUM
23507 || regno
== ARG_POINTER_REGNUM
)
23508 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23510 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23511 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23513 if (IS_VFP_REGNUM (regno
))
23515 if (regno
<= D7_VFP_REGNUM
)
23516 return VFP_D0_D7_REGS
;
23517 else if (regno
<= LAST_LO_VFP_REGNUM
)
23518 return VFP_LO_REGS
;
23520 return VFP_HI_REGS
;
23523 if (IS_IWMMXT_REGNUM (regno
))
23524 return IWMMXT_REGS
;
23526 if (IS_IWMMXT_GR_REGNUM (regno
))
23527 return IWMMXT_GR_REGS
;
23532 /* Handle a special case when computing the offset
23533 of an argument from the frame pointer. */
23535 arm_debugger_arg_offset (int value
, rtx addr
)
23539 /* We are only interested if dbxout_parms() failed to compute the offset. */
23543 /* We can only cope with the case where the address is held in a register. */
23547 /* If we are using the frame pointer to point at the argument, then
23548 an offset of 0 is correct. */
23549 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23552 /* If we are using the stack pointer to point at the
23553 argument, then an offset of 0 is correct. */
23554 /* ??? Check this is consistent with thumb2 frame layout. */
23555 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23556 && REGNO (addr
) == SP_REGNUM
)
23559 /* Oh dear. The argument is pointed to by a register rather
23560 than being held in a register, or being stored at a known
23561 offset from the frame pointer. Since GDB only understands
23562 those two kinds of argument we must translate the address
23563 held in the register into an offset from the frame pointer.
23564 We do this by searching through the insns for the function
23565 looking to see where this register gets its value. If the
23566 register is initialized from the frame pointer plus an offset
23567 then we are in luck and we can continue, otherwise we give up.
23569 This code is exercised by producing debugging information
23570 for a function with arguments like this:
23572 double func (double a, double b, int c, double d) {return d;}
23574 Without this code the stab for parameter 'd' will be set to
23575 an offset of 0 from the frame pointer, rather than 8. */
23577 /* The if() statement says:
23579 If the insn is a normal instruction
23580 and if the insn is setting the value in a register
23581 and if the register being set is the register holding the address of the argument
23582 and if the address is computing by an addition
23583 that involves adding to a register
23584 which is the frame pointer
23589 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23591 if ( NONJUMP_INSN_P (insn
)
23592 && GET_CODE (PATTERN (insn
)) == SET
23593 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23594 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23595 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23596 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23597 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23600 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23609 warning (0, "unable to compute real location of stacked parameter");
23610 value
= 8; /* XXX magic hack */
23616 /* Implement TARGET_PROMOTED_TYPE. */
23619 arm_promoted_type (const_tree t
)
23621 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23622 return float_type_node
;
23626 /* Implement TARGET_CONVERT_TO_TYPE.
23627 Specifically, this hook implements the peculiarity of the ARM
23628 half-precision floating-point C semantics that requires conversions between
23629 __fp16 to or from double to do an intermediate conversion to float. */
23632 arm_convert_to_type (tree type
, tree expr
)
23634 tree fromtype
= TREE_TYPE (expr
);
23635 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
23637 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
23638 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
23639 return convert (type
, convert (float_type_node
, expr
));
23643 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23644 This simply adds HFmode as a supported mode; even though we don't
23645 implement arithmetic on this type directly, it's supported by
23646 optabs conversions, much the way the double-word arithmetic is
23647 special-cased in the default hook. */
23650 arm_scalar_mode_supported_p (machine_mode mode
)
23652 if (mode
== HFmode
)
23653 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23654 else if (ALL_FIXED_POINT_MODE_P (mode
))
23657 return default_scalar_mode_supported_p (mode
);
23660 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23661 not to early-clobber SRC registers in the process.
23663 We assume that the operands described by SRC and DEST represent a
23664 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23665 number of components into which the copy has been decomposed. */
23667 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23671 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23672 || REGNO (operands
[0]) < REGNO (operands
[1]))
23674 for (i
= 0; i
< count
; i
++)
23676 operands
[2 * i
] = dest
[i
];
23677 operands
[2 * i
+ 1] = src
[i
];
23682 for (i
= 0; i
< count
; i
++)
23684 operands
[2 * i
] = dest
[count
- i
- 1];
23685 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23690 /* Split operands into moves from op[1] + op[2] into op[0]. */
23693 neon_split_vcombine (rtx operands
[3])
23695 unsigned int dest
= REGNO (operands
[0]);
23696 unsigned int src1
= REGNO (operands
[1]);
23697 unsigned int src2
= REGNO (operands
[2]);
23698 machine_mode halfmode
= GET_MODE (operands
[1]);
23699 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
23700 rtx destlo
, desthi
;
23702 if (src1
== dest
&& src2
== dest
+ halfregs
)
23704 /* No-op move. Can't split to nothing; emit something. */
23705 emit_note (NOTE_INSN_DELETED
);
23709 /* Preserve register attributes for variable tracking. */
23710 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23711 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23712 GET_MODE_SIZE (halfmode
));
23714 /* Special case of reversed high/low parts. Use VSWP. */
23715 if (src2
== dest
&& src1
== dest
+ halfregs
)
23717 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23718 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23719 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23723 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23725 /* Try to avoid unnecessary moves if part of the result
23726 is in the right place already. */
23728 emit_move_insn (destlo
, operands
[1]);
23729 if (src2
!= dest
+ halfregs
)
23730 emit_move_insn (desthi
, operands
[2]);
23734 if (src2
!= dest
+ halfregs
)
23735 emit_move_insn (desthi
, operands
[2]);
23737 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
23750 /* Like emit_multi_reg_push, but allowing for a different set of
23751 registers to be described as saved. MASK is the set of registers
23752 to be saved; REAL_REGS is the set of registers to be described as
23753 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23756 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23758 unsigned long regno
;
23759 rtx par
[10], tmp
, reg
;
23763 /* Build the parallel of the registers actually being stored. */
23764 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23766 regno
= ctz_hwi (mask
);
23767 reg
= gen_rtx_REG (SImode
, regno
);
23770 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23772 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23777 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23778 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23779 tmp
= gen_frame_mem (BLKmode
, tmp
);
23780 tmp
= gen_rtx_SET (tmp
, par
[0]);
23783 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23784 insn
= emit_insn (tmp
);
23786 /* Always build the stack adjustment note for unwind info. */
23787 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23788 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23791 /* Build the parallel of the registers recorded as saved for unwind. */
23792 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23794 regno
= ctz_hwi (real_regs
);
23795 reg
= gen_rtx_REG (SImode
, regno
);
23797 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23798 tmp
= gen_frame_mem (SImode
, tmp
);
23799 tmp
= gen_rtx_SET (tmp
, reg
);
23800 RTX_FRAME_RELATED_P (tmp
) = 1;
23808 RTX_FRAME_RELATED_P (par
[0]) = 1;
23809 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23812 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23817 /* Emit code to push or pop registers to or from the stack. F is the
23818 assembly file. MASK is the registers to pop. */
23820 thumb_pop (FILE *f
, unsigned long mask
)
23823 int lo_mask
= mask
& 0xFF;
23824 int pushed_words
= 0;
23828 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23830 /* Special case. Do not generate a POP PC statement here, do it in
23832 thumb_exit (f
, -1);
23836 fprintf (f
, "\tpop\t{");
23838 /* Look at the low registers first. */
23839 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23843 asm_fprintf (f
, "%r", regno
);
23845 if ((lo_mask
& ~1) != 0)
23852 if (mask
& (1 << PC_REGNUM
))
23854 /* Catch popping the PC. */
23855 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
23856 || crtl
->calls_eh_return
)
23858 /* The PC is never poped directly, instead
23859 it is popped into r3 and then BX is used. */
23860 fprintf (f
, "}\n");
23862 thumb_exit (f
, -1);
23871 asm_fprintf (f
, "%r", PC_REGNUM
);
23875 fprintf (f
, "}\n");
23878 /* Generate code to return from a thumb function.
23879 If 'reg_containing_return_addr' is -1, then the return address is
23880 actually on the stack, at the stack pointer. */
23882 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23884 unsigned regs_available_for_popping
;
23885 unsigned regs_to_pop
;
23887 unsigned available
;
23891 int restore_a4
= FALSE
;
23893 /* Compute the registers we need to pop. */
23897 if (reg_containing_return_addr
== -1)
23899 regs_to_pop
|= 1 << LR_REGNUM
;
23903 if (TARGET_BACKTRACE
)
23905 /* Restore the (ARM) frame pointer and stack pointer. */
23906 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23910 /* If there is nothing to pop then just emit the BX instruction and
23912 if (pops_needed
== 0)
23914 if (crtl
->calls_eh_return
)
23915 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23917 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23920 /* Otherwise if we are not supporting interworking and we have not created
23921 a backtrace structure and the function was not entered in ARM mode then
23922 just pop the return address straight into the PC. */
23923 else if (!TARGET_INTERWORK
23924 && !TARGET_BACKTRACE
23925 && !is_called_in_ARM_mode (current_function_decl
)
23926 && !crtl
->calls_eh_return
)
23928 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23932 /* Find out how many of the (return) argument registers we can corrupt. */
23933 regs_available_for_popping
= 0;
23935 /* If returning via __builtin_eh_return, the bottom three registers
23936 all contain information needed for the return. */
23937 if (crtl
->calls_eh_return
)
23941 /* If we can deduce the registers used from the function's
23942 return value. This is more reliable that examining
23943 df_regs_ever_live_p () because that will be set if the register is
23944 ever used in the function, not just if the register is used
23945 to hold a return value. */
23947 if (crtl
->return_rtx
!= 0)
23948 mode
= GET_MODE (crtl
->return_rtx
);
23950 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23952 size
= GET_MODE_SIZE (mode
);
23956 /* In a void function we can use any argument register.
23957 In a function that returns a structure on the stack
23958 we can use the second and third argument registers. */
23959 if (mode
== VOIDmode
)
23960 regs_available_for_popping
=
23961 (1 << ARG_REGISTER (1))
23962 | (1 << ARG_REGISTER (2))
23963 | (1 << ARG_REGISTER (3));
23965 regs_available_for_popping
=
23966 (1 << ARG_REGISTER (2))
23967 | (1 << ARG_REGISTER (3));
23969 else if (size
<= 4)
23970 regs_available_for_popping
=
23971 (1 << ARG_REGISTER (2))
23972 | (1 << ARG_REGISTER (3));
23973 else if (size
<= 8)
23974 regs_available_for_popping
=
23975 (1 << ARG_REGISTER (3));
23978 /* Match registers to be popped with registers into which we pop them. */
23979 for (available
= regs_available_for_popping
,
23980 required
= regs_to_pop
;
23981 required
!= 0 && available
!= 0;
23982 available
&= ~(available
& - available
),
23983 required
&= ~(required
& - required
))
23986 /* If we have any popping registers left over, remove them. */
23988 regs_available_for_popping
&= ~available
;
23990 /* Otherwise if we need another popping register we can use
23991 the fourth argument register. */
23992 else if (pops_needed
)
23994 /* If we have not found any free argument registers and
23995 reg a4 contains the return address, we must move it. */
23996 if (regs_available_for_popping
== 0
23997 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
23999 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24000 reg_containing_return_addr
= LR_REGNUM
;
24002 else if (size
> 12)
24004 /* Register a4 is being used to hold part of the return value,
24005 but we have dire need of a free, low register. */
24008 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
24011 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
24013 /* The fourth argument register is available. */
24014 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
24020 /* Pop as many registers as we can. */
24021 thumb_pop (f
, regs_available_for_popping
);
24023 /* Process the registers we popped. */
24024 if (reg_containing_return_addr
== -1)
24026 /* The return address was popped into the lowest numbered register. */
24027 regs_to_pop
&= ~(1 << LR_REGNUM
);
24029 reg_containing_return_addr
=
24030 number_of_first_bit_set (regs_available_for_popping
);
24032 /* Remove this register for the mask of available registers, so that
24033 the return address will not be corrupted by further pops. */
24034 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
24037 /* If we popped other registers then handle them here. */
24038 if (regs_available_for_popping
)
24042 /* Work out which register currently contains the frame pointer. */
24043 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24045 /* Move it into the correct place. */
24046 asm_fprintf (f
, "\tmov\t%r, %r\n",
24047 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
24049 /* (Temporarily) remove it from the mask of popped registers. */
24050 regs_available_for_popping
&= ~(1 << frame_pointer
);
24051 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24053 if (regs_available_for_popping
)
24057 /* We popped the stack pointer as well,
24058 find the register that contains it. */
24059 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24061 /* Move it into the stack register. */
24062 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24064 /* At this point we have popped all necessary registers, so
24065 do not worry about restoring regs_available_for_popping
24066 to its correct value:
24068 assert (pops_needed == 0)
24069 assert (regs_available_for_popping == (1 << frame_pointer))
24070 assert (regs_to_pop == (1 << STACK_POINTER)) */
24074 /* Since we have just move the popped value into the frame
24075 pointer, the popping register is available for reuse, and
24076 we know that we still have the stack pointer left to pop. */
24077 regs_available_for_popping
|= (1 << frame_pointer
);
24081 /* If we still have registers left on the stack, but we no longer have
24082 any registers into which we can pop them, then we must move the return
24083 address into the link register and make available the register that
24085 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24087 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24089 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24090 reg_containing_return_addr
);
24092 reg_containing_return_addr
= LR_REGNUM
;
24095 /* If we have registers left on the stack then pop some more.
24096 We know that at most we will want to pop FP and SP. */
24097 if (pops_needed
> 0)
24102 thumb_pop (f
, regs_available_for_popping
);
24104 /* We have popped either FP or SP.
24105 Move whichever one it is into the correct register. */
24106 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24107 move_to
= number_of_first_bit_set (regs_to_pop
);
24109 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24111 regs_to_pop
&= ~(1 << move_to
);
24116 /* If we still have not popped everything then we must have only
24117 had one register available to us and we are now popping the SP. */
24118 if (pops_needed
> 0)
24122 thumb_pop (f
, regs_available_for_popping
);
24124 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24126 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24128 assert (regs_to_pop == (1 << STACK_POINTER))
24129 assert (pops_needed == 1)
24133 /* If necessary restore the a4 register. */
24136 if (reg_containing_return_addr
!= LR_REGNUM
)
24138 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24139 reg_containing_return_addr
= LR_REGNUM
;
24142 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24145 if (crtl
->calls_eh_return
)
24146 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24148 /* Return to caller. */
24149 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24152 /* Scan INSN just before assembler is output for it.
24153 For Thumb-1, we track the status of the condition codes; this
24154 information is used in the cbranchsi4_insn pattern. */
24156 thumb1_final_prescan_insn (rtx_insn
*insn
)
24158 if (flag_print_asm_name
)
24159 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24160 INSN_ADDRESSES (INSN_UID (insn
)));
24161 /* Don't overwrite the previous setter when we get to a cbranch. */
24162 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24164 enum attr_conds conds
;
24166 if (cfun
->machine
->thumb1_cc_insn
)
24168 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24169 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24172 conds
= get_attr_conds (insn
);
24173 if (conds
== CONDS_SET
)
24175 rtx set
= single_set (insn
);
24176 cfun
->machine
->thumb1_cc_insn
= insn
;
24177 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24178 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24179 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24180 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24182 rtx src1
= XEXP (SET_SRC (set
), 1);
24183 if (src1
== const0_rtx
)
24184 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24186 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24188 /* Record the src register operand instead of dest because
24189 cprop_hardreg pass propagates src. */
24190 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24193 else if (conds
!= CONDS_NOCOND
)
24194 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24197 /* Check if unexpected far jump is used. */
24198 if (cfun
->machine
->lr_save_eliminated
24199 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24200 internal_error("Unexpected thumb1 far jump");
24204 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24206 unsigned HOST_WIDE_INT mask
= 0xff;
24209 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24210 if (val
== 0) /* XXX */
24213 for (i
= 0; i
< 25; i
++)
24214 if ((val
& (mask
<< i
)) == val
)
24220 /* Returns nonzero if the current function contains,
24221 or might contain a far jump. */
24223 thumb_far_jump_used_p (void)
24226 bool far_jump
= false;
24227 unsigned int func_size
= 0;
24229 /* This test is only important for leaf functions. */
24230 /* assert (!leaf_function_p ()); */
24232 /* If we have already decided that far jumps may be used,
24233 do not bother checking again, and always return true even if
24234 it turns out that they are not being used. Once we have made
24235 the decision that far jumps are present (and that hence the link
24236 register will be pushed onto the stack) we cannot go back on it. */
24237 if (cfun
->machine
->far_jump_used
)
24240 /* If this function is not being called from the prologue/epilogue
24241 generation code then it must be being called from the
24242 INITIAL_ELIMINATION_OFFSET macro. */
24243 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24245 /* In this case we know that we are being asked about the elimination
24246 of the arg pointer register. If that register is not being used,
24247 then there are no arguments on the stack, and we do not have to
24248 worry that a far jump might force the prologue to push the link
24249 register, changing the stack offsets. In this case we can just
24250 return false, since the presence of far jumps in the function will
24251 not affect stack offsets.
24253 If the arg pointer is live (or if it was live, but has now been
24254 eliminated and so set to dead) then we do have to test to see if
24255 the function might contain a far jump. This test can lead to some
24256 false negatives, since before reload is completed, then length of
24257 branch instructions is not known, so gcc defaults to returning their
24258 longest length, which in turn sets the far jump attribute to true.
24260 A false negative will not result in bad code being generated, but it
24261 will result in a needless push and pop of the link register. We
24262 hope that this does not occur too often.
24264 If we need doubleword stack alignment this could affect the other
24265 elimination offsets so we can't risk getting it wrong. */
24266 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24267 cfun
->machine
->arg_pointer_live
= 1;
24268 else if (!cfun
->machine
->arg_pointer_live
)
24272 /* We should not change far_jump_used during or after reload, as there is
24273 no chance to change stack frame layout. */
24274 if (reload_in_progress
|| reload_completed
)
24277 /* Check to see if the function contains a branch
24278 insn with the far jump attribute set. */
24279 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24281 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24285 func_size
+= get_attr_length (insn
);
24288 /* Attribute far_jump will always be true for thumb1 before
24289 shorten_branch pass. So checking far_jump attribute before
24290 shorten_branch isn't much useful.
24292 Following heuristic tries to estimate more accurately if a far jump
24293 may finally be used. The heuristic is very conservative as there is
24294 no chance to roll-back the decision of not to use far jump.
24296 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24297 2-byte insn is associated with a 4 byte constant pool. Using
24298 function size 2048/3 as the threshold is conservative enough. */
24301 if ((func_size
* 3) >= 2048)
24303 /* Record the fact that we have decided that
24304 the function does use far jumps. */
24305 cfun
->machine
->far_jump_used
= 1;
24313 /* Return nonzero if FUNC must be entered in ARM mode. */
24315 is_called_in_ARM_mode (tree func
)
24317 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24319 /* Ignore the problem about functions whose address is taken. */
24320 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24324 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24330 /* Given the stack offsets and register mask in OFFSETS, decide how
24331 many additional registers to push instead of subtracting a constant
24332 from SP. For epilogues the principle is the same except we use pop.
24333 FOR_PROLOGUE indicates which we're generating. */
24335 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24337 HOST_WIDE_INT amount
;
24338 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24339 /* Extract a mask of the ones we can give to the Thumb's push/pop
24341 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24342 /* Then count how many other high registers will need to be pushed. */
24343 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24344 int n_free
, reg_base
, size
;
24346 if (!for_prologue
&& frame_pointer_needed
)
24347 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24349 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24351 /* If the stack frame size is 512 exactly, we can save one load
24352 instruction, which should make this a win even when optimizing
24354 if (!optimize_size
&& amount
!= 512)
24357 /* Can't do this if there are high registers to push. */
24358 if (high_regs_pushed
!= 0)
24361 /* Shouldn't do it in the prologue if no registers would normally
24362 be pushed at all. In the epilogue, also allow it if we'll have
24363 a pop insn for the PC. */
24366 || TARGET_BACKTRACE
24367 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24368 || TARGET_INTERWORK
24369 || crtl
->args
.pretend_args_size
!= 0))
24372 /* Don't do this if thumb_expand_prologue wants to emit instructions
24373 between the push and the stack frame allocation. */
24375 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24376 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24383 size
= arm_size_return_regs ();
24384 reg_base
= ARM_NUM_INTS (size
);
24385 live_regs_mask
>>= reg_base
;
24388 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24389 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24391 live_regs_mask
>>= 1;
24397 gcc_assert (amount
/ 4 * 4 == amount
);
24399 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24400 return (amount
- 508) / 4;
24401 if (amount
<= n_free
* 4)
24406 /* The bits which aren't usefully expanded as rtl. */
24408 thumb1_unexpanded_epilogue (void)
24410 arm_stack_offsets
*offsets
;
24412 unsigned long live_regs_mask
= 0;
24413 int high_regs_pushed
= 0;
24415 int had_to_push_lr
;
24418 if (cfun
->machine
->return_used_this_function
!= 0)
24421 if (IS_NAKED (arm_current_func_type ()))
24424 offsets
= arm_get_frame_offsets ();
24425 live_regs_mask
= offsets
->saved_regs_mask
;
24426 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24428 /* If we can deduce the registers used from the function's return value.
24429 This is more reliable that examining df_regs_ever_live_p () because that
24430 will be set if the register is ever used in the function, not just if
24431 the register is used to hold a return value. */
24432 size
= arm_size_return_regs ();
24434 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24437 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24438 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24441 /* The prolog may have pushed some high registers to use as
24442 work registers. e.g. the testsuite file:
24443 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24444 compiles to produce:
24445 push {r4, r5, r6, r7, lr}
24449 as part of the prolog. We have to undo that pushing here. */
24451 if (high_regs_pushed
)
24453 unsigned long mask
= live_regs_mask
& 0xff;
24456 /* The available low registers depend on the size of the value we are
24464 /* Oh dear! We have no low registers into which we can pop
24467 ("no low registers available for popping high registers");
24469 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24470 if (live_regs_mask
& (1 << next_hi_reg
))
24473 while (high_regs_pushed
)
24475 /* Find lo register(s) into which the high register(s) can
24477 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24479 if (mask
& (1 << regno
))
24480 high_regs_pushed
--;
24481 if (high_regs_pushed
== 0)
24485 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24487 /* Pop the values into the low register(s). */
24488 thumb_pop (asm_out_file
, mask
);
24490 /* Move the value(s) into the high registers. */
24491 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24493 if (mask
& (1 << regno
))
24495 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24498 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24499 if (live_regs_mask
& (1 << next_hi_reg
))
24504 live_regs_mask
&= ~0x0f00;
24507 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24508 live_regs_mask
&= 0xff;
24510 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24512 /* Pop the return address into the PC. */
24513 if (had_to_push_lr
)
24514 live_regs_mask
|= 1 << PC_REGNUM
;
24516 /* Either no argument registers were pushed or a backtrace
24517 structure was created which includes an adjusted stack
24518 pointer, so just pop everything. */
24519 if (live_regs_mask
)
24520 thumb_pop (asm_out_file
, live_regs_mask
);
24522 /* We have either just popped the return address into the
24523 PC or it is was kept in LR for the entire function.
24524 Note that thumb_pop has already called thumb_exit if the
24525 PC was in the list. */
24526 if (!had_to_push_lr
)
24527 thumb_exit (asm_out_file
, LR_REGNUM
);
24531 /* Pop everything but the return address. */
24532 if (live_regs_mask
)
24533 thumb_pop (asm_out_file
, live_regs_mask
);
24535 if (had_to_push_lr
)
24539 /* We have no free low regs, so save one. */
24540 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24544 /* Get the return address into a temporary register. */
24545 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24549 /* Move the return address to lr. */
24550 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24552 /* Restore the low register. */
24553 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24558 regno
= LAST_ARG_REGNUM
;
24563 /* Remove the argument registers that were pushed onto the stack. */
24564 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24565 SP_REGNUM
, SP_REGNUM
,
24566 crtl
->args
.pretend_args_size
);
24568 thumb_exit (asm_out_file
, regno
);
24574 /* Functions to save and restore machine-specific function data. */
24575 static struct machine_function
*
24576 arm_init_machine_status (void)
24578 struct machine_function
*machine
;
24579 machine
= ggc_cleared_alloc
<machine_function
> ();
24581 #if ARM_FT_UNKNOWN != 0
24582 machine
->func_type
= ARM_FT_UNKNOWN
;
24587 /* Return an RTX indicating where the return address to the
24588 calling function can be found. */
24590 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24595 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24598 /* Do anything needed before RTL is emitted for each function. */
24600 arm_init_expanders (void)
24602 /* Arrange to initialize and mark the machine per-function status. */
24603 init_machine_status
= arm_init_machine_status
;
24605 /* This is to stop the combine pass optimizing away the alignment
24606 adjustment of va_arg. */
24607 /* ??? It is claimed that this should not be necessary. */
24609 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
24612 /* Check that FUNC is called with a different mode. */
24615 arm_change_mode_p (tree func
)
24617 if (TREE_CODE (func
) != FUNCTION_DECL
)
24620 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24623 callee_tree
= target_option_default_node
;
24625 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24626 int flags
= callee_opts
->x_target_flags
;
24628 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
24631 /* Like arm_compute_initial_elimination offset. Simpler because there
24632 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24633 to point at the base of the local variables after static stack
24634 space for a function has been allocated. */
24637 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24639 arm_stack_offsets
*offsets
;
24641 offsets
= arm_get_frame_offsets ();
24645 case ARG_POINTER_REGNUM
:
24648 case STACK_POINTER_REGNUM
:
24649 return offsets
->outgoing_args
- offsets
->saved_args
;
24651 case FRAME_POINTER_REGNUM
:
24652 return offsets
->soft_frame
- offsets
->saved_args
;
24654 case ARM_HARD_FRAME_POINTER_REGNUM
:
24655 return offsets
->saved_regs
- offsets
->saved_args
;
24657 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24658 return offsets
->locals_base
- offsets
->saved_args
;
24661 gcc_unreachable ();
24665 case FRAME_POINTER_REGNUM
:
24668 case STACK_POINTER_REGNUM
:
24669 return offsets
->outgoing_args
- offsets
->soft_frame
;
24671 case ARM_HARD_FRAME_POINTER_REGNUM
:
24672 return offsets
->saved_regs
- offsets
->soft_frame
;
24674 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24675 return offsets
->locals_base
- offsets
->soft_frame
;
24678 gcc_unreachable ();
24683 gcc_unreachable ();
24687 /* Generate the function's prologue. */
24690 thumb1_expand_prologue (void)
24694 HOST_WIDE_INT amount
;
24695 HOST_WIDE_INT size
;
24696 arm_stack_offsets
*offsets
;
24697 unsigned long func_type
;
24699 unsigned long live_regs_mask
;
24700 unsigned long l_mask
;
24701 unsigned high_regs_pushed
= 0;
24703 func_type
= arm_current_func_type ();
24705 /* Naked functions don't have prologues. */
24706 if (IS_NAKED (func_type
))
24708 if (flag_stack_usage_info
)
24709 current_function_static_stack_size
= 0;
24713 if (IS_INTERRUPT (func_type
))
24715 error ("interrupt Service Routines cannot be coded in Thumb mode");
24719 if (is_called_in_ARM_mode (current_function_decl
))
24720 emit_insn (gen_prologue_thumb1_interwork ());
24722 offsets
= arm_get_frame_offsets ();
24723 live_regs_mask
= offsets
->saved_regs_mask
;
24725 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24726 l_mask
= live_regs_mask
& 0x40ff;
24727 /* Then count how many other high registers will need to be pushed. */
24728 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24730 if (crtl
->args
.pretend_args_size
)
24732 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24734 if (cfun
->machine
->uses_anonymous_args
)
24736 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24737 unsigned long mask
;
24739 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24740 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24742 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24746 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24747 stack_pointer_rtx
, x
));
24749 RTX_FRAME_RELATED_P (insn
) = 1;
24752 if (TARGET_BACKTRACE
)
24754 HOST_WIDE_INT offset
= 0;
24755 unsigned work_register
;
24756 rtx work_reg
, x
, arm_hfp_rtx
;
24758 /* We have been asked to create a stack backtrace structure.
24759 The code looks like this:
24763 0 sub SP, #16 Reserve space for 4 registers.
24764 2 push {R7} Push low registers.
24765 4 add R7, SP, #20 Get the stack pointer before the push.
24766 6 str R7, [SP, #8] Store the stack pointer
24767 (before reserving the space).
24768 8 mov R7, PC Get hold of the start of this code + 12.
24769 10 str R7, [SP, #16] Store it.
24770 12 mov R7, FP Get hold of the current frame pointer.
24771 14 str R7, [SP, #4] Store it.
24772 16 mov R7, LR Get hold of the current return address.
24773 18 str R7, [SP, #12] Store it.
24774 20 add R7, SP, #16 Point at the start of the
24775 backtrace structure.
24776 22 mov FP, R7 Put this value into the frame pointer. */
24778 work_register
= thumb_find_work_register (live_regs_mask
);
24779 work_reg
= gen_rtx_REG (SImode
, work_register
);
24780 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24782 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24783 stack_pointer_rtx
, GEN_INT (-16)));
24784 RTX_FRAME_RELATED_P (insn
) = 1;
24788 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24789 RTX_FRAME_RELATED_P (insn
) = 1;
24791 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24794 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24795 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24797 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24798 x
= gen_frame_mem (SImode
, x
);
24799 emit_move_insn (x
, work_reg
);
24801 /* Make sure that the instruction fetching the PC is in the right place
24802 to calculate "start of backtrace creation code + 12". */
24803 /* ??? The stores using the common WORK_REG ought to be enough to
24804 prevent the scheduler from doing anything weird. Failing that
24805 we could always move all of the following into an UNSPEC_VOLATILE. */
24808 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24809 emit_move_insn (work_reg
, x
);
24811 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24812 x
= gen_frame_mem (SImode
, x
);
24813 emit_move_insn (x
, work_reg
);
24815 emit_move_insn (work_reg
, arm_hfp_rtx
);
24817 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24818 x
= gen_frame_mem (SImode
, x
);
24819 emit_move_insn (x
, work_reg
);
24823 emit_move_insn (work_reg
, arm_hfp_rtx
);
24825 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24826 x
= gen_frame_mem (SImode
, x
);
24827 emit_move_insn (x
, work_reg
);
24829 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24830 emit_move_insn (work_reg
, x
);
24832 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24833 x
= gen_frame_mem (SImode
, x
);
24834 emit_move_insn (x
, work_reg
);
24837 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24838 emit_move_insn (work_reg
, x
);
24840 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24841 x
= gen_frame_mem (SImode
, x
);
24842 emit_move_insn (x
, work_reg
);
24844 x
= GEN_INT (offset
+ 12);
24845 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24847 emit_move_insn (arm_hfp_rtx
, work_reg
);
24849 /* Optimization: If we are not pushing any low registers but we are going
24850 to push some high registers then delay our first push. This will just
24851 be a push of LR and we can combine it with the push of the first high
24853 else if ((l_mask
& 0xff) != 0
24854 || (high_regs_pushed
== 0 && l_mask
))
24856 unsigned long mask
= l_mask
;
24857 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24858 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24859 RTX_FRAME_RELATED_P (insn
) = 1;
24862 if (high_regs_pushed
)
24864 unsigned pushable_regs
;
24865 unsigned next_hi_reg
;
24866 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24867 : crtl
->args
.info
.nregs
;
24868 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24870 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24871 if (live_regs_mask
& (1 << next_hi_reg
))
24874 /* Here we need to mask out registers used for passing arguments
24875 even if they can be pushed. This is to avoid using them to stash the high
24876 registers. Such kind of stash may clobber the use of arguments. */
24877 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
24879 if (pushable_regs
== 0)
24880 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24882 while (high_regs_pushed
> 0)
24884 unsigned long real_regs_mask
= 0;
24886 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
24888 if (pushable_regs
& (1 << regno
))
24890 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24891 gen_rtx_REG (SImode
, next_hi_reg
));
24893 high_regs_pushed
--;
24894 real_regs_mask
|= (1 << next_hi_reg
);
24896 if (high_regs_pushed
)
24898 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24900 if (live_regs_mask
& (1 << next_hi_reg
))
24905 pushable_regs
&= ~((1 << regno
) - 1);
24911 /* If we had to find a work register and we have not yet
24912 saved the LR then add it to the list of regs to push. */
24913 if (l_mask
== (1 << LR_REGNUM
))
24915 pushable_regs
|= l_mask
;
24916 real_regs_mask
|= l_mask
;
24920 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
24921 RTX_FRAME_RELATED_P (insn
) = 1;
24925 /* Load the pic register before setting the frame pointer,
24926 so we can use r7 as a temporary work register. */
24927 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24928 arm_load_pic_register (live_regs_mask
);
24930 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24931 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24932 stack_pointer_rtx
);
24934 size
= offsets
->outgoing_args
- offsets
->saved_args
;
24935 if (flag_stack_usage_info
)
24936 current_function_static_stack_size
= size
;
24938 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24939 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
24940 sorry ("-fstack-check=specific for Thumb-1");
24942 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24943 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
24948 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24949 GEN_INT (- amount
)));
24950 RTX_FRAME_RELATED_P (insn
) = 1;
24956 /* The stack decrement is too big for an immediate value in a single
24957 insn. In theory we could issue multiple subtracts, but after
24958 three of them it becomes more space efficient to place the full
24959 value in the constant pool and load into a register. (Also the
24960 ARM debugger really likes to see only one stack decrement per
24961 function). So instead we look for a scratch register into which
24962 we can load the decrement, and then we subtract this from the
24963 stack pointer. Unfortunately on the thumb the only available
24964 scratch registers are the argument registers, and we cannot use
24965 these as they may hold arguments to the function. Instead we
24966 attempt to locate a call preserved register which is used by this
24967 function. If we can find one, then we know that it will have
24968 been pushed at the start of the prologue and so we can corrupt
24970 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
24971 if (live_regs_mask
& (1 << regno
))
24974 gcc_assert(regno
<= LAST_LO_REGNUM
);
24976 reg
= gen_rtx_REG (SImode
, regno
);
24978 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
24980 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24981 stack_pointer_rtx
, reg
));
24983 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
24984 plus_constant (Pmode
, stack_pointer_rtx
,
24986 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
24987 RTX_FRAME_RELATED_P (insn
) = 1;
24991 if (frame_pointer_needed
)
24992 thumb_set_frame_pointer (offsets
);
24994 /* If we are profiling, make sure no instructions are scheduled before
24995 the call to mcount. Similarly if the user has requested no
24996 scheduling in the prolog. Similarly if we want non-call exceptions
24997 using the EABI unwinder, to prevent faulting instructions from being
24998 swapped with a stack adjustment. */
24999 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
25000 || (arm_except_unwind_info (&global_options
) == UI_TARGET
25001 && cfun
->can_throw_non_call_exceptions
))
25002 emit_insn (gen_blockage ());
25004 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
25005 if (live_regs_mask
& 0xff)
25006 cfun
->machine
->lr_save_eliminated
= 0;
25009 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25010 POP instruction can be generated. LR should be replaced by PC. All
25011 the checks required are already done by USE_RETURN_INSN (). Hence,
25012 all we really need to check here is if single register is to be
25013 returned, or multiple register return. */
25015 thumb2_expand_return (bool simple_return
)
25018 unsigned long saved_regs_mask
;
25019 arm_stack_offsets
*offsets
;
25021 offsets
= arm_get_frame_offsets ();
25022 saved_regs_mask
= offsets
->saved_regs_mask
;
25024 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25025 if (saved_regs_mask
& (1 << i
))
25028 if (!simple_return
&& saved_regs_mask
)
25032 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25033 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25034 rtx addr
= gen_rtx_MEM (SImode
,
25035 gen_rtx_POST_INC (SImode
,
25036 stack_pointer_rtx
));
25037 set_mem_alias_set (addr
, get_frame_alias_set ());
25038 XVECEXP (par
, 0, 0) = ret_rtx
;
25039 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25040 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25041 emit_jump_insn (par
);
25045 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25046 saved_regs_mask
|= (1 << PC_REGNUM
);
25047 arm_emit_multi_reg_pop (saved_regs_mask
);
25052 emit_jump_insn (simple_return_rtx
);
25057 thumb1_expand_epilogue (void)
25059 HOST_WIDE_INT amount
;
25060 arm_stack_offsets
*offsets
;
25063 /* Naked functions don't have prologues. */
25064 if (IS_NAKED (arm_current_func_type ()))
25067 offsets
= arm_get_frame_offsets ();
25068 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25070 if (frame_pointer_needed
)
25072 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25073 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25075 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25077 gcc_assert (amount
>= 0);
25080 emit_insn (gen_blockage ());
25083 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25084 GEN_INT (amount
)));
25087 /* r3 is always free in the epilogue. */
25088 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25090 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25091 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25095 /* Emit a USE (stack_pointer_rtx), so that
25096 the stack adjustment will not be deleted. */
25097 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25099 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25100 emit_insn (gen_blockage ());
25102 /* Emit a clobber for each insn that will be restored in the epilogue,
25103 so that flow2 will get register lifetimes correct. */
25104 for (regno
= 0; regno
< 13; regno
++)
25105 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25106 emit_clobber (gen_rtx_REG (SImode
, regno
));
25108 if (! df_regs_ever_live_p (LR_REGNUM
))
25109 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25112 /* Epilogue code for APCS frame. */
25114 arm_expand_epilogue_apcs_frame (bool really_return
)
25116 unsigned long func_type
;
25117 unsigned long saved_regs_mask
;
25120 int floats_from_frame
= 0;
25121 arm_stack_offsets
*offsets
;
25123 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25124 func_type
= arm_current_func_type ();
25126 /* Get frame offsets for ARM. */
25127 offsets
= arm_get_frame_offsets ();
25128 saved_regs_mask
= offsets
->saved_regs_mask
;
25130 /* Find the offset of the floating-point save area in the frame. */
25132 = (offsets
->saved_args
25133 + arm_compute_static_chain_stack_bytes ()
25136 /* Compute how many core registers saved and how far away the floats are. */
25137 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25138 if (saved_regs_mask
& (1 << i
))
25141 floats_from_frame
+= 4;
25144 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
25147 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25149 /* The offset is from IP_REGNUM. */
25150 int saved_size
= arm_get_vfp_saved_size ();
25151 if (saved_size
> 0)
25154 floats_from_frame
+= saved_size
;
25155 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25156 hard_frame_pointer_rtx
,
25157 GEN_INT (-floats_from_frame
)));
25158 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25159 ip_rtx
, hard_frame_pointer_rtx
);
25162 /* Generate VFP register multi-pop. */
25163 start_reg
= FIRST_VFP_REGNUM
;
25165 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25166 /* Look for a case where a reg does not need restoring. */
25167 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25168 && (!df_regs_ever_live_p (i
+ 1)
25169 || call_used_regs
[i
+ 1]))
25171 if (start_reg
!= i
)
25172 arm_emit_vfp_multi_reg_pop (start_reg
,
25173 (i
- start_reg
) / 2,
25174 gen_rtx_REG (SImode
,
25179 /* Restore the remaining regs that we have discovered (or possibly
25180 even all of them, if the conditional in the for loop never
25182 if (start_reg
!= i
)
25183 arm_emit_vfp_multi_reg_pop (start_reg
,
25184 (i
- start_reg
) / 2,
25185 gen_rtx_REG (SImode
, IP_REGNUM
));
25190 /* The frame pointer is guaranteed to be non-double-word aligned, as
25191 it is set to double-word-aligned old_stack_pointer - 4. */
25193 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
25195 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25196 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25198 rtx addr
= gen_frame_mem (V2SImode
,
25199 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25201 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25202 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25203 gen_rtx_REG (V2SImode
, i
),
25209 /* saved_regs_mask should contain IP which contains old stack pointer
25210 at the time of activation creation. Since SP and IP are adjacent registers,
25211 we can restore the value directly into SP. */
25212 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25213 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25214 saved_regs_mask
|= (1 << SP_REGNUM
);
25216 /* There are two registers left in saved_regs_mask - LR and PC. We
25217 only need to restore LR (the return address), but to
25218 save time we can load it directly into PC, unless we need a
25219 special function exit sequence, or we are not really returning. */
25221 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25222 && !crtl
->calls_eh_return
)
25223 /* Delete LR from the register mask, so that LR on
25224 the stack is loaded into the PC in the register mask. */
25225 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25227 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25229 num_regs
= bit_count (saved_regs_mask
);
25230 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25233 emit_insn (gen_blockage ());
25234 /* Unwind the stack to just below the saved registers. */
25235 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25236 hard_frame_pointer_rtx
,
25237 GEN_INT (- 4 * num_regs
)));
25239 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25240 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25243 arm_emit_multi_reg_pop (saved_regs_mask
);
25245 if (IS_INTERRUPT (func_type
))
25247 /* Interrupt handlers will have pushed the
25248 IP onto the stack, so restore it now. */
25250 rtx addr
= gen_rtx_MEM (SImode
,
25251 gen_rtx_POST_INC (SImode
,
25252 stack_pointer_rtx
));
25253 set_mem_alias_set (addr
, get_frame_alias_set ());
25254 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25255 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25256 gen_rtx_REG (SImode
, IP_REGNUM
),
25260 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25263 if (crtl
->calls_eh_return
)
25264 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25266 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25268 if (IS_STACKALIGN (func_type
))
25269 /* Restore the original stack pointer. Before prologue, the stack was
25270 realigned and the original stack pointer saved in r0. For details,
25271 see comment in arm_expand_prologue. */
25272 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25274 emit_jump_insn (simple_return_rtx
);
25277 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25278 function is not a sibcall. */
25280 arm_expand_epilogue (bool really_return
)
25282 unsigned long func_type
;
25283 unsigned long saved_regs_mask
;
25287 arm_stack_offsets
*offsets
;
25289 func_type
= arm_current_func_type ();
25291 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25292 let output_return_instruction take care of instruction emission if any. */
25293 if (IS_NAKED (func_type
)
25294 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25297 emit_jump_insn (simple_return_rtx
);
25301 /* If we are throwing an exception, then we really must be doing a
25302 return, so we can't tail-call. */
25303 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
25305 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25307 arm_expand_epilogue_apcs_frame (really_return
);
25311 /* Get frame offsets for ARM. */
25312 offsets
= arm_get_frame_offsets ();
25313 saved_regs_mask
= offsets
->saved_regs_mask
;
25314 num_regs
= bit_count (saved_regs_mask
);
25316 if (frame_pointer_needed
)
25319 /* Restore stack pointer if necessary. */
25322 /* In ARM mode, frame pointer points to first saved register.
25323 Restore stack pointer to last saved register. */
25324 amount
= offsets
->frame
- offsets
->saved_regs
;
25326 /* Force out any pending memory operations that reference stacked data
25327 before stack de-allocation occurs. */
25328 emit_insn (gen_blockage ());
25329 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25330 hard_frame_pointer_rtx
,
25331 GEN_INT (amount
)));
25332 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25334 hard_frame_pointer_rtx
);
25336 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25338 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25342 /* In Thumb-2 mode, the frame pointer points to the last saved
25344 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25347 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25348 hard_frame_pointer_rtx
,
25349 GEN_INT (amount
)));
25350 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25351 hard_frame_pointer_rtx
,
25352 hard_frame_pointer_rtx
);
25355 /* Force out any pending memory operations that reference stacked data
25356 before stack de-allocation occurs. */
25357 emit_insn (gen_blockage ());
25358 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25359 hard_frame_pointer_rtx
));
25360 arm_add_cfa_adjust_cfa_note (insn
, 0,
25362 hard_frame_pointer_rtx
);
25363 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25365 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25370 /* Pop off outgoing args and local frame to adjust stack pointer to
25371 last saved register. */
25372 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25376 /* Force out any pending memory operations that reference stacked data
25377 before stack de-allocation occurs. */
25378 emit_insn (gen_blockage ());
25379 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25381 GEN_INT (amount
)));
25382 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25383 stack_pointer_rtx
, stack_pointer_rtx
);
25384 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25386 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25390 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
25392 /* Generate VFP register multi-pop. */
25393 int end_reg
= LAST_VFP_REGNUM
+ 1;
25395 /* Scan the registers in reverse order. We need to match
25396 any groupings made in the prologue and generate matching
25397 vldm operations. The need to match groups is because,
25398 unlike pop, vldm can only do consecutive regs. */
25399 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25400 /* Look for a case where a reg does not need restoring. */
25401 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25402 && (!df_regs_ever_live_p (i
+ 1)
25403 || call_used_regs
[i
+ 1]))
25405 /* Restore the regs discovered so far (from reg+2 to
25407 if (end_reg
> i
+ 2)
25408 arm_emit_vfp_multi_reg_pop (i
+ 2,
25409 (end_reg
- (i
+ 2)) / 2,
25410 stack_pointer_rtx
);
25414 /* Restore the remaining regs that we have discovered (or possibly
25415 even all of them, if the conditional in the for loop never
25417 if (end_reg
> i
+ 2)
25418 arm_emit_vfp_multi_reg_pop (i
+ 2,
25419 (end_reg
- (i
+ 2)) / 2,
25420 stack_pointer_rtx
);
25424 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25425 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25428 rtx addr
= gen_rtx_MEM (V2SImode
,
25429 gen_rtx_POST_INC (SImode
,
25430 stack_pointer_rtx
));
25431 set_mem_alias_set (addr
, get_frame_alias_set ());
25432 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25433 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25434 gen_rtx_REG (V2SImode
, i
),
25436 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25437 stack_pointer_rtx
, stack_pointer_rtx
);
25440 if (saved_regs_mask
)
25443 bool return_in_pc
= false;
25445 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25446 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25447 && !IS_STACKALIGN (func_type
)
25449 && crtl
->args
.pretend_args_size
== 0
25450 && saved_regs_mask
& (1 << LR_REGNUM
)
25451 && !crtl
->calls_eh_return
)
25453 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25454 saved_regs_mask
|= (1 << PC_REGNUM
);
25455 return_in_pc
= true;
25458 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25460 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25461 if (saved_regs_mask
& (1 << i
))
25463 rtx addr
= gen_rtx_MEM (SImode
,
25464 gen_rtx_POST_INC (SImode
,
25465 stack_pointer_rtx
));
25466 set_mem_alias_set (addr
, get_frame_alias_set ());
25468 if (i
== PC_REGNUM
)
25470 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25471 XVECEXP (insn
, 0, 0) = ret_rtx
;
25472 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25474 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25475 insn
= emit_jump_insn (insn
);
25479 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25481 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25482 gen_rtx_REG (SImode
, i
),
25484 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25486 stack_pointer_rtx
);
25493 && current_tune
->prefer_ldrd_strd
25494 && !optimize_function_for_size_p (cfun
))
25497 thumb2_emit_ldrd_pop (saved_regs_mask
);
25498 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25499 arm_emit_ldrd_pop (saved_regs_mask
);
25501 arm_emit_multi_reg_pop (saved_regs_mask
);
25504 arm_emit_multi_reg_pop (saved_regs_mask
);
25512 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25516 rtx dwarf
= NULL_RTX
;
25518 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25520 GEN_INT (amount
)));
25522 RTX_FRAME_RELATED_P (tmp
) = 1;
25524 if (cfun
->machine
->uses_anonymous_args
)
25526 /* Restore pretend args. Refer arm_expand_prologue on how to save
25527 pretend_args in stack. */
25528 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25529 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25530 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25531 if (saved_regs_mask
& (1 << i
))
25533 rtx reg
= gen_rtx_REG (SImode
, i
);
25534 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25537 REG_NOTES (tmp
) = dwarf
;
25539 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25540 stack_pointer_rtx
, stack_pointer_rtx
);
25543 if (!really_return
)
25546 if (crtl
->calls_eh_return
)
25547 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25549 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25551 if (IS_STACKALIGN (func_type
))
25552 /* Restore the original stack pointer. Before prologue, the stack was
25553 realigned and the original stack pointer saved in r0. For details,
25554 see comment in arm_expand_prologue. */
25555 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25557 emit_jump_insn (simple_return_rtx
);
25560 /* Implementation of insn prologue_thumb1_interwork. This is the first
25561 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25564 thumb1_output_interwork (void)
25567 FILE *f
= asm_out_file
;
25569 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25570 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25572 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25574 /* Generate code sequence to switch us into Thumb mode. */
25575 /* The .code 32 directive has already been emitted by
25576 ASM_DECLARE_FUNCTION_NAME. */
25577 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25578 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25580 /* Generate a label, so that the debugger will notice the
25581 change in instruction sets. This label is also used by
25582 the assembler to bypass the ARM code when this function
25583 is called from a Thumb encoded function elsewhere in the
25584 same file. Hence the definition of STUB_NAME here must
25585 agree with the definition in gas/config/tc-arm.c. */
25587 #define STUB_NAME ".real_start_of"
25589 fprintf (f
, "\t.code\t16\n");
25591 if (arm_dllexport_name_p (name
))
25592 name
= arm_strip_name_encoding (name
);
25594 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25595 fprintf (f
, "\t.thumb_func\n");
25596 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      /* operands[2] is the word 4 bytes above the source MEM, used for
	 the high half of the destination pair.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      /* If the base register is also the low destination register, load
	 the high word first so the address is not clobbered early.  */
      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1   = XEXP (addr, 0);
      arg2   = XEXP (addr, 1);

      /* Normalize so BASE is the register and OFFSET the constant/other
	 operand.  */
      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base   = REGNO (base);
	  int reg_dest   = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
             higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
             the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Emit a two- or three-word block move using ldmia/stmia with
   post-increment.  N selects the word count; operands[0]/[1] are the
   destination/source address registers (updated by '!'), and
   operands[4..6] are the scratch data registers.  ldm/stm require the
   register list in ascending order, hence the swaps below.  */
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      /* Three compare-and-swap steps sort the three scratch registers
	 into ascending order (a small bubble sort).  */
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  Emits a "bl" to a
   per-register trampoline label; the trampolines themselves ("bx rN")
   are emitted later by arm_file_end.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  /* Calls via LR are not supported through this mechanism.  */
  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      /* Per-function (per-section) trampoline labels live in cfun.  */
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */

/* Expand a constant-length memory copy for Thumb.  Copies in descending
   chunk sizes: 12- and 8-byte ldm/stm blocks (which post-increment the
   address registers), then single word, halfword, and byte moves using an
   explicit OFFSET from the (already advanced) address registers.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  /* 12-byte blocks first; movmem12b updates OUT and IN in place.  */
  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
/* Handle storing a half-word to memory during reload by synthesizing as
   two byte stores (via the thumb_movhi_clobber pattern, which clobbers
   the scratch in operands[2]).  */
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  /* This reload case should never be generated.  */
  gcc_unreachable ();
}
/* Return the length of a function name prefix
    that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
      /* ARM_NAME_ENCODING_LENGTHS expands to "case 'x': return n;"
	 entries supplied by the target headers; 0 means "no prefix".  */
      ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    /* %U expands to the user-label prefix (e.g. an underscore).  */
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  /* Only annotate with the human-readable tag name when the user asked
     for commented assembly output.  */
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t@.tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
	       current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
	       current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  /* branch_cost is a hook taking (speed_p, predictable_p); print its
     value for all four combinations.  */
  asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
	       (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file,
	       "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
	       current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
/* Implement TARGET_ASM_FILE_START: emit the .arch/.cpu directive and the
   EABI build attributes that describe the compilation-wide ABI choices.
   NOTE(review): the enclosing TARGET_BPABI guard and the optimization-goal
   value ladder were reconstructed from upstream — confirm against the
   original tree.  */
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      if (arm_selected_arch)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
	    {
	      /* Keep backward compatability for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	    }
	  else
	    {
	      /* An architecture name of the form "base+ext" must be split
		 into a .arch directive plus a .arch_extension.  */
	      const char* pos = strchr (arm_selected_arch->name, '+');
	      if (pos)
		{
		  char buf[15];
		  gcc_assert (strlen (arm_selected_arch->name)
			      <= sizeof (buf) / sizeof (*pos));
		  strncpy (buf, arm_selected_arch->name,
			   (pos - arm_selected_arch->name) * sizeof (*pos));
		  buf[pos - arm_selected_arch->name] = '\0';
		  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
		  asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
		}
	      else
		asm_fprintf (asm_out_file, "\t.arch %s\n",
			     arm_selected_arch->name);
	    }
	}
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
	/* "generic-armvN" CPUs map straight to their architecture name.  */
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_selected_cpu->name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (! TARGET_SOFT_FLOAT && TARGET_VFP)
	{
	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	  if (TARGET_HARD_FLOAT_ABI)
	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	}

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}

      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
/* Implement TARGET_ASM_FILE_END: mark the stack non-executable if needed
   and emit the shared "bx rN" call-via trampolines requested earlier by
   thumb_call_via_reg.  */
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  /* Nothing more to do unless some call-via trampoline label was
     allocated in the main text section.  */
  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating al of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  /* Mark constant decls so address arithmetic can be used instead of a
     constant-pool load.  */
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */

/* Implement TARGET_ASM_INTERNAL_LABEL: reset the conditional-execution
   state machine when the label it is tracking is actually emitted.  */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.
   NOTE(review): several TARGET_THUMB1/flag_pic guards in this body were
   reconstructed from the upstream source — verify against the original
   tree before relying on exact control flow.  */
static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  /* 'this' is in r0, unless the function returns an aggregate via a
     hidden pointer argument, in which case 'this' is in r1.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? 1 : 0);

  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when avaiable.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	{
	  fputs ("\tldr\tr12, ", file);
	}
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  /* Delta does not fit an 8-bit immediate: load it from the
	     literal pool entry emitted after the thunk body.  */
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      /* Apply the delta 8 bits at a time, skipping zero pairs, so each
	 chunk is a valid ARM rotated immediate.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      /* Literal-pool slot for the out-of-range delta loaded above.  */
      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
/* MI thunk handling for TARGET_32BIT.  Builds the thunk as RTL (adjust
   'this', optionally indirect through the vcall slot, then sibcall the
   target) and runs final over it.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		       HOST_WIDE_INT vcall_offset, tree function)
{
  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.
  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  /* Pretend reload has run so the RTL below is emitted in its final,
     hard-register form.  */
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
			delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
			  false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.
   Dispatches to the 32-bit (ARM/Thumb-2) or Thumb-1 implementation.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
}
/* Emit an IWMMXT-style CONST_VECTOR X as a single hexadecimal literal,
   printing elements from most- to least-significant with a per-element
   field width chosen by the vector mode.  Returns nonzero on success.  */
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  /* Iterate from the highest element index down so the most significant
     element is printed first.  */
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  /* Place the two significant bytes in the half of the word that a
     32-bit ldr will read them from, padding the other half with zeros.  */
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
/* Output assembly to load an IWMMXT GR register.  If the address fits the
   wldrw addressing range, emit wldrw directly; otherwise spill a core
   register, load through it, and transfer with tmcr.
   NOTE(review): the operand shuffling between the out-of-range loads was
   partially reconstructed from upstream — confirm against the original.  */
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  /* wldrw handles offsets in (-1024, 1024); anything else needs the
     expansion below.  */
  if (!MEM_P (operands [1])
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      /* AAPCS: a doubleword-aligned anonymous argument starts in an
	 even-numbered register, so skip the odd one.  */
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

/* Implement TARGET_PROMOTE_FUNCTION_MODE: sub-word integers are widened
   to SImode for argument passing and return.  */
static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  /* The GNU/Linux AAPCS variant keeps full-size enums.  */
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

/* Implement TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY: decide ELF
   visibility for vtables/RTTI on dllimport/dllexport targets.  */
static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
/* Overwrite the saved return address with SOURCE (using SCRATCH as a
   temporary for large frame offsets), for EH return in ARM state.
   NOTE(review): the >= 4096 offset-splitting branch was reconstructed
   from upstream — confirm against the original tree.  */
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  /* If LR was never saved, just update the register itself.  */
  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  /* An offset of 4096 or more will not fit an immediate;
	     split it into a register add plus a small residue.  */
	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
}
/* Thumb counterpart of arm_set_return_address: store SOURCE over the
   saved LR slot, using SCRATCH when the offset exceeds the limited Thumb
   addressing range.
   NOTE(review): the offset-limit values and surrounding guards were
   reconstructed from upstream — confirm against the original tree.  */
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  /* Offset too large for a reg+imm address: materialize it in
	     SCRATCH first.  */
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode))
    return true;

  /* 64-bit integer vectors are available with either Neon or IWMMXT.  */
  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  /* Fixed-point vector modes for the ARMv6 SIMD instructions.  */
  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}
/* Implements target hook array_mode_supported_p.  Arrays of 2-4 Neon
   vectors map to the Neon structure modes used by vldN/vstN.  */
static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quardword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.
   NOTE(review): the scalar-mode switch labels were reconstructed from
   upstream — confirm against the original tree.  */

static machine_mode
arm_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  /* IWMMXT only has 64-bit vector registers.  */
  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
	return V2SImode;
      case HImode:
	return V4HImode;
      case QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass  == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  /* Core registers r0-r15 map directly.  */
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  /* Anything else has no DWARF encoding.  */
  return DWARF_FRAME_REGISTERS;
}
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models tham as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */

static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      /* Describe the value as pairs of SImode halves, swapped on
	 big-endian so the DWARF byte order is correct.  */
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
26875 #if ARM_UNWIND_INFO
26876 /* Emit unwind directives for a store-multiple instruction or stack pointer
26877 push during alignment.
26878 These should only ever be generated by the function prologue code, so
26879 expect them to have a particular form.
26880 The store-multiple instruction sometimes pushes pc as the last register,
26881 although it should not be tracked into unwind information, or for -Os
26882 sometimes pushes some dummy registers before first register that needs
26883 to be tracked in unwind information; such dummy registers are there just
26884 to avoid separate stack adjustment, and will not be restored in the
26888 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
26891 HOST_WIDE_INT offset
;
26892 HOST_WIDE_INT nregs
;
26896 unsigned padfirst
= 0, padlast
= 0;
26899 e
= XVECEXP (p
, 0, 0);
26900 gcc_assert (GET_CODE (e
) == SET
);
26902 /* First insn will adjust the stack pointer. */
26903 gcc_assert (GET_CODE (e
) == SET
26904 && REG_P (SET_DEST (e
))
26905 && REGNO (SET_DEST (e
)) == SP_REGNUM
26906 && GET_CODE (SET_SRC (e
)) == PLUS
);
26908 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
26909 nregs
= XVECLEN (p
, 0) - 1;
26910 gcc_assert (nregs
);
26912 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
26915 /* For -Os dummy registers can be pushed at the beginning to
26916 avoid separate stack pointer adjustment. */
26917 e
= XVECEXP (p
, 0, 1);
26918 e
= XEXP (SET_DEST (e
), 0);
26919 if (GET_CODE (e
) == PLUS
)
26920 padfirst
= INTVAL (XEXP (e
, 1));
26921 gcc_assert (padfirst
== 0 || optimize_size
);
26922 /* The function prologue may also push pc, but not annotate it as it is
26923 never restored. We turn this into a stack pointer adjustment. */
26924 e
= XVECEXP (p
, 0, nregs
);
26925 e
= XEXP (SET_DEST (e
), 0);
26926 if (GET_CODE (e
) == PLUS
)
26927 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
26929 padlast
= offset
- 4;
26930 gcc_assert (padlast
== 0 || padlast
== 4);
26932 fprintf (asm_out_file
, "\t.pad #4\n");
26934 fprintf (asm_out_file
, "\t.save {");
26936 else if (IS_VFP_REGNUM (reg
))
26939 fprintf (asm_out_file
, "\t.vsave {");
26942 /* Unknown register type. */
26943 gcc_unreachable ();
26945 /* If the stack increment doesn't match the size of the saved registers,
26946 something has gone horribly wrong. */
26947 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
26951 /* The remaining insns will describe the stores. */
26952 for (i
= 1; i
<= nregs
; i
++)
26954 /* Expect (set (mem <addr>) (reg)).
26955 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26956 e
= XVECEXP (p
, 0, i
);
26957 gcc_assert (GET_CODE (e
) == SET
26958 && MEM_P (SET_DEST (e
))
26959 && REG_P (SET_SRC (e
)));
26961 reg
= REGNO (SET_SRC (e
));
26962 gcc_assert (reg
>= lastreg
);
26965 fprintf (asm_out_file
, ", ");
26966 /* We can't use %r for vfp because we need to use the
26967 double precision register names. */
26968 if (IS_VFP_REGNUM (reg
))
26969 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
26971 asm_fprintf (asm_out_file
, "%r", reg
);
26975 /* Check that the addresses are consecutive. */
26976 e
= XEXP (SET_DEST (e
), 0);
26977 if (GET_CODE (e
) == PLUS
)
26978 gcc_assert (REG_P (XEXP (e
, 0))
26979 && REGNO (XEXP (e
, 0)) == SP_REGNUM
26980 && CONST_INT_P (XEXP (e
, 1))
26981 && offset
== INTVAL (XEXP (e
, 1)));
26985 && REGNO (e
) == SP_REGNUM
);
26986 offset
+= reg_size
;
26989 fprintf (asm_out_file
, "}\n");
26991 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
26994 /* Emit unwind directives for a SET. */
26997 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27005 switch (GET_CODE (e0
))
27008 /* Pushing a single register. */
27009 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27010 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27011 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27014 asm_fprintf (asm_out_file
, "\t.save ");
27015 if (IS_VFP_REGNUM (REGNO (e1
)))
27016 asm_fprintf(asm_out_file
, "{d%d}\n",
27017 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27019 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27023 if (REGNO (e0
) == SP_REGNUM
)
27025 /* A stack increment. */
27026 if (GET_CODE (e1
) != PLUS
27027 || !REG_P (XEXP (e1
, 0))
27028 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27029 || !CONST_INT_P (XEXP (e1
, 1)))
27032 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27033 -INTVAL (XEXP (e1
, 1)));
27035 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27037 HOST_WIDE_INT offset
;
27039 if (GET_CODE (e1
) == PLUS
)
27041 if (!REG_P (XEXP (e1
, 0))
27042 || !CONST_INT_P (XEXP (e1
, 1)))
27044 reg
= REGNO (XEXP (e1
, 0));
27045 offset
= INTVAL (XEXP (e1
, 1));
27046 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27047 HARD_FRAME_POINTER_REGNUM
, reg
,
27050 else if (REG_P (e1
))
27053 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27054 HARD_FRAME_POINTER_REGNUM
, reg
);
27059 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27061 /* Move from sp to reg. */
27062 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27064 else if (GET_CODE (e1
) == PLUS
27065 && REG_P (XEXP (e1
, 0))
27066 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27067 && CONST_INT_P (XEXP (e1
, 1)))
27069 /* Set reg to offset from sp. */
27070 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27071 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27083 /* Emit unwind directives for the given insn. */
27086 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27089 bool handled_one
= false;
27091 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27094 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27095 && (TREE_NOTHROW (current_function_decl
)
27096 || crtl
->all_throwers_are_sibcalls
))
27099 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27102 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27104 switch (REG_NOTE_KIND (note
))
27106 case REG_FRAME_RELATED_EXPR
:
27107 pat
= XEXP (note
, 0);
27110 case REG_CFA_REGISTER
:
27111 pat
= XEXP (note
, 0);
27114 pat
= PATTERN (insn
);
27115 if (GET_CODE (pat
) == PARALLEL
)
27116 pat
= XVECEXP (pat
, 0, 0);
27119 /* Only emitted for IS_STACKALIGN re-alignment. */
27124 src
= SET_SRC (pat
);
27125 dest
= SET_DEST (pat
);
27127 gcc_assert (src
== stack_pointer_rtx
);
27128 reg
= REGNO (dest
);
27129 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27132 handled_one
= true;
27135 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27136 to get correct dwarf information for shrink-wrap. We should not
27137 emit unwind information for it because these are used either for
27138 pretend arguments or notes to adjust sp and restore registers from
27140 case REG_CFA_DEF_CFA
:
27141 case REG_CFA_ADJUST_CFA
:
27142 case REG_CFA_RESTORE
:
27145 case REG_CFA_EXPRESSION
:
27146 case REG_CFA_OFFSET
:
27147 /* ??? Only handling here what we actually emit. */
27148 gcc_unreachable ();
27156 pat
= PATTERN (insn
);
27159 switch (GET_CODE (pat
))
27162 arm_unwind_emit_set (asm_out_file
, pat
);
27166 /* Store multiple. */
27167 arm_unwind_emit_sequence (asm_out_file
, pat
);
27176 /* Output a reference from a function exception table to the type_info
27177 object X. The EABI specifies that the symbol should be relocated by
27178 an R_ARM_TARGET2 relocation. */
27181 arm_output_ttype (rtx x
)
27183 fputs ("\t.word\t", asm_out_file
);
27184 output_addr_const (asm_out_file
, x
);
27185 /* Use special relocations for symbol references. */
27186 if (!CONST_INT_P (x
))
27187 fputs ("(TARGET2)", asm_out_file
);
27188 fputc ('\n', asm_out_file
);
27193 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27196 arm_asm_emit_except_personality (rtx personality
)
27198 fputs ("\t.personality\t", asm_out_file
);
27199 output_addr_const (asm_out_file
, personality
);
27200 fputc ('\n', asm_out_file
);
27203 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27206 arm_asm_init_sections (void)
27208 exception_section
= get_unnamed_section (0, output_section_asm_op
,
27211 #endif /* ARM_UNWIND_INFO */
27213 /* Output unwind directives for the start/end of a function. */
27216 arm_output_fn_unwind (FILE * f
, bool prologue
)
27218 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27222 fputs ("\t.fnstart\n", f
);
27225 /* If this function will never be unwound, then mark it as such.
27226 The came condition is used in arm_unwind_emit to suppress
27227 the frame annotations. */
27228 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27229 && (TREE_NOTHROW (current_function_decl
)
27230 || crtl
->all_throwers_are_sibcalls
))
27231 fputs("\t.cantunwind\n", f
);
27233 fputs ("\t.fnend\n", f
);
27238 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27240 enum tls_reloc reloc
;
27243 val
= XVECEXP (x
, 0, 0);
27244 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27246 output_addr_const (fp
, val
);
27251 fputs ("(tlsgd)", fp
);
27254 fputs ("(tlsldm)", fp
);
27257 fputs ("(tlsldo)", fp
);
27260 fputs ("(gottpoff)", fp
);
27263 fputs ("(tpoff)", fp
);
27266 fputs ("(tlsdesc)", fp
);
27269 gcc_unreachable ();
27278 fputs (" + (. - ", fp
);
27279 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27280 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27281 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27282 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27292 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27295 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27297 gcc_assert (size
== 4);
27298 fputs ("\t.word\t", file
);
27299 output_addr_const (file
, x
);
27300 fputs ("(tlsldo)", file
);
27303 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27306 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27308 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27309 return arm_emit_tls_decoration (fp
, x
);
27310 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27313 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27315 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27316 assemble_name_raw (fp
, label
);
27320 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27322 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27326 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27330 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27332 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27336 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27340 else if (GET_CODE (x
) == CONST_VECTOR
)
27341 return arm_emit_vector_const (fp
, x
);
27346 /* Output assembly for a shift instruction.
27347 SET_FLAGS determines how the instruction modifies the condition codes.
27348 0 - Do not set condition codes.
27349 1 - Set condition codes.
27350 2 - Use smallest instruction. */
27352 arm_output_shift(rtx
* operands
, int set_flags
)
27355 static const char flag_chars
[3] = {'?', '.', '!'};
27360 c
= flag_chars
[set_flags
];
27361 shift
= shift_op(operands
[3], &val
);
27365 operands
[2] = GEN_INT(val
);
27366 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27369 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27371 output_asm_insn (pattern
, operands
);
27375 /* Output assembly for a WMMX immediate shift instruction. */
27377 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27379 int shift
= INTVAL (operands
[2]);
27381 machine_mode opmode
= GET_MODE (operands
[0]);
27383 gcc_assert (shift
>= 0);
27385 /* If the shift value in the register versions is > 63 (for D qualifier),
27386 31 (for W qualifier) or 15 (for H qualifier). */
27387 if (((opmode
== V4HImode
) && (shift
> 15))
27388 || ((opmode
== V2SImode
) && (shift
> 31))
27389 || ((opmode
== DImode
) && (shift
> 63)))
27393 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27394 output_asm_insn (templ
, operands
);
27395 if (opmode
== DImode
)
27397 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27398 output_asm_insn (templ
, operands
);
27403 /* The destination register will contain all zeros. */
27404 sprintf (templ
, "wzero\t%%0");
27405 output_asm_insn (templ
, operands
);
27410 if ((opmode
== DImode
) && (shift
> 32))
27412 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27413 output_asm_insn (templ
, operands
);
27414 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27415 output_asm_insn (templ
, operands
);
27419 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27420 output_asm_insn (templ
, operands
);
27425 /* Output assembly for a WMMX tinsr instruction. */
27427 arm_output_iwmmxt_tinsr (rtx
*operands
)
27429 int mask
= INTVAL (operands
[3]);
27432 int units
= mode_nunits
[GET_MODE (operands
[0])];
27433 gcc_assert ((mask
& (mask
- 1)) == 0);
27434 for (i
= 0; i
< units
; ++i
)
27436 if ((mask
& 0x01) == 1)
27442 gcc_assert (i
< units
);
27444 switch (GET_MODE (operands
[0]))
27447 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27450 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27453 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27456 gcc_unreachable ();
27459 output_asm_insn (templ
, operands
);
27464 /* Output a Thumb-1 casesi dispatch sequence. */
27466 thumb1_output_casesi (rtx
*operands
)
27468 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27470 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27472 switch (GET_MODE(diff_vec
))
27475 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27476 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27478 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27479 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27481 return "bl\t%___gnu_thumb1_case_si";
27483 gcc_unreachable ();
27487 /* Output a Thumb-2 casesi instruction. */
27489 thumb2_output_casesi (rtx
*operands
)
27491 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27493 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27495 output_asm_insn ("cmp\t%0, %1", operands
);
27496 output_asm_insn ("bhi\t%l3", operands
);
27497 switch (GET_MODE(diff_vec
))
27500 return "tbb\t[%|pc, %0]";
27502 return "tbh\t[%|pc, %0, lsl #1]";
27506 output_asm_insn ("adr\t%4, %l2", operands
);
27507 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27508 output_asm_insn ("add\t%4, %4, %5", operands
);
27513 output_asm_insn ("adr\t%4, %l2", operands
);
27514 return "ldr\t%|pc, [%4, %0, lsl #2]";
27517 gcc_unreachable ();
27521 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27522 per-core tuning structs. */
27524 arm_issue_rate (void)
27526 return current_tune
->issue_rate
;
27529 /* Return how many instructions should scheduler lookahead to choose the
27532 arm_first_cycle_multipass_dfa_lookahead (void)
27534 int issue_rate
= arm_issue_rate ();
27536 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27539 /* Enable modeling of L2 auto-prefetcher. */
27541 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27543 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27547 arm_mangle_type (const_tree type
)
27549 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27550 has to be managled as if it is in the "std" namespace. */
27551 if (TARGET_AAPCS_BASED
27552 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27553 return "St9__va_list";
27555 /* Half-precision float. */
27556 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27559 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27561 if (TYPE_NAME (type
) != NULL
)
27562 return arm_mangle_builtin_type (type
);
27564 /* Use the default mangling. */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};
27580 /* Adjust register allocation order when compiling for Thumb. */
27583 arm_order_regs_for_local_alloc (void)
27585 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27586 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27588 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27589 sizeof (thumb_core_reg_alloc_order
));
27592 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27595 arm_frame_pointer_required (void)
27597 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27600 /* If the function receives nonlocal gotos, it needs to save the frame
27601 pointer in the nonlocal_goto_save_area object. */
27602 if (cfun
->has_nonlocal_label
)
27605 /* The frame pointer is required for non-leaf APCS frames. */
27606 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !leaf_function_p ())
27609 /* If we are probing the stack in the prologue, we will have a faulting
27610 instruction prior to the stack adjustment and this requires a frame
27611 pointer if we want to catch the exception using the EABI unwinder. */
27612 if (!IS_INTERRUPT (arm_current_func_type ())
27613 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27614 && arm_except_unwind_info (&global_options
) == UI_TARGET
27615 && cfun
->can_throw_non_call_exceptions
)
27617 HOST_WIDE_INT size
= get_frame_size ();
27619 /* That's irrelevant if there is no stack adjustment. */
27623 /* That's relevant only if there is a stack probe. */
27624 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27626 /* We don't have the final size of the frame so adjust. */
27627 size
+= 32 * UNITS_PER_WORD
;
27628 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
27638 /* Only thumb1 can't support conditional execution, so return true if
27639 the target is not thumb1. */
27641 arm_have_conditional_execution (void)
27643 return !TARGET_THUMB1
;
27646 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27647 static HOST_WIDE_INT
27648 arm_vector_alignment (const_tree type
)
27650 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27652 if (TARGET_AAPCS_BASED
)
27653 align
= MIN (align
, 64);
27658 static unsigned int
27659 arm_autovectorize_vector_sizes (void)
27661 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27665 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27667 /* Vectors which aren't in packed structures will not be less aligned than
27668 the natural alignment of their element type, so this is safe. */
27669 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27672 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27676 arm_builtin_support_vector_misalignment (machine_mode mode
,
27677 const_tree type
, int misalignment
,
27680 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27682 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27687 /* If the misalignment is unknown, we should be able to handle the access
27688 so long as it is not to a member of a packed data structure. */
27689 if (misalignment
== -1)
27692 /* Return true if the misalignment is a multiple of the natural alignment
27693 of the vector's element type. This is probably always going to be
27694 true in practice, since we've already established that this isn't a
27696 return ((misalignment
% align
) == 0);
27699 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27704 arm_conditional_register_usage (void)
27708 if (TARGET_THUMB1
&& optimize_size
)
27710 /* When optimizing for size on Thumb-1, it's better not
27711 to use the HI regs, because of the overhead of
27713 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27714 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27717 /* The link register can be clobbered by any branch insn,
27718 but we have no way to track that at present, so mark
27719 it as unavailable. */
27721 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27723 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
27725 /* VFPv3 registers are disabled when earlier VFP
27726 versions are selected due to the definition of
27727 LAST_VFP_REGNUM. */
27728 for (regno
= FIRST_VFP_REGNUM
;
27729 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27731 fixed_regs
[regno
] = 0;
27732 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27733 || regno
>= FIRST_VFP_REGNUM
+ 32;
27737 if (TARGET_REALLY_IWMMXT
)
27739 regno
= FIRST_IWMMXT_GR_REGNUM
;
27740 /* The 2002/10/09 revision of the XScale ABI has wCG0
27741 and wCG1 as call-preserved registers. The 2002/11/21
27742 revision changed this so that all wCG registers are
27743 scratch registers. */
27744 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27745 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27746 fixed_regs
[regno
] = 0;
27747 /* The XScale ABI has wR0 - wR9 as scratch registers,
27748 the rest as call-preserved registers. */
27749 for (regno
= FIRST_IWMMXT_REGNUM
;
27750 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27752 fixed_regs
[regno
] = 0;
27753 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27757 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27759 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27760 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27762 else if (TARGET_APCS_STACK
)
27764 fixed_regs
[10] = 1;
27765 call_used_regs
[10] = 1;
27767 /* -mcaller-super-interworking reserves r11 for calls to
27768 _interwork_r11_call_via_rN(). Making the register global
27769 is an easy way of ensuring that it remains valid for all
27771 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27772 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27774 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27775 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27776 if (TARGET_CALLER_INTERWORKING
)
27777 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27779 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27783 arm_preferred_rename_class (reg_class_t rclass
)
27785 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27786 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27787 and code size can be reduced. */
27788 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27794 /* Compute the atrribute "length" of insn "*push_multi".
27795 So this function MUST be kept in sync with that insn pattern. */
27797 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
27799 int i
, regno
, hi_reg
;
27800 int num_saves
= XVECLEN (parallel_op
, 0);
27810 regno
= REGNO (first_op
);
27811 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27812 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
27814 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
27815 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27823 /* Compute the number of instructions emitted by output_move_double. */
27825 arm_count_output_move_double_insns (rtx
*operands
)
27829 /* output_move_double may modify the operands array, so call it
27830 here on a copy of the array. */
27831 ops
[0] = operands
[0];
27832 ops
[1] = operands
[1];
27833 output_move_double (ops
, false, &count
);
27838 vfp3_const_double_for_fract_bits (rtx operand
)
27840 REAL_VALUE_TYPE r0
;
27842 if (!CONST_DOUBLE_P (operand
))
27845 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
27846 if (exact_real_inverse (DFmode
, &r0
)
27847 && !REAL_VALUE_NEGATIVE (r0
))
27849 if (exact_real_truncate (DFmode
, &r0
))
27851 HOST_WIDE_INT value
= real_to_integer (&r0
);
27852 value
= value
& 0xffffffff;
27853 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
27854 return int_log2 (value
);
27860 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27861 log2 is in [1, 32], return that log2. Otherwise return -1.
27862 This is used in the patterns for vcvt.s32.f32 floating-point to
27863 fixed-point conversions. */
27866 vfp3_const_double_for_bits (rtx x
)
27868 const REAL_VALUE_TYPE
*r
;
27870 if (!CONST_DOUBLE_P (x
))
27873 r
= CONST_DOUBLE_REAL_VALUE (x
);
27875 if (REAL_VALUE_NEGATIVE (*r
)
27876 || REAL_VALUE_ISNAN (*r
)
27877 || REAL_VALUE_ISINF (*r
)
27878 || !real_isinteger (r
, SFmode
))
27881 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
27883 /* The exact_log2 above will have returned -1 if this is
27884 not an exact log2. */
27885 if (!IN_RANGE (hwint
, 1, 32))
27892 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27895 arm_pre_atomic_barrier (enum memmodel model
)
27897 if (need_atomic_barrier_p (model
, true))
27898 emit_insn (gen_memory_barrier ());
27902 arm_post_atomic_barrier (enum memmodel model
)
27904 if (need_atomic_barrier_p (model
, false))
27905 emit_insn (gen_memory_barrier ());
27908 /* Emit the load-exclusive and store-exclusive instructions.
27909 Use acquire and release versions if necessary. */
27912 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
27914 rtx (*gen
) (rtx
, rtx
);
27920 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
27921 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
27922 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
27923 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
27925 gcc_unreachable ();
27932 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
27933 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
27934 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
27935 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
27937 gcc_unreachable ();
27941 emit_insn (gen (rval
, mem
));
27945 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
27948 rtx (*gen
) (rtx
, rtx
, rtx
);
27954 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
27955 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
27956 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
27957 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
27959 gcc_unreachable ();
27966 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
27967 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
27968 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
27969 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
27971 gcc_unreachable ();
27975 emit_insn (gen (bval
, rval
, mem
));
27978 /* Mark the previous jump instruction as unlikely. */
27981 emit_unlikely_jump (rtx insn
)
27983 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
27985 insn
= emit_jump_insn (insn
);
27986 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
27989 /* Expand a compare and swap pattern. */
27992 arm_expand_compare_and_swap (rtx operands
[])
27994 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
27996 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
27998 bval
= operands
[0];
27999 rval
= operands
[1];
28001 oldval
= operands
[3];
28002 newval
= operands
[4];
28003 is_weak
= operands
[5];
28004 mod_s
= operands
[6];
28005 mod_f
= operands
[7];
28006 mode
= GET_MODE (mem
);
28008 /* Normally the succ memory model must be stronger than fail, but in the
28009 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28010 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28012 if (TARGET_HAVE_LDACQ
28013 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28014 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28015 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28021 /* For narrow modes, we're going to perform the comparison in SImode,
28022 so do the zero-extension now. */
28023 rval
= gen_reg_rtx (SImode
);
28024 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28028 /* Force the value into a register if needed. We waited until after
28029 the zero-extension above to do this properly. */
28030 if (!arm_add_operand (oldval
, SImode
))
28031 oldval
= force_reg (SImode
, oldval
);
28035 if (!cmpdi_operand (oldval
, mode
))
28036 oldval
= force_reg (mode
, oldval
);
28040 gcc_unreachable ();
28045 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
28046 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
28047 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
28048 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
28050 gcc_unreachable ();
28053 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28055 if (mode
== QImode
|| mode
== HImode
)
28056 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28058 /* In all cases, we arrange for success to be signaled by Z set.
28059 This arrangement allows for the boolean result to be used directly
28060 in a subsequent branch, post optimization. */
28061 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28062 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
28063 emit_insn (gen_rtx_SET (bval
, x
));
28066 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28067 another memory store between the load-exclusive and store-exclusive can
28068 reset the monitor from Exclusive to Open state. This means we must wait
28069 until after reload to split the pattern, lest we get a register spill in
28070 the middle of the atomic sequence. */
28073 arm_split_compare_and_swap (rtx operands
[])
28075 rtx rval
, mem
, oldval
, newval
, scratch
;
28077 enum memmodel mod_s
, mod_f
;
28079 rtx_code_label
*label1
, *label2
;
28082 rval
= operands
[0];
28084 oldval
= operands
[2];
28085 newval
= operands
[3];
28086 is_weak
= (operands
[4] != const0_rtx
);
28087 mod_s
= memmodel_from_int (INTVAL (operands
[5]));
28088 mod_f
= memmodel_from_int (INTVAL (operands
[6]));
28089 scratch
= operands
[7];
28090 mode
= GET_MODE (mem
);
28092 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28094 bool use_acquire
= TARGET_HAVE_LDACQ
28095 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28096 || is_mm_release (mod_s
));
28098 bool use_release
= TARGET_HAVE_LDACQ
28099 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28100 || is_mm_acquire (mod_s
));
28102 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28103 a full barrier is emitted after the store-release. */
28105 use_acquire
= false;
28107 /* Checks whether a barrier is needed and emits one accordingly. */
28108 if (!(use_acquire
|| use_release
))
28109 arm_pre_atomic_barrier (mod_s
);
28114 label1
= gen_label_rtx ();
28115 emit_label (label1
);
28117 label2
= gen_label_rtx ();
28119 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28121 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
28122 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28123 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28124 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28125 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28127 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
28129 /* Weak or strong, we want EQ to be true for success, so that we
28130 match the flags that we got from the compare above. */
28131 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28132 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
28133 emit_insn (gen_rtx_SET (cond
, x
));
28137 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28138 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28139 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
28140 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28143 if (!is_mm_relaxed (mod_f
))
28144 emit_label (label2
);
28146 /* Checks whether a barrier is needed and emits one accordingly. */
28148 || !(use_acquire
|| use_release
))
28149 arm_post_atomic_barrier (mod_s
);
28151 if (is_mm_relaxed (mod_f
))
28152 emit_label (label2
);
28156 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28157 rtx value
, rtx model_rtx
, rtx cond
)
28159 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28160 machine_mode mode
= GET_MODE (mem
);
28161 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28162 rtx_code_label
*label
;
28165 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28167 bool use_acquire
= TARGET_HAVE_LDACQ
28168 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28169 || is_mm_release (model
));
28171 bool use_release
= TARGET_HAVE_LDACQ
28172 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28173 || is_mm_acquire (model
));
28175 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28176 a full barrier is emitted after the store-release. */
28178 use_acquire
= false;
28180 /* Checks whether a barrier is needed and emits one accordingly. */
28181 if (!(use_acquire
|| use_release
))
28182 arm_pre_atomic_barrier (model
);
28184 label
= gen_label_rtx ();
28185 emit_label (label
);
28188 new_out
= gen_lowpart (wmode
, new_out
);
28190 old_out
= gen_lowpart (wmode
, old_out
);
28193 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28195 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28204 x
= gen_rtx_AND (wmode
, old_out
, value
);
28205 emit_insn (gen_rtx_SET (new_out
, x
));
28206 x
= gen_rtx_NOT (wmode
, new_out
);
28207 emit_insn (gen_rtx_SET (new_out
, x
));
28211 if (CONST_INT_P (value
))
28213 value
= GEN_INT (-INTVAL (value
));
28219 if (mode
== DImode
)
28221 /* DImode plus/minus need to clobber flags. */
28222 /* The adddi3 and subdi3 patterns are incorrectly written so that
28223 they require matching operands, even when we could easily support
28224 three operands. Thankfully, this can be fixed up post-splitting,
28225 as the individual add+adc patterns do accept three operands and
28226 post-reload cprop can make these moves go away. */
28227 emit_move_insn (new_out
, old_out
);
28229 x
= gen_adddi3 (new_out
, new_out
, value
);
28231 x
= gen_subdi3 (new_out
, new_out
, value
);
28238 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28239 emit_insn (gen_rtx_SET (new_out
, x
));
28243 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28246 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28247 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28249 /* Checks whether a barrier is needed and emits one accordingly. */
28251 || !(use_acquire
|| use_release
))
28252 arm_post_atomic_barrier (model
);
28255 #define MAX_VECT_LEN 16
28257 struct expand_vec_perm_d
28259 rtx target
, op0
, op1
;
28260 unsigned char perm
[MAX_VECT_LEN
];
28261 machine_mode vmode
;
28262 unsigned char nelt
;
28267 /* Generate a variable permutation. */
28270 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28272 machine_mode vmode
= GET_MODE (target
);
28273 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28275 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28276 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28277 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28278 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28279 gcc_checking_assert (TARGET_NEON
);
28283 if (vmode
== V8QImode
)
28284 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28286 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28292 if (vmode
== V8QImode
)
28294 pair
= gen_reg_rtx (V16QImode
);
28295 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28296 pair
= gen_lowpart (TImode
, pair
);
28297 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28301 pair
= gen_reg_rtx (OImode
);
28302 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28303 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28309 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28311 machine_mode vmode
= GET_MODE (target
);
28312 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
28313 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28314 rtx rmask
[MAX_VECT_LEN
], mask
;
28316 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28317 numbering of elements for big-endian, we must reverse the order. */
28318 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28320 /* The VTBL instruction does not use a modulo index, so we must take care
28321 of that ourselves. */
28322 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28323 for (i
= 0; i
< nelt
; ++i
)
28325 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
28326 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28328 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28331 /* Map lane ordering between architectural lane order, and GCC lane order,
28332 taking into account ABI. See comment above output_move_neon for details. */
28335 neon_endian_lane_map (machine_mode mode
, int lane
)
28337 if (BYTES_BIG_ENDIAN
)
28339 int nelems
= GET_MODE_NUNITS (mode
);
28340 /* Reverse lane order. */
28341 lane
= (nelems
- 1 - lane
);
28342 /* Reverse D register order, to match ABI. */
28343 if (GET_MODE_SIZE (mode
) == 16)
28344 lane
= lane
^ (nelems
/ 2);
28349 /* Some permutations index into pairs of vectors, this is a helper function
28350 to map indexes into those pairs of vectors. */
28353 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
28355 int nelem
= GET_MODE_NUNITS (mode
);
28356 if (BYTES_BIG_ENDIAN
)
28358 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
28362 /* Generate or test for an insn that supports a constant permutation. */
28364 /* Recognize patterns for the VUZP insns. */
28367 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28369 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28370 rtx out0
, out1
, in0
, in1
;
28371 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28375 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28378 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28379 big endian pattern on 64 bit vectors, so we correct for that. */
28380 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28381 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
28383 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28385 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28387 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28391 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28393 for (i
= 0; i
< nelt
; i
++)
28396 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28397 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
28407 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28408 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28409 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28410 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28411 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28412 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28413 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28414 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28416 gcc_unreachable ();
28421 if (swap_nelt
!= 0)
28422 std::swap (in0
, in1
);
28425 out1
= gen_reg_rtx (d
->vmode
);
28427 std::swap (out0
, out1
);
28429 emit_insn (gen (out0
, in0
, in1
, out1
));
28433 /* Recognize patterns for the VZIP insns. */
28436 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28438 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
28439 rtx out0
, out1
, in0
, in1
;
28440 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28444 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28447 is_swapped
= BYTES_BIG_ENDIAN
;
28449 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
28452 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
28454 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28458 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28460 for (i
= 0; i
< nelt
/ 2; i
++)
28463 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
28464 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
28468 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
28469 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
28480 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28481 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28482 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28483 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28484 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28485 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28486 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28487 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28489 gcc_unreachable ();
28495 std::swap (in0
, in1
);
28498 out1
= gen_reg_rtx (d
->vmode
);
28500 std::swap (out0
, out1
);
28502 emit_insn (gen (out0
, in0
, in1
, out1
));
28506 /* Recognize patterns for the VREV insns. */
28509 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
28511 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
28512 rtx (*gen
)(rtx
, rtx
);
28514 if (!d
->one_vector_p
)
28523 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28524 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28532 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28533 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28534 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28535 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28543 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
28544 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
28545 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
28546 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
28547 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
28548 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
28549 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
28550 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
28559 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
28560 for (j
= 0; j
<= diff
; j
+= 1)
28562 /* This is guaranteed to be true as the value of diff
28563 is 7, 3, 1 and we should have enough elements in the
28564 queue to generate this. Getting a vector mask with a
28565 value of diff other than these values implies that
28566 something is wrong by the time we get here. */
28567 gcc_assert (i
+ j
< nelt
);
28568 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
28576 emit_insn (gen (d
->target
, d
->op0
));
28580 /* Recognize patterns for the VTRN insns. */
28583 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
28585 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28586 rtx out0
, out1
, in0
, in1
;
28587 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28589 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28592 /* Note that these are little-endian tests. Adjust for big-endian later. */
28593 if (d
->perm
[0] == 0)
28595 else if (d
->perm
[0] == 1)
28599 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28601 for (i
= 0; i
< nelt
; i
+= 2)
28603 if (d
->perm
[i
] != i
+ odd
)
28605 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28615 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28616 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28617 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28618 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28619 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
28620 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
28621 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
28622 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
28624 gcc_unreachable ();
28629 if (BYTES_BIG_ENDIAN
)
28631 std::swap (in0
, in1
);
28636 out1
= gen_reg_rtx (d
->vmode
);
28638 std::swap (out0
, out1
);
28640 emit_insn (gen (out0
, in0
, in1
, out1
));
28644 /* Recognize patterns for the VEXT insns. */
28647 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
28649 unsigned int i
, nelt
= d
->nelt
;
28650 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
28653 unsigned int location
;
28655 unsigned int next
= d
->perm
[0] + 1;
28657 /* TODO: Handle GCC's numbering of elements for big-endian. */
28658 if (BYTES_BIG_ENDIAN
)
28661 /* Check if the extracted indexes are increasing by one. */
28662 for (i
= 1; i
< nelt
; next
++, i
++)
28664 /* If we hit the most significant element of the 2nd vector in
28665 the previous iteration, no need to test further. */
28666 if (next
== 2 * nelt
)
28669 /* If we are operating on only one vector: it could be a
28670 rotation. If there are only two elements of size < 64, let
28671 arm_evpc_neon_vrev catch it. */
28672 if (d
->one_vector_p
&& (next
== nelt
))
28674 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
28680 if (d
->perm
[i
] != next
)
28684 location
= d
->perm
[0];
28688 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
28689 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
28690 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
28691 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
28692 case V2SImode
: gen
= gen_neon_vextv2si
; break;
28693 case V4SImode
: gen
= gen_neon_vextv4si
; break;
28694 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
28695 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
28696 case V2DImode
: gen
= gen_neon_vextv2di
; break;
28705 offset
= GEN_INT (location
);
28706 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
28710 /* The NEON VTBL instruction is a fully variable permuation that's even
28711 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28712 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28713 can do slightly better by expanding this as a constant where we don't
28714 have to apply a mask. */
28717 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
28719 rtx rperm
[MAX_VECT_LEN
], sel
;
28720 machine_mode vmode
= d
->vmode
;
28721 unsigned int i
, nelt
= d
->nelt
;
28723 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28724 numbering of elements for big-endian, we must reverse the order. */
28725 if (BYTES_BIG_ENDIAN
)
28731 /* Generic code will try constant permutation twice. Once with the
28732 original mode and again with the elements lowered to QImode.
28733 So wait and don't do the selector expansion ourselves. */
28734 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
28737 for (i
= 0; i
< nelt
; ++i
)
28738 rperm
[i
] = GEN_INT (d
->perm
[i
]);
28739 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
28740 sel
= force_reg (vmode
, sel
);
28742 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
28747 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
28749 /* Check if the input mask matches vext before reordering the
28752 if (arm_evpc_neon_vext (d
))
28755 /* The pattern matching functions above are written to look for a small
28756 number to begin the sequence (0, 1, N/2). If we begin with an index
28757 from the second operand, we can swap the operands. */
28758 if (d
->perm
[0] >= d
->nelt
)
28760 unsigned i
, nelt
= d
->nelt
;
28762 for (i
= 0; i
< nelt
; ++i
)
28763 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
28765 std::swap (d
->op0
, d
->op1
);
28770 if (arm_evpc_neon_vuzp (d
))
28772 if (arm_evpc_neon_vzip (d
))
28774 if (arm_evpc_neon_vrev (d
))
28776 if (arm_evpc_neon_vtrn (d
))
28778 return arm_evpc_neon_vtbl (d
);
28783 /* Expand a vec_perm_const pattern. */
28786 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28788 struct expand_vec_perm_d d
;
28789 int i
, nelt
, which
;
28795 d
.vmode
= GET_MODE (target
);
28796 gcc_assert (VECTOR_MODE_P (d
.vmode
));
28797 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
28798 d
.testing_p
= false;
28800 for (i
= which
= 0; i
< nelt
; ++i
)
28802 rtx e
= XVECEXP (sel
, 0, i
);
28803 int ei
= INTVAL (e
) & (2 * nelt
- 1);
28804 which
|= (ei
< nelt
? 1 : 2);
28814 d
.one_vector_p
= false;
28815 if (!rtx_equal_p (op0
, op1
))
28818 /* The elements of PERM do not suggest that only the first operand
28819 is used, but both operands are identical. Allow easier matching
28820 of the permutation by folding the permutation into the single
28824 for (i
= 0; i
< nelt
; ++i
)
28825 d
.perm
[i
] &= nelt
- 1;
28827 d
.one_vector_p
= true;
28832 d
.one_vector_p
= true;
28836 return arm_expand_vec_perm_const_1 (&d
);
28839 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28842 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
28843 const unsigned char *sel
)
28845 struct expand_vec_perm_d d
;
28846 unsigned int i
, nelt
, which
;
28850 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
28851 d
.testing_p
= true;
28852 memcpy (d
.perm
, sel
, nelt
);
28854 /* Categorize the set of elements in the selector. */
28855 for (i
= which
= 0; i
< nelt
; ++i
)
28857 unsigned char e
= d
.perm
[i
];
28858 gcc_assert (e
< 2 * nelt
);
28859 which
|= (e
< nelt
? 1 : 2);
28862 /* For all elements from second vector, fold the elements to first. */
28864 for (i
= 0; i
< nelt
; ++i
)
28867 /* Check whether the mask can be applied to the vector type. */
28868 d
.one_vector_p
= (which
!= 3);
28870 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
28871 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
28872 if (!d
.one_vector_p
)
28873 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
28876 ret
= arm_expand_vec_perm_const_1 (&d
);
28883 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
28885 /* If we are soft float and we do not have ldrd
28886 then all auto increment forms are ok. */
28887 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
28892 /* Post increment and Pre Decrement are supported for all
28893 instruction forms except for vector forms. */
28896 if (VECTOR_MODE_P (mode
))
28898 if (code
!= ARM_PRE_DEC
)
28908 /* Without LDRD and mode size greater than
28909 word size, there is no point in auto-incrementing
28910 because ldm and stm will not have these forms. */
28911 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
28914 /* Vector and floating point modes do not support
28915 these auto increment forms. */
28916 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
28929 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28930 on ARM, since we know that shifts by negative amounts are no-ops.
28931 Additionally, the default expansion code is not available or suitable
28932 for post-reload insn splits (this can occur when the register allocator
28933 chooses not to do a shift in NEON).
28935 This function is used in both initial expand and post-reload splits, and
28936 handles all kinds of 64-bit shifts.
28938 Input requirements:
28939 - It is safe for the input and output to be the same register, but
28940 early-clobber rules apply for the shift amount and scratch registers.
28941 - Shift by register requires both scratch registers. In all other cases
28942 the scratch registers may be NULL.
28943 - Ashiftrt by a register also clobbers the CC register. */
28945 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
28946 rtx amount
, rtx scratch1
, rtx scratch2
)
28948 rtx out_high
= gen_highpart (SImode
, out
);
28949 rtx out_low
= gen_lowpart (SImode
, out
);
28950 rtx in_high
= gen_highpart (SImode
, in
);
28951 rtx in_low
= gen_lowpart (SImode
, in
);
28954 in = the register pair containing the input value.
28955 out = the destination register pair.
28956 up = the high- or low-part of each pair.
28957 down = the opposite part to "up".
28958 In a shift, we can consider bits to shift from "up"-stream to
28959 "down"-stream, so in a left-shift "up" is the low-part and "down"
28960 is the high-part of each register pair. */
28962 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
28963 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
28964 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
28965 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
28967 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
28969 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
28970 && GET_MODE (out
) == DImode
);
28972 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
28973 && GET_MODE (in
) == DImode
);
28975 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
28976 && GET_MODE (amount
) == SImode
)
28977 || CONST_INT_P (amount
)));
28978 gcc_assert (scratch1
== NULL
28979 || (GET_CODE (scratch1
) == SCRATCH
)
28980 || (GET_MODE (scratch1
) == SImode
28981 && REG_P (scratch1
)));
28982 gcc_assert (scratch2
== NULL
28983 || (GET_CODE (scratch2
) == SCRATCH
)
28984 || (GET_MODE (scratch2
) == SImode
28985 && REG_P (scratch2
)));
28986 gcc_assert (!REG_P (out
) || !REG_P (amount
)
28987 || !HARD_REGISTER_P (out
)
28988 || (REGNO (out
) != REGNO (amount
)
28989 && REGNO (out
) + 1 != REGNO (amount
)));
28991 /* Macros to make following code more readable. */
28992 #define SUB_32(DEST,SRC) \
28993 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28994 #define RSB_32(DEST,SRC) \
28995 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28996 #define SUB_S_32(DEST,SRC) \
28997 gen_addsi3_compare0 ((DEST), (SRC), \
28999 #define SET(DEST,SRC) \
29000 gen_rtx_SET ((DEST), (SRC))
29001 #define SHIFT(CODE,SRC,AMOUNT) \
29002 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29003 #define LSHIFT(CODE,SRC,AMOUNT) \
29004 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29005 SImode, (SRC), (AMOUNT))
29006 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29007 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29008 SImode, (SRC), (AMOUNT))
29010 gen_rtx_IOR (SImode, (A), (B))
29011 #define BRANCH(COND,LABEL) \
29012 gen_arm_cond_branch ((LABEL), \
29013 gen_rtx_ ## COND (CCmode, cc_reg, \
29017 /* Shifts by register and shifts by constant are handled separately. */
29018 if (CONST_INT_P (amount
))
29020 /* We have a shift-by-constant. */
29022 /* First, handle out-of-range shift amounts.
29023 In both cases we try to match the result an ARM instruction in a
29024 shift-by-register would give. This helps reduce execution
29025 differences between optimization levels, but it won't stop other
29026 parts of the compiler doing different things. This is "undefined
29027 behavior, in any case. */
29028 if (INTVAL (amount
) <= 0)
29029 emit_insn (gen_movdi (out
, in
));
29030 else if (INTVAL (amount
) >= 64)
29032 if (code
== ASHIFTRT
)
29034 rtx const31_rtx
= GEN_INT (31);
29035 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29036 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29039 emit_insn (gen_movdi (out
, const0_rtx
));
29042 /* Now handle valid shifts. */
29043 else if (INTVAL (amount
) < 32)
29045 /* Shifts by a constant less than 32. */
29046 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29048 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29049 emit_insn (SET (out_down
,
29050 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29052 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29056 /* Shifts by a constant greater than 31. */
29057 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29059 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29060 if (code
== ASHIFTRT
)
29061 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29064 emit_insn (SET (out_up
, const0_rtx
));
29069 /* We have a shift-by-register. */
29070 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29072 /* This alternative requires the scratch registers. */
29073 gcc_assert (scratch1
&& REG_P (scratch1
));
29074 gcc_assert (scratch2
&& REG_P (scratch2
));
29076 /* We will need the values "amount-32" and "32-amount" later.
29077 Swapping them around now allows the later code to be more general. */
29081 emit_insn (SUB_32 (scratch1
, amount
));
29082 emit_insn (RSB_32 (scratch2
, amount
));
29085 emit_insn (RSB_32 (scratch1
, amount
));
29086 /* Also set CC = amount > 32. */
29087 emit_insn (SUB_S_32 (scratch2
, amount
));
29090 emit_insn (RSB_32 (scratch1
, amount
));
29091 emit_insn (SUB_32 (scratch2
, amount
));
29094 gcc_unreachable ();
29097 /* Emit code like this:
29100 out_down = in_down << amount;
29101 out_down = (in_up << (amount - 32)) | out_down;
29102 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29103 out_up = in_up << amount;
29106 out_down = in_down >> amount;
29107 out_down = (in_up << (32 - amount)) | out_down;
29109 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29110 out_up = in_up << amount;
29113 out_down = in_down >> amount;
29114 out_down = (in_up << (32 - amount)) | out_down;
29116 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29117 out_up = in_up << amount;
29119 The ARM and Thumb2 variants are the same but implemented slightly
29120 differently. If this were only called during expand we could just
29121 use the Thumb2 case and let combine do the right thing, but this
29122 can also be called from post-reload splitters. */
29124 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29126 if (!TARGET_THUMB2
)
29128 /* Emit code for ARM mode. */
29129 emit_insn (SET (out_down
,
29130 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29131 if (code
== ASHIFTRT
)
29133 rtx_code_label
*done_label
= gen_label_rtx ();
29134 emit_jump_insn (BRANCH (LT
, done_label
));
29135 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29137 emit_label (done_label
);
29140 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29145 /* Emit code for Thumb2 mode.
29146 Thumb2 can't do shift and or in one insn. */
29147 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29148 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29150 if (code
== ASHIFTRT
)
29152 rtx_code_label
*done_label
= gen_label_rtx ();
29153 emit_jump_insn (BRANCH (LT
, done_label
));
29154 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29155 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29156 emit_label (done_label
);
29160 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29161 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29165 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29179 /* Returns true if the pattern is a valid symbolic address, which is either a
29180 symbol_ref or (symbol_ref + addend).
29182 According to the ARM ELF ABI, the initial addend of REL-type relocations
29183 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29184 literal field of the instruction as a 16-bit signed value in the range
29185 -32768 <= A < 32768. */
29188 arm_valid_symbolic_address_p (rtx addr
)
29190 rtx xop0
, xop1
= NULL_RTX
;
29193 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29196 /* (const (plus: symbol_ref const_int)) */
29197 if (GET_CODE (addr
) == CONST
)
29198 tmp
= XEXP (addr
, 0);
29200 if (GET_CODE (tmp
) == PLUS
)
29202 xop0
= XEXP (tmp
, 0);
29203 xop1
= XEXP (tmp
, 1);
29205 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29206 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29212 /* Returns true if a valid comparison operation and makes
29213 the operands in a form that is valid. */
29215 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29217 enum rtx_code code
= GET_CODE (*comparison
);
29219 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29220 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29222 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29224 if (code
== UNEQ
|| code
== LTGT
)
29227 code_int
= (int)code
;
29228 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29229 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29234 if (!arm_add_operand (*op1
, mode
))
29235 *op1
= force_reg (mode
, *op1
);
29236 if (!arm_add_operand (*op2
, mode
))
29237 *op2
= force_reg (mode
, *op2
);
29241 if (!cmpdi_operand (*op1
, mode
))
29242 *op1
= force_reg (mode
, *op1
);
29243 if (!cmpdi_operand (*op2
, mode
))
29244 *op2
= force_reg (mode
, *op2
);
29249 if (!arm_float_compare_operand (*op1
, mode
))
29250 *op1
= force_reg (mode
, *op1
);
29251 if (!arm_float_compare_operand (*op2
, mode
))
29252 *op2
= force_reg (mode
, *op2
);
29262 /* Maximum number of instructions to set block of memory. */
29264 arm_block_set_max_insns (void)
29266 if (optimize_function_for_size_p (cfun
))
29269 return current_tune
->max_insns_inline_memset
;
29272 /* Return TRUE if it's profitable to set block of memory for
29273 non-vectorized case. VAL is the value to set the memory
29274 with. LENGTH is the number of bytes to set. ALIGN is the
29275 alignment of the destination memory in bytes. UNALIGNED_P
29276 is TRUE if we can only set the memory with instructions
29277 meeting alignment requirements. USE_STRD_P is TRUE if we
29278 can use strd to set the memory. */
29280 arm_block_set_non_vect_profit_p (rtx val
,
29281 unsigned HOST_WIDE_INT length
,
29282 unsigned HOST_WIDE_INT align
,
29283 bool unaligned_p
, bool use_strd_p
)
29286 /* For leftovers in bytes of 0-7, we can set the memory block using
29287 strb/strh/str with minimum instruction number. */
29288 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29292 num
= arm_const_inline_cost (SET
, val
);
29293 num
+= length
/ align
+ length
% align
;
29295 else if (use_strd_p
)
29297 num
= arm_const_double_inline_cost (val
);
29298 num
+= (length
>> 3) + leftover
[length
& 7];
29302 num
= arm_const_inline_cost (SET
, val
);
29303 num
+= (length
>> 2) + leftover
[length
& 3];
29306 /* We may be able to combine last pair STRH/STRB into a single STR
29307 by shifting one byte back. */
29308 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29311 return (num
<= arm_block_set_max_insns ());
29314 /* Return TRUE if it's profitable to set block of memory for
29315 vectorized case. LENGTH is the number of bytes to set.
29316 ALIGN is the alignment of destination memory in bytes.
29317 MODE is the vector mode used to set the memory. */
29319 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
29320 unsigned HOST_WIDE_INT align
,
29324 bool unaligned_p
= ((align
& 3) != 0);
29325 unsigned int nelt
= GET_MODE_NUNITS (mode
);
29327 /* Instruction loading constant value. */
29329 /* Instructions storing the memory. */
29330 num
+= (length
+ nelt
- 1) / nelt
;
29331 /* Instructions adjusting the address expression. Only need to
29332 adjust address expression if it's 4 bytes aligned and bytes
29333 leftover can only be stored by mis-aligned store instruction. */
29334 if (!unaligned_p
&& (length
& 3) != 0)
29337 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29338 if (!unaligned_p
&& mode
== V16QImode
)
29341 return (num
<= arm_block_set_max_insns ());
29344 /* Set a block of memory using vectorization instructions for the
29345 unaligned case. We fill the first LENGTH bytes of the memory
29346 area starting from DSTBASE with byte constant VALUE. ALIGN is
29347 the alignment requirement of memory. Return TRUE if succeeded. */
/* NOTE(review): lossy extraction of upstream arm.c -- the return type,
   braces, the "rtx dst, mem;" declarations and some statements are
   missing here, and statements are split across lines.  Comments below
   describe only the surviving fragments.  */
29349 arm_block_set_unaligned_vect (rtx dstbase
,
29350 unsigned HOST_WIDE_INT length
,
29351 unsigned HOST_WIDE_INT value
,
29352 unsigned HOST_WIDE_INT align
)
29354 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
29356 rtx val_elt
, val_vec
, reg
;
29357 rtx rval
[MAX_VECT_LEN
];
29358 rtx (*gen_func
) (rtx
, rtx
);
29360 unsigned HOST_WIDE_INT v
= value
;
29361 unsigned int offset
= 0;
/* This routine is for misaligned destinations only; the aligned case is
   handled by arm_block_set_aligned_vect.  */
29362 gcc_assert ((align
& 0x3) != 0);
29363 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29364 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
/* Pick the widest misaligned vector store that fits LENGTH.  */
29365 if (length
>= nelt_v16
)
29368 gen_func
= gen_movmisalignv16qi
;
29373 gen_func
= gen_movmisalignv8qi
;
29375 nelt_mode
= GET_MODE_NUNITS (mode
);
29376 gcc_assert (length
>= nelt_mode
);
29377 /* Skip if it isn't profitable. */
29378 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29381 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29382 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
/* Canonicalize the byte constant for GEN_INT by sign-extending.  */
29384 v
= sext_hwi (v
, BITS_PER_WORD
);
29385 val_elt
= GEN_INT (v
);
/* Build a vector with every element equal to VALUE and load it.  */
29386 for (j
= 0; j
< nelt_mode
; j
++)
29389 reg
= gen_reg_rtx (mode
);
29390 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29391 /* Emit instruction loading the constant value. */
29392 emit_move_insn (reg
, val_vec
);
29394 /* Handle nelt_mode bytes in a vector. */
29395 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29397 emit_insn ((*gen_func
) (mem
, reg
));
/* Only advance the address when another full vector store follows.  */
29398 if (i
+ 2 * nelt_mode
<= length
)
29400 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
29401 offset
+= nelt_mode
;
29402 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29406 /* If there are not less than nelt_v8 bytes leftover, we must be in
29408 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
29410 /* Handle (8, 16) bytes leftover. */
29411 if (i
+ nelt_v8
< length
)
/* Shift the address back so one last overlapping store covers exactly
   the tail bytes.  */
29413 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)))
29414 offset
+= length
- i
;
29415 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29417 /* We are shifting bytes back, set the alignment accordingly. */
29418 if ((length
& 1) != 0 && align
>= 2)
29419 set_mem_align (mem
, BITS_PER_UNIT
);
29421 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29423 /* Handle (0, 8] bytes leftover. */
29424 else if (i
< length
&& i
+ nelt_v8
>= length
)
/* A V8QI store is enough for the tail; reuse the low half of REG.  */
29426 if (mode
== V16QImode
)
29427 reg
= gen_lowpart (V8QImode
, reg
);
29429 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
29430 + (nelt_mode
- nelt_v8
))));
29431 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
29432 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
29434 /* We are shifting bytes back, set the alignment accordingly. */
29435 if ((length
& 1) != 0 && align
>= 2)
29436 set_mem_align (mem
, BITS_PER_UNIT
);
29438 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29444 /* Set a block of memory using vectorization instructions for the
29445 aligned case. We fill the first LENGTH bytes of the memory area
29446 starting from DSTBASE with byte constant VALUE. ALIGN is the
29447 alignment requirement of memory. Return TRUE if succeeded. */
/* NOTE(review): lossy extraction -- the return type, braces, the mode
   selection branches and some statements are missing; statements are
   split across lines.  Comments describe surviving fragments only.  */
29449 arm_block_set_aligned_vect (rtx dstbase
,
29450 unsigned HOST_WIDE_INT length
,
29451 unsigned HOST_WIDE_INT value
,
29452 unsigned HOST_WIDE_INT align
)
29454 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
29455 rtx dst
, addr
, mem
;
29456 rtx val_elt
, val_vec
, reg
;
29457 rtx rval
[MAX_VECT_LEN
];
29459 unsigned HOST_WIDE_INT v
= value
;
29460 unsigned int offset
= 0;
/* Destination must be at least word-aligned here.  */
29462 gcc_assert ((align
& 0x3) == 0);
29463 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29464 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
/* V16QI is only usable when misaligned access is allowed (the tail is
   stored with vst1) and the target is little-endian.  */
29465 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
29470 nelt_mode
= GET_MODE_NUNITS (mode
);
29471 gcc_assert (length
>= nelt_mode
);
29472 /* Skip if it isn't profitable. */
29473 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29476 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29478 v
= sext_hwi (v
, BITS_PER_WORD
);
29479 val_elt
= GEN_INT (v
);
/* Broadcast VALUE into a constant vector and load it into REG.  */
29480 for (j
= 0; j
< nelt_mode
; j
++)
29483 reg
= gen_reg_rtx (mode
);
29484 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29485 /* Emit instruction loading the constant value. */
29486 emit_move_insn (reg
, val_vec
);
29489 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29490 if (mode
== V16QImode
)
29492 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29493 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29495 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29496 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
/* Shift back so the final overlapping vst1 ends exactly at LENGTH.  */
29498 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29499 offset
+= length
- nelt_mode
;
29500 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29501 /* We are shifting bytes back, set the alignment accordingly. */
29502 if ((length
& 0x3) == 0)
29503 set_mem_align (mem
, BITS_PER_UNIT
* 4);
29504 else if ((length
& 0x1) == 0)
29505 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29507 set_mem_align (mem
, BITS_PER_UNIT
);
29509 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29512 /* Fall through for bytes leftover. */
29514 nelt_mode
= GET_MODE_NUNITS (mode
);
29515 reg
= gen_lowpart (V8QImode
, reg
);
29518 /* Handle 8 bytes in a vector. */
29519 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29521 addr
= plus_constant (Pmode
, dst
, i
);
29522 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
29523 emit_move_insn (mem
, reg
);
29526 /* Handle single word leftover by shifting 4 bytes back. We can
29527 use aligned access for this case. */
29528 if (i
+ UNITS_PER_WORD
== length
)
29530 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
29531 offset
+= i
- UNITS_PER_WORD
;
29532 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
29533 /* We are shifting 4 bytes back, set the alignment accordingly. */
29534 if (align
> UNITS_PER_WORD
)
29535 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
29537 emit_move_insn (mem
, reg
);
29539 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29540 We have to use unaligned access for this case. */
29541 else if (i
< length
)
29543 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29544 offset
+= length
- nelt_mode
;
29545 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29546 /* We are shifting bytes back, set the alignment accordingly. */
29547 if ((length
& 1) == 0)
29548 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29550 set_mem_align (mem
, BITS_PER_UNIT
);
29552 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29558 /* Set a block of memory using plain strh/strb instructions, only
29559 using instructions allowed by ALIGN on processor. We fill the
29560 first LENGTH bytes of the memory area starting from DSTBASE
29561 with byte constant VALUE. ALIGN is the alignment requirement
/* NOTE(review): lossy extraction -- the tail of the doc comment, the
   return type, braces and some declarations (e.g. "unsigned int i;",
   "machine_mode mode;") are missing; statements are split across
   lines.  */
29564 arm_block_set_unaligned_non_vect (rtx dstbase
,
29565 unsigned HOST_WIDE_INT length
,
29566 unsigned HOST_WIDE_INT value
,
29567 unsigned HOST_WIDE_INT align
)
29570 rtx dst
, addr
, mem
;
29571 rtx val_exp
, val_reg
, reg
;
29573 HOST_WIDE_INT v
= value
;
/* Only byte- or halfword-aligned destinations reach this routine.  */
29575 gcc_assert (align
== 1 || align
== 2);
/* Replicate the byte into the second byte lane (presumably only when
   ALIGN == 2 so halfword stores can be used -- the guarding `if' did
   not survive extraction; confirm against upstream).  */
29578 v
|= (value
<< BITS_PER_UNIT
);
29580 v
= sext_hwi (v
, BITS_PER_WORD
);
29581 val_exp
= GEN_INT (v
);
29582 /* Skip if it isn't profitable. */
29583 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29584 align
, true, false))
29587 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29588 mode
= (align
== 2 ? HImode
: QImode
);
29589 val_reg
= force_reg (SImode
, val_exp
);
29590 reg
= gen_lowpart (mode
, val_reg
);
/* Store the widest unit ALIGN permits, one per iteration.  */
29592 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
29594 addr
= plus_constant (Pmode
, dst
, i
);
29595 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
29596 emit_move_insn (mem
, reg
);
29599 /* Handle single byte leftover. */
29600 if (i
+ 1 == length
)
29602 reg
= gen_lowpart (QImode
, val_reg
);
29603 addr
= plus_constant (Pmode
, dst
, i
);
29604 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29605 emit_move_insn (mem
, reg
);
/* By now every byte must have been written.  */
29609 gcc_assert (i
== length
);
29613 /* Set a block of memory using plain strd/str/strh/strb instructions,
29614 to permit unaligned copies on processors which support unaligned
29615 semantics for those instructions. We fill the first LENGTH bytes
29616 of the memory area starting from DSTBASE with byte constant VALUE.
29617 ALIGN is the alignment requirement of memory. */
/* NOTE(review): lossy extraction -- the return type, braces, loop
   index declaration and several statements are missing; statements are
   split across lines.  Comments describe surviving fragments only.  */
29619 arm_block_set_aligned_non_vect (rtx dstbase
,
29620 unsigned HOST_WIDE_INT length
,
29621 unsigned HOST_WIDE_INT value
,
29622 unsigned HOST_WIDE_INT align
)
29625 rtx dst
, addr
, mem
;
29626 rtx val_exp
, val_reg
, reg
;
29627 unsigned HOST_WIDE_INT v
;
/* Use STRD only when the tuning prefers it, the target has LDRD/STRD
   and the destination is word-aligned with at least two words to
   store.  */
29630 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
29631 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
/* Replicate the byte into all four byte lanes of a word.  */
29633 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
29634 if (length
< UNITS_PER_WORD
)
29635 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
/* For the STRD path, duplicate the word into a double-word constant.  */
29638 v
|= (v
<< BITS_PER_WORD
);
29640 v
= sext_hwi (v
, BITS_PER_WORD
);
29642 val_exp
= GEN_INT (v
);
29643 /* Skip if it isn't profitable. */
29644 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29645 align
, false, use_strd_p
))
29650 /* Try without strd. */
29651 v
= (v
>> BITS_PER_WORD
);
29652 v
= sext_hwi (v
, BITS_PER_WORD
);
29653 val_exp
= GEN_INT (v
);
29654 use_strd_p
= false;
29655 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29656 align
, false, use_strd_p
))
29661 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29662 /* Handle double words using strd if possible. */
29665 val_reg
= force_reg (DImode
, val_exp
);
29667 for (; (i
+ 8 <= length
); i
+= 8)
29669 addr
= plus_constant (Pmode
, dst
, i
);
29670 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
29671 emit_move_insn (mem
, reg
);
29675 val_reg
= force_reg (SImode
, val_exp
);
29677 /* Handle words. */
29678 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
29679 for (; (i
+ 4 <= length
); i
+= 4)
29681 addr
= plus_constant (Pmode
, dst
, i
);
29682 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
/* Aligned word stores can use plain STR; otherwise fall back to the
   unaligned-store pattern.  */
29683 if ((align
& 3) == 0)
29684 emit_move_insn (mem
, reg
);
29686 emit_insn (gen_unaligned_storesi (mem
, reg
));
29689 /* Merge last pair of STRH and STRB into a STR if possible. */
29690 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
29692 addr
= plus_constant (Pmode
, dst
, i
- 1);
29693 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
29694 /* We are shifting one byte back, set the alignment accordingly. */
29695 if ((align
& 1) == 0)
29696 set_mem_align (mem
, BITS_PER_UNIT
);
29698 /* Most likely this is an unaligned access, and we can't tell at
29699 compilation time. */
29700 emit_insn (gen_unaligned_storesi (mem
, reg
));
29704 /* Handle half word leftover. */
29705 if (i
+ 2 <= length
)
29707 reg
= gen_lowpart (HImode
, val_reg
);
29708 addr
= plus_constant (Pmode
, dst
, i
);
29709 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
29710 if ((align
& 1) == 0)
29711 emit_move_insn (mem
, reg
);
29713 emit_insn (gen_unaligned_storehi (mem
, reg
));
29718 /* Handle single byte leftover. */
29719 if (i
+ 1 == length
)
29721 reg
= gen_lowpart (QImode
, val_reg
);
29722 addr
= plus_constant (Pmode
, dst
, i
);
29723 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29724 emit_move_insn (mem
, reg
);
29730 /* Set a block of memory using vectorization instructions for both
29731 aligned and unaligned cases. We fill the first LENGTH bytes of
29732 the memory area starting from DSTBASE with byte constant VALUE.
29733 ALIGN is the alignment requirement of memory. */
/* NOTE(review): lossy extraction -- return type, braces and the early
   "return false" are missing; dispatches to the aligned or unaligned
   vector memset expander based on ALIGN.  */
29735 arm_block_set_vect (rtx dstbase
,
29736 unsigned HOST_WIDE_INT length
,
29737 unsigned HOST_WIDE_INT value
,
29738 unsigned HOST_WIDE_INT align
)
29740 /* Check whether we need to use unaligned store instruction. */
29741 if (((align
& 3) != 0 || (length
& 3) != 0)
29742 /* Check whether unaligned store instruction is available. */
29743 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
/* Word-aligned destinations take the aligned expander; everything else
   goes through the misaligned-store expander.  */
29746 if ((align
& 3) == 0)
29747 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
29749 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
29752 /* Expand string store operation. Firstly we try to do that by using
29753 vectorization instructions, then try with ARM unaligned access and
29754 double-word store if profitable. OPERANDS[0] is the destination,
29755 OPERANDS[1] is the number of bytes, operands[2] is the value to
29756 initialize the memory, OPERANDS[3] is the known alignment of the
/* NOTE(review): lossy extraction -- end of the doc comment, return
   type, braces and early "return false" lines are missing.  This is
   the top-level setmem expander that tries NEON first, then falls back
   to scalar store sequences.  */
29759 arm_gen_setmem (rtx
*operands
)
29761 rtx dstbase
= operands
[0];
29762 unsigned HOST_WIDE_INT length
;
29763 unsigned HOST_WIDE_INT value
;
29764 unsigned HOST_WIDE_INT align
;
/* Both the byte count and the fill value must be compile-time
   constants for this expander.  */
29766 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
29769 length
= UINTVAL (operands
[1]);
/* Only the low byte of the fill value matters.  */
29773 value
= (UINTVAL (operands
[2]) & 0xFF);
29774 align
= UINTVAL (operands
[3]);
29775 if (TARGET_NEON
&& length
>= 8
29776 && current_tune
->string_ops_prefer_neon
29777 && arm_block_set_vect (dstbase
, length
, value
, align
))
29780 if (!unaligned_access
&& (align
& 3) != 0)
29781 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
29783 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
/* Return true if the current tuning enables any macro-fusion pairs
   (fusible_ops differs from FUSE_NOTHING).  NOTE(review): return type
   and braces lost in extraction.  */
29788 arm_macro_fusion_p (void)
29790 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
/* Decide whether insns PREV and CURR should be kept adjacent by the
   scheduler (macro fusion).  Handles the AES/AESMC pair and the
   MOVW/MOVT pair forms matched below.  NOTE(review): lossy extraction
   -- return type, braces, "return false/true" lines and some
   declarations are missing.  */
29795 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
29798 rtx prev_set
= single_set (prev
);
29799 rtx curr_set
= single_set (curr
);
/* Conditional jumps are not fused with their predecessor.  */
29805 if (any_condjump_p (curr
))
29808 if (!arm_macro_fusion_p ())
29811 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
29812 && aarch_crypto_can_dual_issue (prev
, curr
))
29815 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
)
29817 /* We are trying to fuse
29818 movw imm / movt imm
29819 instructions as a group that gets scheduled together. */
29821 set_dest
= SET_DEST (curr_set
);
29823 if (GET_MODE (set_dest
) != SImode
)
29826 /* We are trying to match:
29827 prev (movw) == (set (reg r0) (const_int imm16))
29828 curr (movt) == (set (zero_extract (reg r0)
29831 (const_int imm16_1))
29833 prev (movw) == (set (reg r1)
29834 (high (symbol_ref ("SYM"))))
29835 curr (movt) == (set (reg r0)
29837 (symbol_ref ("SYM")))) */
29838 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
/* Immediate form: the movt must insert into the same register the
   movw wrote.  */
29840 if (CONST_INT_P (SET_SRC (curr_set
))
29841 && CONST_INT_P (SET_SRC (prev_set
))
29842 && REG_P (XEXP (set_dest
, 0))
29843 && REG_P (SET_DEST (prev_set
))
29844 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
/* Symbolic form: HIGH followed by LO_SUM on the same register.  */
29847 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
29848 && REG_P (SET_DEST (curr_set
))
29849 && REG_P (SET_DEST (prev_set
))
29850 && GET_CODE (SET_SRC (prev_set
)) == HIGH
29851 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
29857 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
/* AddressSanitizer shadow memory offset for ARM: 1 << 29.
   NOTE(review): braces lost in extraction.  */
29859 static unsigned HOST_WIDE_INT
29860 arm_asan_shadow_offset (void)
29862 return HOST_WIDE_INT_1U
<< 29;
29866 /* This is a temporary fix for PR60655. Ideally we need
29867 to handle most of these cases in the generic part but
29868 currently we reject minus (..) (sym_ref). We try to
29869 ameliorate the case with minus (sym_ref1) (sym_ref2)
29870 where they are in the same section. */
/* NOTE(review): lossy extraction -- return type, braces and the final
   "return" statements are missing.  Accepts (MINUS sym1 sym2) for
   debug info only when both symbols provably live in the same
   section/context.  */
29873 arm_const_not_ok_for_debug_p (rtx p
)
29875 tree decl_op0
= NULL
;
29876 tree decl_op1
= NULL
;
29878 if (GET_CODE (p
) == MINUS
)
29880 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
29882 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
29884 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
29885 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
/* Variables/constants compare by their output section...  */
29887 if ((TREE_CODE (decl_op1
) == VAR_DECL
29888 || TREE_CODE (decl_op1
) == CONST_DECL
)
29889 && (TREE_CODE (decl_op0
) == VAR_DECL
29890 || TREE_CODE (decl_op0
) == CONST_DECL
))
29891 return (get_variable_section (decl_op1
, false)
29892 != get_variable_section (decl_op0
, false));
/* ...while labels compare by their containing function.  */
29894 if (TREE_CODE (decl_op1
) == LABEL_DECL
29895 && TREE_CODE (decl_op0
) == LABEL_DECL
)
29896 return (DECL_CONTEXT (decl_op1
)
29897 != DECL_CONTEXT (decl_op0
));
29907 /* return TRUE if x is a reference to a value in a constant pool */
/* NOTE(review): return type, braces and the leading MEM_P (x) test did
   not survive extraction; only the SYMBOL_REF/constant-pool checks on
   the address are visible.  */
29909 arm_is_constant_pool_ref (rtx x
)
29912 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
29913 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
29916 /* Remember the last target of arm_set_current_function. */
/* GTY-rooted so the garbage collector keeps the cached FUNCTION_DECL
   alive between calls.  */
29917 static GTY(()) tree arm_previous_fndecl
;
29919 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
/* NOTE(review): return type, braces and the "else" introducing the
   save path were lost in extraction.  */
29922 save_restore_target_globals (tree new_tree
)
29924 /* If we have a previous state, use it. */
29925 if (TREE_TARGET_GLOBALS (new_tree
))
29926 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
29927 else if (new_tree
== target_option_default_node
)
29928 restore_target_globals (&default_target_globals
);
29931 /* Call target_reinit and save the state for TARGET_GLOBALS. */
29932 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
/* Re-derive tuning parameters for the (possibly changed) target.  */
29935 arm_option_params_internal ();
29938 /* Invalidate arm_previous_fndecl. */
/* Forces the next arm_set_current_function call to re-apply target
   state.  NOTE(review): return type and braces lost in extraction.  */
29941 arm_reset_previous_fndecl (void)
29943 arm_previous_fndecl
= NULL_TREE
;
29946 /* Establish appropriate back-end context for processing the function
29947 FNDECL. The argument might be NULL to indicate processing at top
29948 level, outside of any function scope. */
/* NOTE(review): lossy extraction -- return type, braces, early
   "return" statements and the ": NULL_TREE" arm of the OLD_TREE
   conditional are missing.  */
29951 arm_set_current_function (tree fndecl
)
/* Nothing to do at top level or when re-entering the same function.  */
29953 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
29956 tree old_tree
= (arm_previous_fndecl
29957 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
29960 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
29962 /* If current function has no attributes but previous one did,
29963 use the default node. */
29964 if (! new_tree
&& old_tree
)
29965 new_tree
= target_option_default_node
;
29967 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
29968 the default have been handled by save_restore_target_globals from
29969 arm_pragma_target_parse. */
29970 if (old_tree
== new_tree
)
29973 arm_previous_fndecl
= fndecl
;
29975 /* First set the target options. */
29976 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
29978 save_restore_target_globals (new_tree
);
29981 /* Implement TARGET_OPTION_PRINT. */
/* Dump the arch (arm/thumb1/thumb2) and FPU selected by PTR to FILE,
   indented by INDENT.  NOTE(review): return type, braces and the final
   "arm" arm of the ?: chain were lost in extraction.  */
29984 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
29986 int flags
= ptr
->x_target_flags
;
29987 const struct arm_fpu_desc
*fpu_desc
= &all_fpus
[ptr
->x_arm_fpu_index
];
29989 fprintf (file
, "%*sselected arch %s\n", indent
, "",
29990 TARGET_THUMB2_P (flags
) ? "thumb2" :
29991 TARGET_THUMB_P (flags
) ? "thumb1" :
29994 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_desc
->name
);
29997 /* Hook to determine if one function can safely inline another. */
30000 arm_can_inline_p (tree caller
, tree callee
)
30002 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30003 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30005 struct cl_target_option
*caller_opts
30006 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30007 : target_option_default_node
);
30009 struct cl_target_option
*callee_opts
30010 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30011 : target_option_default_node
);
30013 const struct arm_fpu_desc
*caller_fpu
30014 = &all_fpus
[caller_opts
->x_arm_fpu_index
];
30015 const struct arm_fpu_desc
*callee_fpu
30016 = &all_fpus
[callee_opts
->x_arm_fpu_index
];
30018 /* Callee's fpu features should be a subset of the caller's. */
30019 if ((caller_fpu
->features
& callee_fpu
->features
) != callee_fpu
->features
)
30022 /* Need same model and regs. */
30023 if (callee_fpu
->model
!= caller_fpu
->model
30024 || callee_fpu
->regs
!= callee_fpu
->regs
)
30027 /* OK to inline between different modes.
30028 Function with mode specific instructions, e.g using asm,
30029 must be explicitly protected with noinline. */
30033 /* Hook to fix function's alignment affected by target attribute. */
/* NOTE(review): return type, braces, the early "return" for
   user-specified alignment and the "if (!callee_tree)" guard were lost
   in extraction.  Re-computes DECL_ALIGN from the function's own
   target flags (arm vs thumb function boundary).  */
30036 arm_relayout_function (tree fndecl
)
/* An explicit user alignment always wins.  */
30038 if (DECL_USER_ALIGN (fndecl
))
30041 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30044 callee_tree
= target_option_default_node
;
30046 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30047 SET_DECL_ALIGN (fndecl
, FUNCTION_BOUNDARY_P (opts
->x_target_flags
));
30050 /* Inner function to process the attribute((target(...))), take an argument and
30051 set the current options from the argument. If we have a list, recursively
30052 go over the list. */
/* NOTE(review): lossy extraction -- return type, braces,
   "return false/true" lines, the "char *q;" declaration and the
   "argstr = NULL;" needed for repeated strtok calls are missing from
   this view; confirm against upstream before editing.  */
30055 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
/* A TREE_LIST is processed element by element, recursively.  */
30057 if (TREE_CODE (args
) == TREE_LIST
)
30061 for (; args
; args
= TREE_CHAIN (args
))
30062 if (TREE_VALUE (args
)
30063 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30068 else if (TREE_CODE (args
) != STRING_CST
)
30070 error ("attribute %<target%> argument not a string");
/* Work on a writable copy since strtok mutates its argument.  */
30074 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30077 while ((q
= strtok (argstr
, ",")) != NULL
)
30079 while (ISSPACE (*q
)) ++q
;
/* Recognized selectors: "thumb", "arm", "fpu=<name>".  */
30082 if (!strncmp (q
, "thumb", 5))
30083 opts
->x_target_flags
|= MASK_THUMB
;
30085 else if (!strncmp (q
, "arm", 3))
30086 opts
->x_target_flags
&= ~MASK_THUMB
;
30088 else if (!strncmp (q
, "fpu=", 4))
30090 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
30091 &opts
->x_arm_fpu_index
, CL_TARGET
))
30093 error ("invalid fpu for attribute(target(\"%s\"))", q
);
30099 error ("attribute(target(\"%s\")) is unknown", q
);
/* Re-validate the combined option set.  */
30103 arm_option_check_internal (opts
);
30109 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* NOTE(review): return type, braces and the "return NULL_TREE" on
   failure were lost in extraction.  */
30112 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
30113 struct gcc_options
*opts_set
)
30115 if (!arm_valid_target_attribute_rec (args
, opts
))
30118 /* Do any overrides, such as global options arch=xxx. */
30119 arm_option_override_internal (opts
, opts_set
);
30121 return build_target_option_node (opts
);
/* Prepend a target("MODE") attribute to *ATTRIBUTES, where MODE is a
   NUL-terminated string such as "thumb" or "arm".  NOTE(review):
   return type, braces and the trailing *attributes argument of
   tree_cons were lost in extraction.  */
30125 add_attribute (const char * mode
, tree
*attributes
)
30127 size_t len
= strlen (mode
);
30128 tree value
= build_string (len
, mode
);
/* Give the string an array-of-char type so it is a valid attribute
   value.  */
30130 TREE_TYPE (value
) = build_array_type (char_type_node
,
30131 build_index_type (size_int (len
)));
30133 *attributes
= tree_cons (get_identifier ("target"),
30134 build_tree_list (NULL_TREE
, value
),
30138 /* For testing. Insert thumb or arm modes alternatively on functions. */
/* NOTE(review): return type, braces, early "return" statements and the
   "const char *mode;" declaration were lost in extraction.  Active
   only under -mflip-thumb, for compiler testing.  */
30141 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30145 if (! TARGET_FLIP_THUMB
)
/* Only flip real, user-written function definitions.  */
30148 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30149 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30152 /* Nested definitions must inherit mode. */
30153 if (current_function_decl
)
30155 mode
= TARGET_THUMB
? "thumb" : "arm";
30156 add_attribute (mode
, attributes
);
30160 /* If there is already a setting don't change it. */
30161 if (lookup_attribute ("target", *attributes
) != NULL
)
/* Alternate arm/thumb across successive function definitions.  */
30164 mode
= thumb_flipper
? "thumb" : "arm";
30165 add_attribute (mode
, attributes
);
30167 thumb_flipper
= !thumb_flipper
;
30170 /* Hook to validate attribute((target("string"))). */
/* NOTE(review): lossy extraction -- return type, braces and the
   "return false"/"return true" statements are missing.  Builds a
   per-function option set from ARGS and attaches the resulting target
   and optimization nodes to FNDECL.  */
30173 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30174 tree args
, int ARG_UNUSED (flags
))
30177 struct gcc_options func_options
;
30178 tree cur_tree
, new_optimize
;
30179 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
30181 /* Get the optimization options of the current function. */
30182 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
30184 /* If the function changed the optimization levels as well as setting target
30185 options, start with the optimizations specified. */
30186 if (!func_optimize
)
30187 func_optimize
= optimization_default_node
;
30189 /* Init func_options. */
30190 memset (&func_options
, 0, sizeof (func_options
))
30191 init_options_struct (&func_options
, NULL
);
30192 lang_hooks
.init_options_struct (&func_options
);
30194 /* Initialize func_options to the defaults. */
30195 cl_optimization_restore (&func_options
,
30196 TREE_OPTIMIZATION (func_optimize
));
30198 cl_target_option_restore (&func_options
,
30199 TREE_TARGET_OPTION (target_option_default_node
));
30201 /* Set func_options flags with new target mode. */
30202 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
30203 &global_options_set
);
/* A NULL tree means the attribute string failed to parse.  */
30205 if (cur_tree
== NULL_TREE
)
30208 new_optimize
= build_optimization_node (&func_options
);
30210 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30212 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30214 finalize_options_struct (&func_options
);
/* Emit the per-function assembly directives for NAME/DECL on STREAM:
   .syntax, .code/.thumb/.arm mode selection, a .fpu directive, and the
   optional poke-function-name sequence.  NOTE(review): return type,
   braces and the surrounding TARGET_THUMB/else structure were lost in
   extraction.  */
30220 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
30223 fprintf (stream
, "\t.syntax unified\n");
/* Thunks and functions called in ARM mode get ".code 32" even under
   Thumb.  */
30227 if (is_called_in_ARM_mode (decl
)
30228 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
30229 && cfun
->is_thunk
))
30230 fprintf (stream
, "\t.code 32\n");
30231 else if (TARGET_THUMB1
)
30232 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
30234 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
30237 fprintf (stream
, "\t.arm\n");
30239 asm_fprintf (asm_out_file
, "\t.fpu %s\n",
30240 TARGET_SOFT_FLOAT
? "softvfp" : TARGET_FPU_NAME
);
30242 if (TARGET_POKE_FUNCTION_NAME
)
30243 arm_poke_function_name (stream
, (const char *) name
);
30246 /* If MEM is in the form of [base+offset], extract the two parts
30247 of address and set to BASE and OFFSET, otherwise return false
30248 after clearing BASE and OFFSET. */
/* NOTE(review): return type, braces, the "rtx addr;" declaration, the
   "*base = addr;" assignment in the REG case, the "return true"
   statements and the "*base = NULL_RTX;" on failure were lost in
   extraction.  */
30251 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
30255 gcc_assert (MEM_P (mem
));
30257 addr
= XEXP (mem
, 0);
30259 /* Strip off const from addresses like (const (addr)). */
30260 if (GET_CODE (addr
) == CONST
)
30261 addr
= XEXP (addr
, 0);
/* Bare register: treat as base + 0.  */
30263 if (GET_CODE (addr
) == REG
)
30266 *offset
= const0_rtx
;
/* (plus (reg) (const_int)) form.  */
30270 if (GET_CODE (addr
) == PLUS
30271 && GET_CODE (XEXP (addr
, 0)) == REG
30272 && CONST_INT_P (XEXP (addr
, 1)))
30274 *base
= XEXP (addr
, 0);
30275 *offset
= XEXP (addr
, 1);
30280 *offset
= NULL_RTX
;
30285 /* If INSN is a load or store of address in the form of [base+offset],
30286 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30287 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30288 otherwise return FALSE. */
/* NOTE(review): return type, braces, the "rtx x, dest, src;"
   declarations, the "src = SET_SRC (x);" assignment, the *is_load
   assignments and a "return false" were lost in extraction.  */
30291 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
30295 gcc_assert (INSN_P (insn
));
30296 x
= PATTERN (insn
);
/* Only a single SET can be a plain ldr/str.  */
30297 if (GET_CODE (x
) != SET
)
30301 dest
= SET_DEST (x
);
/* reg -> mem is a store; mem -> reg is a load.  */
30302 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
30305 extract_base_offset_in_addr (dest
, base
, offset
);
30307 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
30310 extract_base_offset_in_addr (src
, base
, offset
);
/* Success only when both parts of the address were recognized.  */
30315 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
30318 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30320 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30321 and PRI are only calculated for these instructions. For other instruction,
30322 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30323 instruction fusion can be supported by returning different priorities.
30325 It's important that irrelevant instructions get the largest FUSION_PRI. */
/* NOTE(review): lossy extraction -- return type, braces, local
   declarations (base, offset, is_load, tmp, off_val), the fall-back
   path assigning max_pri, and the final "*pri = tmp;" are missing.  */
30328 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
30329 int *fusion_pri
, int *pri
)
30335 gcc_assert (INSN_P (insn
))
30338 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
30345 /* Load goes first. */
30347 *fusion_pri
= tmp
- 1;
30349 *fusion_pri
= tmp
- 2;
30353 /* INSN with smaller base register goes first. */
30354 tmp
-= ((REGNO (base
) & 0xff) << 20);
30356 /* INSN with smaller offset goes first. */
30357 off_val
= (int)(INTVAL (offset
));
/* Positive and negative offsets are folded into the priority with
   opposite signs so smaller magnitudes sort first either way.  */
30359 tmp
-= (off_val
& 0xfffff);
30361 tmp
+= ((- off_val
) & 0xfffff);
30368 /* Construct and return a PARALLEL RTX vector with elements numbering the
30369 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30370 the vector - from the perspective of the architecture. This does not
30371 line up with GCC's perspective on lane numbers, so we end up with
30372 different masks depending on our target endian-ness. The diagram
30373 below may help. We must draw the distinction when building masks
30374 which select one half of the vector. An instruction selecting
30375 architectural low-lanes for a big-endian target, must be described using
30376 a mask selecting GCC high-lanes.
30378 Big-Endian Little-Endian
30380 GCC 0 1 2 3 3 2 1 0
30381 | x | x | x | x | | x | x | x | x |
30382 Architecture 3 2 1 0 3 2 1 0
30384 Low Mask: { 2, 3 } { 0, 1 }
30385 High Mask: { 0, 1 } { 2, 3 }
/* NOTE(review): return type, braces, the declarations of i, base,
   low_base and t1, and the final "return t1;" were lost in
   extraction.  */
30389 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
30391 int nunits
= GET_MODE_NUNITS (mode
);
30392 rtvec v
= rtvec_alloc (nunits
/ 2);
30393 int high_base
= nunits
/ 2;
/* Big-endian swaps which half counts as "high" (see diagram above).  */
30399 if (BYTES_BIG_ENDIAN
)
30400 base
= high
? low_base
: high_base
;
30402 base
= high
? high_base
: low_base
;
/* Fill the PARALLEL with consecutive lane numbers from BASE.  */
30404 for (i
= 0; i
< nunits
/ 2; i
++)
30405 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
30407 t1
= gen_rtx_PARALLEL (mode
, v
);
30411 /* Check OP for validity as a PARALLEL RTX vector with elements
30412 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30413 from the perspective of the architecture. See the diagram above
30414 arm_simd_vect_par_cnst_half_p for more details. */
/* NOTE(review): return type, braces, the "bool high" parameter line,
   the loop index declaration and the "return false/true" statements
   were lost in extraction.  Compares OP element-wise against the ideal
   mask built by arm_simd_vect_par_cnst_half.  */
30417 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
30420 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
30421 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
30422 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
30425 if (!VECTOR_MODE_P (mode
))
30428 if (count_op
!= count_ideal
)
30431 for (i
= 0; i
< count_ideal
; i
++)
30433 rtx elt_op
= XVECEXP (op
, 0, i
);
30434 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
/* Every element must be the exact CONST_INT the ideal mask has.  */
30436 if (!CONST_INT_P (elt_op
)
30437 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
30443 /* Can output mi_thunk for all cases except for non-zero vcall_offset
/* NOTE(review): the rest of the doc comment, the return type, braces
   and the "return false"/"return true" lines were lost in
   extraction.  */
30446 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
30449 /* For now, we punt and not handle this for TARGET_THUMB1. */
30450 if (vcall_offset
&& TARGET_THUMB1
)
30453 /* Otherwise ok. */