1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
31 #include "fold-const.h"
32 #include "stringpool.h"
33 #include "stor-layout.h"
38 #include "insn-config.h"
39 #include "conditions.h"
41 #include "insn-attr.h"
50 #include "insn-codes.h"
52 #include "diagnostic-core.h"
58 #include "cfgcleanup.h"
63 #include "sched-int.h"
65 #include "langhooks.h"
71 #include "gimple-expr.h"
72 #include "target-globals.h"
74 #include "tm-constrs.h"
77 /* This file should be included last. */
78 #include "target-def.h"
80 /* Forward definitions of types. */
/* Shorthand names for the minipool (constant-pool placement) node and
   fixup structures used by the minipool routines declared below.  */
81 typedef struct minipool_node Mnode
;
82 typedef struct minipool_fixup Mfix
;
/* Optional hook a language front end can set to emit language-specific
   object attributes.  NOTE(review): assigned outside this chunk --
   confirm it is checked for NULL before being called.  */
84 void (*arm_lang_output_object_attributes_hook
)(void);
91 /* Forward function declarations. */
92 static bool arm_const_not_ok_for_debug_p (rtx
);
93 static bool arm_needs_doubleword_align (machine_mode
, const_tree
);
94 static int arm_compute_static_chain_stack_bytes (void);
95 static arm_stack_offsets
*arm_get_frame_offsets (void);
96 static void arm_add_gc_roots (void);
97 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
98 HOST_WIDE_INT
, rtx
, rtx
, int, int);
99 static unsigned bit_count (unsigned long);
100 static int arm_address_register_rtx_p (rtx
, int);
101 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
102 static bool is_called_in_ARM_mode (tree
);
103 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
104 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
105 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
106 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
107 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
108 inline static int thumb1_index_register_rtx_p (rtx
, int);
109 static int thumb_far_jump_used_p (void);
110 static bool thumb_force_lr_save (void);
111 static unsigned arm_size_return_regs (void);
112 static bool arm_assemble_integer (rtx
, unsigned int, int);
113 static void arm_print_operand (FILE *, rtx
, int);
114 static void arm_print_operand_address (FILE *, rtx
);
115 static bool arm_print_operand_punct_valid_p (unsigned char code
);
116 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
117 static arm_cc
get_arm_condition_code (rtx
);
118 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
119 static const char *output_multi_immediate (rtx
*, const char *, const char *,
121 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
122 static struct machine_function
*arm_init_machine_status (void);
123 static void thumb_exit (FILE *, int);
124 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
125 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
126 static Mnode
*add_minipool_forward_ref (Mfix
*);
127 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
128 static Mnode
*add_minipool_backward_ref (Mfix
*);
129 static void assign_minipool_offsets (Mfix
*);
130 static void arm_print_value (FILE *, rtx
);
131 static void dump_minipool (rtx_insn
*);
132 static int arm_barrier_cost (rtx_insn
*);
133 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
134 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
135 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
137 static void arm_reorg (void);
138 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
139 static unsigned long arm_compute_save_reg0_reg12_mask (void);
140 static unsigned long arm_compute_save_reg_mask (void);
141 static unsigned long arm_isr_value (tree
);
142 static unsigned long arm_compute_func_type (void);
143 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
144 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
145 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
146 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
147 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
149 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
150 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
151 static int arm_comp_type_attributes (const_tree
, const_tree
);
152 static void arm_set_default_type_attributes (tree
);
153 static int arm_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int);
154 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
155 static int optimal_immediate_sequence (enum rtx_code code
,
156 unsigned HOST_WIDE_INT val
,
157 struct four_ints
*return_sequence
);
158 static int optimal_immediate_sequence_1 (enum rtx_code code
,
159 unsigned HOST_WIDE_INT val
,
160 struct four_ints
*return_sequence
,
162 static int arm_get_strip_length (int);
163 static bool arm_function_ok_for_sibcall (tree
, tree
);
164 static machine_mode
arm_promote_function_mode (const_tree
,
167 static bool arm_return_in_memory (const_tree
, const_tree
);
168 static rtx
arm_function_value (const_tree
, const_tree
, bool);
169 static rtx
arm_libcall_value_1 (machine_mode
);
170 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
171 static bool arm_function_value_regno_p (const unsigned int);
172 static void arm_internal_label (FILE *, const char *, unsigned long);
173 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
175 static bool arm_have_conditional_execution (void);
176 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
177 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
178 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
179 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
180 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
181 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
182 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
183 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
184 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
185 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
186 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
187 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
188 static void emit_constant_insn (rtx cond
, rtx pattern
);
189 static rtx_insn
*emit_set_insn (rtx
, rtx
);
190 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
191 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
193 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
195 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
197 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
198 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
200 static rtx
aapcs_libcall_value (machine_mode
);
201 static int aapcs_select_return_coproc (const_tree
, const_tree
);
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
205 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
208 static void arm_encode_section_info (tree
, rtx
, int);
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree
, tree
*);
215 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
217 static bool arm_pass_by_reference (cumulative_args_t
,
218 machine_mode
, const_tree
, bool);
219 static bool arm_promote_prototypes (const_tree
);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree
);
223 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
224 static bool arm_return_in_memory (const_tree
, const_tree
);
226 static void arm_unwind_emit (FILE *, rtx_insn
*);
227 static bool arm_output_ttype (rtx
);
228 static void arm_asm_emit_except_personality (rtx
);
229 static void arm_asm_init_sections (void);
231 static rtx
arm_dwarf_register_span (rtx
);
233 static tree
arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree
arm_get_cookie_size (tree
);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree
);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree
arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree
, rtx
);
245 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
246 static void arm_option_override (void);
247 static void arm_set_current_function (tree
);
248 static bool arm_can_inline_p (tree
, tree
);
249 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
250 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn
*);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
257 static bool arm_output_addr_const_extra (FILE *, rtx
);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree
);
260 static const char *arm_invalid_parameter_type (const_tree t
);
261 static const char *arm_invalid_return_type (const_tree t
);
262 static tree
arm_promoted_type (const_tree t
);
263 static tree
arm_convert_to_type (tree type
, tree expr
);
264 static bool arm_scalar_mode_supported_p (machine_mode
);
265 static bool arm_frame_pointer_required (void);
266 static bool arm_can_eliminate (const int, const int);
267 static void arm_asm_trampoline_template (FILE *);
268 static void arm_trampoline_init (rtx
, tree
, rtx
);
269 static rtx
arm_trampoline_adjust_address (rtx
);
270 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
271 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
272 static bool xscale_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
273 static bool fa726te_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
274 static bool arm_array_mode_supported_p (machine_mode
,
275 unsigned HOST_WIDE_INT
);
276 static machine_mode
arm_preferred_simd_mode (machine_mode
);
277 static bool arm_class_likely_spilled_p (reg_class_t
);
278 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
279 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
280 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
284 static void arm_conditional_register_usage (void);
285 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
286 static unsigned int arm_autovectorize_vector_sizes (void);
287 static int arm_default_branch_cost (bool, bool);
288 static int arm_cortex_a5_branch_cost (bool, bool);
289 static int arm_cortex_m_branch_cost (bool, bool);
290 static int arm_cortex_m7_branch_cost (bool, bool);
292 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
293 const unsigned char *sel
);
295 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
297 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
299 int misalign ATTRIBUTE_UNUSED
);
300 static unsigned arm_add_stmt_cost (void *data
, int count
,
301 enum vect_cost_for_stmt kind
,
302 struct _stmt_vec_info
*stmt_info
,
304 enum vect_cost_model_location where
);
306 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
307 bool op0_preserve_value
);
308 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
310 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
312 /* Table of machine attributes. */
313 static const struct attribute_spec arm_attribute_table
[] =
315 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
316 affects_type_identity } */
317 /* Function calls made to this symbol must be done indirectly, because
318 it may lie outside of the 26 bit addressing range of a normal function call. */
320 { "long_call", 0, 0, false, true, true, NULL
, false },
321 /* Whereas these functions are always known to reside within the 26 bit addressing range. */
323 { "short_call", 0, 0, false, true, true, NULL
, false },
324 /* Specify the procedure call conventions for a function. */
325 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
327 /* Interrupt Service Routines have special prologue and epilogue requirements. */
328 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
330 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
332 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
335 /* ARM/PE has three new attributes:
337 dllexport - for exporting a function/variable that will live in a dll
338 dllimport - for importing a function/variable from a dll
340 Microsoft allows multiple declspecs in one __declspec, separating
341 them with spaces. We do NOT support this. Instead, use __declspec multiple times. */
344 { "dllimport", 0, 0, true, false, false, NULL
, false },
345 { "dllexport", 0, 0, true, false, false, NULL
, false },
346 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
348 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
349 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
350 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
351 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
354 { NULL
, 0, 0, false, false, false, NULL
, false }
357 /* Initialize the GCC target structure. */
358 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
359 #undef TARGET_MERGE_DECL_ATTRIBUTES
360 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
363 #undef TARGET_LEGITIMIZE_ADDRESS
364 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
367 #define TARGET_LRA_P hook_bool_void_true
369 #undef TARGET_ATTRIBUTE_TABLE
370 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
372 #undef TARGET_INSERT_ATTRIBUTES
373 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
375 #undef TARGET_ASM_FILE_START
376 #define TARGET_ASM_FILE_START arm_file_start
377 #undef TARGET_ASM_FILE_END
378 #define TARGET_ASM_FILE_END arm_file_end
380 #undef TARGET_ASM_ALIGNED_SI_OP
381 #define TARGET_ASM_ALIGNED_SI_OP NULL
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER arm_assemble_integer
385 #undef TARGET_PRINT_OPERAND
386 #define TARGET_PRINT_OPERAND arm_print_operand
387 #undef TARGET_PRINT_OPERAND_ADDRESS
388 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
389 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
390 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
392 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
393 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
395 #undef TARGET_ASM_FUNCTION_PROLOGUE
396 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
398 #undef TARGET_ASM_FUNCTION_EPILOGUE
399 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
401 #undef TARGET_CAN_INLINE_P
402 #define TARGET_CAN_INLINE_P arm_can_inline_p
404 #undef TARGET_OPTION_OVERRIDE
405 #define TARGET_OPTION_OVERRIDE arm_option_override
407 #undef TARGET_COMP_TYPE_ATTRIBUTES
408 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
410 #undef TARGET_SCHED_MACRO_FUSION_P
411 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
413 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
414 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
416 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
417 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
419 #undef TARGET_SCHED_ADJUST_COST
420 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
422 #undef TARGET_SET_CURRENT_FUNCTION
423 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
425 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
426 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
428 #undef TARGET_SCHED_REORDER
429 #define TARGET_SCHED_REORDER arm_sched_reorder
431 #undef TARGET_REGISTER_MOVE_COST
432 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
434 #undef TARGET_MEMORY_MOVE_COST
435 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
437 #undef TARGET_ENCODE_SECTION_INFO
439 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
441 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
444 #undef TARGET_STRIP_NAME_ENCODING
445 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
447 #undef TARGET_ASM_INTERNAL_LABEL
448 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
450 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
451 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
453 #undef TARGET_FUNCTION_VALUE
454 #define TARGET_FUNCTION_VALUE arm_function_value
456 #undef TARGET_LIBCALL_VALUE
457 #define TARGET_LIBCALL_VALUE arm_libcall_value
459 #undef TARGET_FUNCTION_VALUE_REGNO_P
460 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
462 #undef TARGET_ASM_OUTPUT_MI_THUNK
463 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
464 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
465 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
467 #undef TARGET_RTX_COSTS
468 #define TARGET_RTX_COSTS arm_rtx_costs
469 #undef TARGET_ADDRESS_COST
470 #define TARGET_ADDRESS_COST arm_address_cost
472 #undef TARGET_SHIFT_TRUNCATION_MASK
473 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
474 #undef TARGET_VECTOR_MODE_SUPPORTED_P
475 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
476 #undef TARGET_ARRAY_MODE_SUPPORTED_P
477 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
478 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
479 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
480 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
481 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
482 arm_autovectorize_vector_sizes
484 #undef TARGET_MACHINE_DEPENDENT_REORG
485 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
487 #undef TARGET_INIT_BUILTINS
488 #define TARGET_INIT_BUILTINS arm_init_builtins
489 #undef TARGET_EXPAND_BUILTIN
490 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
491 #undef TARGET_BUILTIN_DECL
492 #define TARGET_BUILTIN_DECL arm_builtin_decl
494 #undef TARGET_INIT_LIBFUNCS
495 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
497 #undef TARGET_PROMOTE_FUNCTION_MODE
498 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
499 #undef TARGET_PROMOTE_PROTOTYPES
500 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
501 #undef TARGET_PASS_BY_REFERENCE
502 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
503 #undef TARGET_ARG_PARTIAL_BYTES
504 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
505 #undef TARGET_FUNCTION_ARG
506 #define TARGET_FUNCTION_ARG arm_function_arg
507 #undef TARGET_FUNCTION_ARG_ADVANCE
508 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
509 #undef TARGET_FUNCTION_ARG_BOUNDARY
510 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
512 #undef TARGET_SETUP_INCOMING_VARARGS
513 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
515 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
516 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
518 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
519 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
520 #undef TARGET_TRAMPOLINE_INIT
521 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
522 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
523 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
525 #undef TARGET_WARN_FUNC_RETURN
526 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
528 #undef TARGET_DEFAULT_SHORT_ENUMS
529 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
531 #undef TARGET_ALIGN_ANON_BITFIELD
532 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
534 #undef TARGET_NARROW_VOLATILE_BITFIELD
535 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
537 #undef TARGET_CXX_GUARD_TYPE
538 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
540 #undef TARGET_CXX_GUARD_MASK_BIT
541 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
543 #undef TARGET_CXX_GET_COOKIE_SIZE
544 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
546 #undef TARGET_CXX_COOKIE_HAS_SIZE
547 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
549 #undef TARGET_CXX_CDTOR_RETURNS_THIS
550 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
552 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
553 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
555 #undef TARGET_CXX_USE_AEABI_ATEXIT
556 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
558 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
559 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
560 arm_cxx_determine_class_data_visibility
562 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
563 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
565 #undef TARGET_RETURN_IN_MSB
566 #define TARGET_RETURN_IN_MSB arm_return_in_msb
568 #undef TARGET_RETURN_IN_MEMORY
569 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
571 #undef TARGET_MUST_PASS_IN_STACK
572 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
575 #undef TARGET_ASM_UNWIND_EMIT
576 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
578 /* EABI unwinding tables use a different format for the typeinfo tables. */
579 #undef TARGET_ASM_TTYPE
580 #define TARGET_ASM_TTYPE arm_output_ttype
582 #undef TARGET_ARM_EABI_UNWINDER
583 #define TARGET_ARM_EABI_UNWINDER true
585 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
586 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
588 #undef TARGET_ASM_INIT_SECTIONS
589 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
590 #endif /* ARM_UNWIND_INFO */
592 #undef TARGET_DWARF_REGISTER_SPAN
593 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
595 #undef TARGET_CANNOT_COPY_INSN_P
596 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
599 #undef TARGET_HAVE_TLS
600 #define TARGET_HAVE_TLS true
603 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
604 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
606 #undef TARGET_LEGITIMATE_CONSTANT_P
607 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
609 #undef TARGET_CANNOT_FORCE_CONST_MEM
610 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
612 #undef TARGET_MAX_ANCHOR_OFFSET
613 #define TARGET_MAX_ANCHOR_OFFSET 4095
615 /* The minimum is set such that the total size of the block
616 for a particular anchor is -4088 + 1 + 4095 bytes, which is
617 divisible by eight, ensuring natural spacing of anchors. */
618 #undef TARGET_MIN_ANCHOR_OFFSET
619 #define TARGET_MIN_ANCHOR_OFFSET -4088
621 #undef TARGET_SCHED_ISSUE_RATE
622 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
624 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
625 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
626 arm_first_cycle_multipass_dfa_lookahead
628 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
629 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
630 arm_first_cycle_multipass_dfa_lookahead_guard
632 #undef TARGET_MANGLE_TYPE
633 #define TARGET_MANGLE_TYPE arm_mangle_type
635 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
636 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
638 #undef TARGET_BUILD_BUILTIN_VA_LIST
639 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
640 #undef TARGET_EXPAND_BUILTIN_VA_START
641 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
642 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
643 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
646 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
647 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
650 #undef TARGET_LEGITIMATE_ADDRESS_P
651 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
653 #undef TARGET_PREFERRED_RELOAD_CLASS
654 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
656 #undef TARGET_INVALID_PARAMETER_TYPE
657 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
659 #undef TARGET_INVALID_RETURN_TYPE
660 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
662 #undef TARGET_PROMOTED_TYPE
663 #define TARGET_PROMOTED_TYPE arm_promoted_type
665 #undef TARGET_CONVERT_TO_TYPE
666 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
668 #undef TARGET_SCALAR_MODE_SUPPORTED_P
669 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
671 #undef TARGET_FRAME_POINTER_REQUIRED
672 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
674 #undef TARGET_CAN_ELIMINATE
675 #define TARGET_CAN_ELIMINATE arm_can_eliminate
677 #undef TARGET_CONDITIONAL_REGISTER_USAGE
678 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
680 #undef TARGET_CLASS_LIKELY_SPILLED_P
681 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
683 #undef TARGET_VECTORIZE_BUILTINS
684 #define TARGET_VECTORIZE_BUILTINS
686 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
687 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
688 arm_builtin_vectorized_function
690 #undef TARGET_VECTOR_ALIGNMENT
691 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
693 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
694 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
695 arm_vector_alignment_reachable
697 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
698 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
699 arm_builtin_support_vector_misalignment
701 #undef TARGET_PREFERRED_RENAME_CLASS
702 #define TARGET_PREFERRED_RENAME_CLASS \
703 arm_preferred_rename_class
705 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
706 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
707 arm_vectorize_vec_perm_const_ok
709 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
710 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
711 arm_builtin_vectorization_cost
712 #undef TARGET_VECTORIZE_ADD_STMT_COST
713 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
715 #undef TARGET_CANONICALIZE_COMPARISON
716 #define TARGET_CANONICALIZE_COMPARISON \
717 arm_canonicalize_comparison
719 #undef TARGET_ASAN_SHADOW_OFFSET
720 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
722 #undef MAX_INSN_PER_IT_BLOCK
723 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
725 #undef TARGET_CAN_USE_DOLOOP_P
726 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
728 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
729 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
731 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
732 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
734 #undef TARGET_SCHED_FUSION_PRIORITY
735 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
/* The table of target hooks for the middle end, assembled from the
   TARGET_* macro definitions above via TARGET_INITIALIZER.  */
737 struct gcc_target targetm
= TARGET_INITIALIZER
;
739 /* Obstack for minipool constant handling. */
740 static struct obstack minipool_obstack
;
/* First object allocated on MINIPOOL_OBSTACK; presumably the base
   pointer used to release the whole pool at once -- confirm at the
   obstack_free call site (outside this chunk).  */
741 static char * minipool_startobj
;
743 /* The maximum number of insns skipped which
744 will be conditionalised if possible. */
745 static int max_insns_skipped
= 5;
/* The assembler output stream; defined in GCC's common output
   machinery, declared here for direct use by the output routines in
   this file.  */
747 extern FILE * asm_out_file
;
749 /* True if we are currently building a constant table. */
750 int making_const_table
;
752 /* The processor for which instructions should be scheduled. */
753 enum processor_type arm_tune
= arm_none
;
755 /* The current tuning set. */
756 const struct tune_params
*current_tune
;
758 /* Which floating point hardware to schedule for. */
761 /* Which floating point hardware to use. */
762 const struct arm_fpu_desc
*arm_fpu_desc
;
764 /* Used for Thumb call_via trampolines. */
765 rtx thumb_call_via_label
[14];
766 static int thumb_call_reg_needed
;
768 /* The bits in this mask specify which
769 instructions we are allowed to generate. */
770 unsigned long insn_flags
= 0;
772 /* The bits in this mask specify which instruction scheduling options should be used. */
774 unsigned long tune_flags
= 0;
776 /* The highest ARM architecture version supported by the target. */
778 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
780 /* The following are used in the arm.md file as equivalents to bits
781 in the above two flag variables. */
783 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
786 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
789 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
792 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
795 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
798 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
801 /* Nonzero if this chip supports the ARM 6K extensions. */
804 /* Nonzero if instructions present in ARMv6-M can be used. */
807 /* Nonzero if this chip supports the ARM 7 extensions. */
810 /* Nonzero if instructions not present in the 'M' profile can be used. */
811 int arm_arch_notm
= 0;
813 /* Nonzero if instructions present in ARMv7E-M can be used. */
816 /* Nonzero if instructions present in ARMv8 can be used. */
819 /* Nonzero if this chip can benefit from load scheduling. */
820 int arm_ld_sched
= 0;
822 /* Nonzero if this chip is a StrongARM. */
823 int arm_tune_strongarm
= 0;
825 /* Nonzero if this chip supports Intel Wireless MMX technology. */
826 int arm_arch_iwmmxt
= 0;
828 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
829 int arm_arch_iwmmxt2
= 0;
831 /* Nonzero if this chip is an XScale. */
832 int arm_arch_xscale
= 0;
834 /* Nonzero if tuning for XScale */
835 int arm_tune_xscale
= 0;
837 /* Nonzero if we want to tune for stores that access the write-buffer.
838 This typically means an ARM6 or ARM7 with MMU or MPU. */
839 int arm_tune_wbuf
= 0;
841 /* Nonzero if tuning for Cortex-A9. */
842 int arm_tune_cortex_a9
= 0;
844 /* Nonzero if we should define __THUMB_INTERWORK__ in the preprocessor.
846 XXX This is a bit of a hack, it's intended to help work around
847 problems in GLD which doesn't understand that armv5t code is
848 interworking clean. */
849 int arm_cpp_interwork
= 0;
851 /* Nonzero if chip supports Thumb 2. */
854 /* Nonzero if chip supports integer division instruction. */
855 int arm_arch_arm_hwdiv
;
/* As above, but for the Thumb instruction set (per the _arm_ /
   _thumb_ naming -- confirm where these are set during option
   processing, outside this chunk).  */
856 int arm_arch_thumb_hwdiv
;
858 /* Nonzero if chip disallows volatile memory access in IT block. */
859 int arm_arch_no_volatile_ce
;
861 /* Nonzero if we should use Neon to handle 64-bits operations rather
862 than core registers. */
863 int prefer_neon_for_64bits
= 0;
865 /* Nonzero if we shouldn't use literal pools. */
866 bool arm_disable_literal_pool
= false;
868 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
869 we must report the mode of the memory reference from
870 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
871 machine_mode output_memory_reference_mode
;
873 /* The register number to be used for the PIC offset register. */
874 unsigned arm_pic_register
= INVALID_REGNUM
;
/* The procedure-call-standard (PCS) variant in effect for the current
   compilation.  NOTE(review): initialized outside this chunk,
   presumably during option override -- confirm.  */
876 enum arm_pcs arm_pcs_default
;
878 /* For an explanation of these variables, see final_prescan_insn below. */
880 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
881 enum arm_cond_code arm_current_cc
;
884 int arm_target_label
;
885 /* The number of conditionally executed insns, including the current insn. */
886 int arm_condexec_count
= 0;
887 /* A bitmask specifying the patterns for the IT block.
888 Zero means do not output an IT block before this insn. */
889 int arm_condexec_mask
= 0;
890 /* The number of bits used in arm_condexec_mask. */
891 int arm_condexec_masklen
= 0;
893 /* Nonzero if chip supports the ARMv8 CRC instructions. */
894 int arm_arch_crc
= 0;
896 /* Nonzero if the core has a very small, high-latency, multiply unit. */
897 int arm_m_profile_small_mul
= 0;
899 /* The condition codes of the ARM, and the inverse function. */
900 static const char * const arm_condition_codes
[] =
902 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
903 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
906 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
907 int arm_regs_in_sequence
[] =
909 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
912 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
913 #define streq(string1, string2) (strcmp (string1, string2) == 0)
915 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
916 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
917 | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code. */
/* NOTE(review): the enclosing struct declaration is not visible in this
   chunk; these appear to be the fields of the per-CPU table entry used
   by the initialization code — confirm against the full file. */
const char *const name
; /* Name string for this CPU/architecture entry. */
enum processor_type core
; /* The core this entry describes. */
enum base_architecture base_arch
; /* Base architecture of the core. */
const unsigned long flags
; /* Feature/ISA flag bits for the core. */
const struct tune_params
*const tune
; /* Tuning parameters to use for this core. */
/* Prefetch tuning triple: { num_slots, l1_cache_size, l1_cache_line_size },
   per the parameter names of ARM_PREFETCH_BENEFICIAL below; -1 means
   "not applicable". */
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
/* arm generic vectorizer costs.  Used as the default cost model for
   auto-vectorization when no core-specific table is selected. */
struct cpu_vec_costs arm_default_vec_cost
= {
1, /* scalar_stmt_cost. */
1, /* scalar_load_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
1, /* vec_unalign_load_cost. */
1, /* vec_unalign_store_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
#include "aarch-cost-tables.h"
/* Extra per-instruction cost table used when tuning for the Cortex-A9. */
const struct cpu_cost_table cortexa9_extra_costs
=
COSTS_N_INSNS (1), /* shift_reg. */
COSTS_N_INSNS (1), /* arith_shift. */
COSTS_N_INSNS (2), /* arith_shift_reg. */
COSTS_N_INSNS (1), /* log_shift_reg. */
COSTS_N_INSNS (1), /* extend. */
COSTS_N_INSNS (2), /* extend_arith. */
COSTS_N_INSNS (1), /* bfi. */
COSTS_N_INSNS (1), /* bfx. */
true /* non_exec_costs_exec. */
COSTS_N_INSNS (3), /* simple. */
COSTS_N_INSNS (3), /* flag_setting. */
COSTS_N_INSNS (2), /* extend. */
COSTS_N_INSNS (3), /* add. */
COSTS_N_INSNS (2), /* extend_add. */
COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
0, /* simple (N/A). */
0, /* flag_setting (N/A). */
COSTS_N_INSNS (4), /* extend. */
COSTS_N_INSNS (4), /* extend_add. */
COSTS_N_INSNS (2), /* load. */
COSTS_N_INSNS (2), /* load_sign_extend. */
COSTS_N_INSNS (2), /* ldrd. */
COSTS_N_INSNS (2), /* ldm_1st. */
1, /* ldm_regs_per_insn_1st. */
2, /* ldm_regs_per_insn_subsequent. */
COSTS_N_INSNS (5), /* loadf. */
COSTS_N_INSNS (5), /* loadd. */
COSTS_N_INSNS (1), /* load_unaligned. */
COSTS_N_INSNS (2), /* store. */
COSTS_N_INSNS (2), /* strd. */
COSTS_N_INSNS (2), /* stm_1st. */
1, /* stm_regs_per_insn_1st. */
2, /* stm_regs_per_insn_subsequent. */
COSTS_N_INSNS (1), /* storef. */
COSTS_N_INSNS (1), /* stored. */
COSTS_N_INSNS (1), /* store_unaligned. */
COSTS_N_INSNS (1), /* loadv. */
COSTS_N_INSNS (1) /* storev. */
COSTS_N_INSNS (14), /* div. */
COSTS_N_INSNS (4), /* mult. */
COSTS_N_INSNS (7), /* mult_addsub. */
COSTS_N_INSNS (30), /* fma. */
COSTS_N_INSNS (3), /* addsub. */
COSTS_N_INSNS (1), /* fpconst. */
COSTS_N_INSNS (1), /* neg. */
COSTS_N_INSNS (3), /* compare. */
COSTS_N_INSNS (3), /* widen. */
COSTS_N_INSNS (3), /* narrow. */
COSTS_N_INSNS (3), /* toint. */
COSTS_N_INSNS (3), /* fromint. */
COSTS_N_INSNS (3) /* roundint. */
COSTS_N_INSNS (24), /* div. */
COSTS_N_INSNS (5), /* mult. */
COSTS_N_INSNS (8), /* mult_addsub. */
COSTS_N_INSNS (30), /* fma. */
COSTS_N_INSNS (3), /* addsub. */
COSTS_N_INSNS (1), /* fpconst. */
COSTS_N_INSNS (1), /* neg. */
COSTS_N_INSNS (3), /* compare. */
COSTS_N_INSNS (3), /* widen. */
COSTS_N_INSNS (3), /* narrow. */
COSTS_N_INSNS (3), /* toint. */
COSTS_N_INSNS (3), /* fromint. */
COSTS_N_INSNS (3) /* roundint. */
COSTS_N_INSNS (1) /* alu. */
/* Extra per-instruction cost table used when tuning for the Cortex-A8. */
const struct cpu_cost_table cortexa8_extra_costs
=
COSTS_N_INSNS (1), /* shift. */
COSTS_N_INSNS (1), /* arith_shift. */
0, /* arith_shift_reg. */
COSTS_N_INSNS (1), /* log_shift. */
0, /* log_shift_reg. */
0, /* extend_arith. */
true /* non_exec_costs_exec. */
COSTS_N_INSNS (1), /* simple. */
COSTS_N_INSNS (1), /* flag_setting. */
COSTS_N_INSNS (1), /* extend. */
COSTS_N_INSNS (1), /* add. */
COSTS_N_INSNS (1), /* extend_add. */
COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
0, /* simple (N/A). */
0, /* flag_setting (N/A). */
COSTS_N_INSNS (2), /* extend. */
COSTS_N_INSNS (2), /* extend_add. */
COSTS_N_INSNS (1), /* load. */
COSTS_N_INSNS (1), /* load_sign_extend. */
COSTS_N_INSNS (1), /* ldrd. */
COSTS_N_INSNS (1), /* ldm_1st. */
1, /* ldm_regs_per_insn_1st. */
2, /* ldm_regs_per_insn_subsequent. */
COSTS_N_INSNS (1), /* loadf. */
COSTS_N_INSNS (1), /* loadd. */
COSTS_N_INSNS (1), /* load_unaligned. */
COSTS_N_INSNS (1), /* store. */
COSTS_N_INSNS (1), /* strd. */
COSTS_N_INSNS (1), /* stm_1st. */
1, /* stm_regs_per_insn_1st. */
2, /* stm_regs_per_insn_subsequent. */
COSTS_N_INSNS (1), /* storef. */
COSTS_N_INSNS (1), /* stored. */
COSTS_N_INSNS (1), /* store_unaligned. */
COSTS_N_INSNS (1), /* loadv. */
COSTS_N_INSNS (1) /* storev. */
COSTS_N_INSNS (36), /* div. */
COSTS_N_INSNS (11), /* mult. */
COSTS_N_INSNS (20), /* mult_addsub. */
COSTS_N_INSNS (30), /* fma. */
COSTS_N_INSNS (9), /* addsub. */
COSTS_N_INSNS (3), /* fpconst. */
COSTS_N_INSNS (3), /* neg. */
COSTS_N_INSNS (6), /* compare. */
COSTS_N_INSNS (4), /* widen. */
COSTS_N_INSNS (4), /* narrow. */
COSTS_N_INSNS (8), /* toint. */
COSTS_N_INSNS (8), /* fromint. */
COSTS_N_INSNS (8) /* roundint. */
COSTS_N_INSNS (64), /* div. */
COSTS_N_INSNS (16), /* mult. */
COSTS_N_INSNS (25), /* mult_addsub. */
COSTS_N_INSNS (30), /* fma. */
COSTS_N_INSNS (9), /* addsub. */
COSTS_N_INSNS (3), /* fpconst. */
COSTS_N_INSNS (3), /* neg. */
COSTS_N_INSNS (6), /* compare. */
COSTS_N_INSNS (6), /* widen. */
COSTS_N_INSNS (6), /* narrow. */
COSTS_N_INSNS (8), /* toint. */
COSTS_N_INSNS (8), /* fromint. */
COSTS_N_INSNS (8) /* roundint. */
COSTS_N_INSNS (1) /* alu. */
/* Extra per-instruction cost table used when tuning for the Cortex-A5. */
const struct cpu_cost_table cortexa5_extra_costs
=
COSTS_N_INSNS (1), /* shift. */
COSTS_N_INSNS (1), /* shift_reg. */
COSTS_N_INSNS (1), /* arith_shift. */
COSTS_N_INSNS (1), /* arith_shift_reg. */
COSTS_N_INSNS (1), /* log_shift. */
COSTS_N_INSNS (1), /* log_shift_reg. */
COSTS_N_INSNS (1), /* extend. */
COSTS_N_INSNS (1), /* extend_arith. */
COSTS_N_INSNS (1), /* bfi. */
COSTS_N_INSNS (1), /* bfx. */
COSTS_N_INSNS (1), /* clz. */
COSTS_N_INSNS (1), /* rev. */
true /* non_exec_costs_exec. */
COSTS_N_INSNS (1), /* flag_setting. */
COSTS_N_INSNS (1), /* extend. */
COSTS_N_INSNS (1), /* add. */
COSTS_N_INSNS (1), /* extend_add. */
COSTS_N_INSNS (7) /* idiv. */
0, /* simple (N/A). */
0, /* flag_setting (N/A). */
COSTS_N_INSNS (1), /* extend. */
COSTS_N_INSNS (2), /* extend_add. */
COSTS_N_INSNS (1), /* load. */
COSTS_N_INSNS (1), /* load_sign_extend. */
COSTS_N_INSNS (6), /* ldrd. */
COSTS_N_INSNS (1), /* ldm_1st. */
1, /* ldm_regs_per_insn_1st. */
2, /* ldm_regs_per_insn_subsequent. */
COSTS_N_INSNS (2), /* loadf. */
COSTS_N_INSNS (4), /* loadd. */
COSTS_N_INSNS (1), /* load_unaligned. */
COSTS_N_INSNS (1), /* store. */
COSTS_N_INSNS (3), /* strd. */
COSTS_N_INSNS (1), /* stm_1st. */
1, /* stm_regs_per_insn_1st. */
2, /* stm_regs_per_insn_subsequent. */
COSTS_N_INSNS (2), /* storef. */
COSTS_N_INSNS (2), /* stored. */
COSTS_N_INSNS (1), /* store_unaligned. */
COSTS_N_INSNS (1), /* loadv. */
COSTS_N_INSNS (1) /* storev. */
COSTS_N_INSNS (15), /* div. */
COSTS_N_INSNS (3), /* mult. */
COSTS_N_INSNS (7), /* mult_addsub. */
COSTS_N_INSNS (7), /* fma. */
COSTS_N_INSNS (3), /* addsub. */
COSTS_N_INSNS (3), /* fpconst. */
COSTS_N_INSNS (3), /* neg. */
COSTS_N_INSNS (3), /* compare. */
COSTS_N_INSNS (3), /* widen. */
COSTS_N_INSNS (3), /* narrow. */
COSTS_N_INSNS (3), /* toint. */
COSTS_N_INSNS (3), /* fromint. */
COSTS_N_INSNS (3) /* roundint. */
COSTS_N_INSNS (30), /* div. */
COSTS_N_INSNS (6), /* mult. */
COSTS_N_INSNS (10), /* mult_addsub. */
COSTS_N_INSNS (7), /* fma. */
COSTS_N_INSNS (3), /* addsub. */
COSTS_N_INSNS (3), /* fpconst. */
COSTS_N_INSNS (3), /* neg. */
COSTS_N_INSNS (3), /* compare. */
COSTS_N_INSNS (3), /* widen. */
COSTS_N_INSNS (3), /* narrow. */
COSTS_N_INSNS (3), /* toint. */
COSTS_N_INSNS (3), /* fromint. */
COSTS_N_INSNS (3) /* roundint. */
COSTS_N_INSNS (1) /* alu. */
/* Extra per-instruction cost table used when tuning for the Cortex-A7. */
const struct cpu_cost_table cortexa7_extra_costs
=
COSTS_N_INSNS (1), /* shift. */
COSTS_N_INSNS (1), /* shift_reg. */
COSTS_N_INSNS (1), /* arith_shift. */
COSTS_N_INSNS (1), /* arith_shift_reg. */
COSTS_N_INSNS (1), /* log_shift. */
COSTS_N_INSNS (1), /* log_shift_reg. */
COSTS_N_INSNS (1), /* extend. */
COSTS_N_INSNS (1), /* extend_arith. */
COSTS_N_INSNS (1), /* bfi. */
COSTS_N_INSNS (1), /* bfx. */
COSTS_N_INSNS (1), /* clz. */
COSTS_N_INSNS (1), /* rev. */
true /* non_exec_costs_exec. */
COSTS_N_INSNS (1), /* flag_setting. */
COSTS_N_INSNS (1), /* extend. */
COSTS_N_INSNS (1), /* add. */
COSTS_N_INSNS (1), /* extend_add. */
COSTS_N_INSNS (7) /* idiv. */
0, /* simple (N/A). */
0, /* flag_setting (N/A). */
COSTS_N_INSNS (1), /* extend. */
COSTS_N_INSNS (2), /* extend_add. */
COSTS_N_INSNS (1), /* load. */
COSTS_N_INSNS (1), /* load_sign_extend. */
COSTS_N_INSNS (3), /* ldrd. */
COSTS_N_INSNS (1), /* ldm_1st. */
1, /* ldm_regs_per_insn_1st. */
2, /* ldm_regs_per_insn_subsequent. */
COSTS_N_INSNS (2), /* loadf. */
COSTS_N_INSNS (2), /* loadd. */
COSTS_N_INSNS (1), /* load_unaligned. */
COSTS_N_INSNS (1), /* store. */
COSTS_N_INSNS (3), /* strd. */
COSTS_N_INSNS (1), /* stm_1st. */
1, /* stm_regs_per_insn_1st. */
2, /* stm_regs_per_insn_subsequent. */
COSTS_N_INSNS (2), /* storef. */
COSTS_N_INSNS (2), /* stored. */
COSTS_N_INSNS (1), /* store_unaligned. */
COSTS_N_INSNS (1), /* loadv. */
COSTS_N_INSNS (1) /* storev. */
COSTS_N_INSNS (15), /* div. */
COSTS_N_INSNS (3), /* mult. */
COSTS_N_INSNS (7), /* mult_addsub. */
COSTS_N_INSNS (7), /* fma. */
COSTS_N_INSNS (3), /* addsub. */
COSTS_N_INSNS (3), /* fpconst. */
COSTS_N_INSNS (3), /* neg. */
COSTS_N_INSNS (3), /* compare. */
COSTS_N_INSNS (3), /* widen. */
COSTS_N_INSNS (3), /* narrow. */
COSTS_N_INSNS (3), /* toint. */
COSTS_N_INSNS (3), /* fromint. */
COSTS_N_INSNS (3) /* roundint. */
COSTS_N_INSNS (30), /* div. */
COSTS_N_INSNS (6), /* mult. */
COSTS_N_INSNS (10), /* mult_addsub. */
COSTS_N_INSNS (7), /* fma. */
COSTS_N_INSNS (3), /* addsub. */
COSTS_N_INSNS (3), /* fpconst. */
COSTS_N_INSNS (3), /* neg. */
COSTS_N_INSNS (3), /* compare. */
COSTS_N_INSNS (3), /* widen. */
COSTS_N_INSNS (3), /* narrow. */
COSTS_N_INSNS (3), /* toint. */
COSTS_N_INSNS (3), /* fromint. */
COSTS_N_INSNS (3) /* roundint. */
COSTS_N_INSNS (1) /* alu. */
/* Extra per-instruction cost table used when tuning for the Cortex-A12. */
const struct cpu_cost_table cortexa12_extra_costs
=
COSTS_N_INSNS (1), /* shift_reg. */
COSTS_N_INSNS (1), /* arith_shift. */
COSTS_N_INSNS (1), /* arith_shift_reg. */
COSTS_N_INSNS (1), /* log_shift. */
COSTS_N_INSNS (1), /* log_shift_reg. */
COSTS_N_INSNS (1), /* extend_arith. */
COSTS_N_INSNS (1), /* bfx. */
COSTS_N_INSNS (1), /* clz. */
COSTS_N_INSNS (1), /* rev. */
true /* non_exec_costs_exec. */
COSTS_N_INSNS (2), /* simple. */
COSTS_N_INSNS (3), /* flag_setting. */
COSTS_N_INSNS (2), /* extend. */
COSTS_N_INSNS (3), /* add. */
COSTS_N_INSNS (2), /* extend_add. */
COSTS_N_INSNS (18) /* idiv. */
0, /* simple (N/A). */
0, /* flag_setting (N/A). */
COSTS_N_INSNS (3), /* extend. */
COSTS_N_INSNS (3), /* extend_add. */
COSTS_N_INSNS (3), /* load. */
COSTS_N_INSNS (3), /* load_sign_extend. */
COSTS_N_INSNS (3), /* ldrd. */
COSTS_N_INSNS (3), /* ldm_1st. */
1, /* ldm_regs_per_insn_1st. */
2, /* ldm_regs_per_insn_subsequent. */
COSTS_N_INSNS (3), /* loadf. */
COSTS_N_INSNS (3), /* loadd. */
0, /* load_unaligned. */
1, /* stm_regs_per_insn_1st. */
2, /* stm_regs_per_insn_subsequent. */
COSTS_N_INSNS (2), /* storef. */
COSTS_N_INSNS (2), /* stored. */
0, /* store_unaligned. */
COSTS_N_INSNS (1), /* loadv. */
COSTS_N_INSNS (1) /* storev. */
COSTS_N_INSNS (17), /* div. */
COSTS_N_INSNS (4), /* mult. */
COSTS_N_INSNS (8), /* mult_addsub. */
COSTS_N_INSNS (8), /* fma. */
COSTS_N_INSNS (4), /* addsub. */
COSTS_N_INSNS (2), /* fpconst. */
COSTS_N_INSNS (2), /* neg. */
COSTS_N_INSNS (2), /* compare. */
COSTS_N_INSNS (4), /* widen. */
COSTS_N_INSNS (4), /* narrow. */
COSTS_N_INSNS (4), /* toint. */
COSTS_N_INSNS (4), /* fromint. */
COSTS_N_INSNS (4) /* roundint. */
COSTS_N_INSNS (31), /* div. */
COSTS_N_INSNS (4), /* mult. */
COSTS_N_INSNS (8), /* mult_addsub. */
COSTS_N_INSNS (8), /* fma. */
COSTS_N_INSNS (4), /* addsub. */
COSTS_N_INSNS (2), /* fpconst. */
COSTS_N_INSNS (2), /* neg. */
COSTS_N_INSNS (2), /* compare. */
COSTS_N_INSNS (4), /* widen. */
COSTS_N_INSNS (4), /* narrow. */
COSTS_N_INSNS (4), /* toint. */
COSTS_N_INSNS (4), /* fromint. */
COSTS_N_INSNS (4) /* roundint. */
COSTS_N_INSNS (1) /* alu. */
/* Extra per-instruction cost table used when tuning for the Cortex-A15. */
const struct cpu_cost_table cortexa15_extra_costs
=
COSTS_N_INSNS (1), /* arith_shift. */
COSTS_N_INSNS (1), /* arith_shift_reg. */
COSTS_N_INSNS (1), /* log_shift. */
COSTS_N_INSNS (1), /* log_shift_reg. */
COSTS_N_INSNS (1), /* extend_arith. */
COSTS_N_INSNS (1), /* bfi. */
true /* non_exec_costs_exec. */
COSTS_N_INSNS (2), /* simple. */
COSTS_N_INSNS (3), /* flag_setting. */
COSTS_N_INSNS (2), /* extend. */
COSTS_N_INSNS (2), /* add. */
COSTS_N_INSNS (2), /* extend_add. */
COSTS_N_INSNS (18) /* idiv. */
0, /* simple (N/A). */
0, /* flag_setting (N/A). */
COSTS_N_INSNS (3), /* extend. */
COSTS_N_INSNS (3), /* extend_add. */
COSTS_N_INSNS (3), /* load. */
COSTS_N_INSNS (3), /* load_sign_extend. */
COSTS_N_INSNS (3), /* ldrd. */
COSTS_N_INSNS (4), /* ldm_1st. */
1, /* ldm_regs_per_insn_1st. */
2, /* ldm_regs_per_insn_subsequent. */
COSTS_N_INSNS (4), /* loadf. */
COSTS_N_INSNS (4), /* loadd. */
0, /* load_unaligned. */
COSTS_N_INSNS (1), /* stm_1st. */
1, /* stm_regs_per_insn_1st. */
2, /* stm_regs_per_insn_subsequent. */
0, /* store_unaligned. */
COSTS_N_INSNS (1), /* loadv. */
COSTS_N_INSNS (1) /* storev. */
COSTS_N_INSNS (17), /* div. */
COSTS_N_INSNS (4), /* mult. */
COSTS_N_INSNS (8), /* mult_addsub. */
COSTS_N_INSNS (8), /* fma. */
COSTS_N_INSNS (4), /* addsub. */
COSTS_N_INSNS (2), /* fpconst. */
COSTS_N_INSNS (2), /* neg. */
COSTS_N_INSNS (5), /* compare. */
COSTS_N_INSNS (4), /* widen. */
COSTS_N_INSNS (4), /* narrow. */
COSTS_N_INSNS (4), /* toint. */
COSTS_N_INSNS (4), /* fromint. */
COSTS_N_INSNS (4) /* roundint. */
COSTS_N_INSNS (31), /* div. */
COSTS_N_INSNS (4), /* mult. */
COSTS_N_INSNS (8), /* mult_addsub. */
COSTS_N_INSNS (8), /* fma. */
COSTS_N_INSNS (4), /* addsub. */
COSTS_N_INSNS (2), /* fpconst. */
COSTS_N_INSNS (2), /* neg. */
COSTS_N_INSNS (2), /* compare. */
COSTS_N_INSNS (4), /* widen. */
COSTS_N_INSNS (4), /* narrow. */
COSTS_N_INSNS (4), /* toint. */
COSTS_N_INSNS (4), /* fromint. */
COSTS_N_INSNS (4) /* roundint. */
COSTS_N_INSNS (1) /* alu. */
/* Extra per-instruction cost table used when tuning for ARMv7-M cores. */
const struct cpu_cost_table v7m_extra_costs
=
0, /* arith_shift. */
COSTS_N_INSNS (1), /* arith_shift_reg. */
COSTS_N_INSNS (1), /* log_shift_reg. */
COSTS_N_INSNS (1), /* extend_arith. */
COSTS_N_INSNS (1), /* non_exec. */
false /* non_exec_costs_exec. */
COSTS_N_INSNS (1), /* simple. */
COSTS_N_INSNS (1), /* flag_setting. */
COSTS_N_INSNS (2), /* extend. */
COSTS_N_INSNS (1), /* add. */
COSTS_N_INSNS (3), /* extend_add. */
COSTS_N_INSNS (8) /* idiv. */
0, /* simple (N/A). */
0, /* flag_setting (N/A). */
COSTS_N_INSNS (2), /* extend. */
COSTS_N_INSNS (3), /* extend_add. */
COSTS_N_INSNS (2), /* load. */
0, /* load_sign_extend. */
COSTS_N_INSNS (3), /* ldrd. */
COSTS_N_INSNS (2), /* ldm_1st. */
1, /* ldm_regs_per_insn_1st. */
1, /* ldm_regs_per_insn_subsequent. */
COSTS_N_INSNS (2), /* loadf. */
COSTS_N_INSNS (3), /* loadd. */
COSTS_N_INSNS (1), /* load_unaligned. */
COSTS_N_INSNS (2), /* store. */
COSTS_N_INSNS (3), /* strd. */
COSTS_N_INSNS (2), /* stm_1st. */
1, /* stm_regs_per_insn_1st. */
1, /* stm_regs_per_insn_subsequent. */
COSTS_N_INSNS (2), /* storef. */
COSTS_N_INSNS (3), /* stored. */
COSTS_N_INSNS (1), /* store_unaligned. */
COSTS_N_INSNS (1), /* loadv. */
COSTS_N_INSNS (1) /* storev. */
COSTS_N_INSNS (7), /* div. */
COSTS_N_INSNS (2), /* mult. */
COSTS_N_INSNS (5), /* mult_addsub. */
COSTS_N_INSNS (3), /* fma. */
COSTS_N_INSNS (1), /* addsub. */
COSTS_N_INSNS (15), /* div. */
COSTS_N_INSNS (5), /* mult. */
COSTS_N_INSNS (7), /* mult_addsub. */
COSTS_N_INSNS (7), /* fma. */
COSTS_N_INSNS (3), /* addsub. */
COSTS_N_INSNS (1) /* alu. */
/* Tuning parameters for cores with a slow multiplier. */
const struct tune_params arm_slowmul_tune
=
arm_slowmul_rtx_costs
,
NULL
, /* Insn extra costs. */
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
3, /* Constant limit. */
5, /* Max cond insns. */
8, /* Memset max inline. */
1, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_TRUE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for cores with a fast multiplier. */
const struct tune_params arm_fastmul_tune
=
arm_fastmul_rtx_costs
,
NULL
, /* Insn extra costs. */
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
5, /* Max cond insns. */
8, /* Memset max inline. */
1, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_TRUE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* StrongARM has early execution of branches, so a sequence that is worth
skipping is shorter. Set max_insns_skipped to a lower value. */
const struct tune_params arm_strongarm_tune
=
arm_fastmul_rtx_costs
,
NULL
, /* Insn extra costs. */
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
3, /* Max cond insns. */
8, /* Memset max inline. */
1, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_TRUE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for XScale cores (note the XScale-specific
   sched-adjust-cost hook). */
const struct tune_params arm_xscale_tune
=
arm_xscale_rtx_costs
,
NULL
, /* Insn extra costs. */
xscale_sched_adjust_cost
,
arm_default_branch_cost
,
&arm_default_vec_cost
,
2, /* Constant limit. */
3, /* Max cond insns. */
8, /* Memset max inline. */
1, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_TRUE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for ARM9E-family cores. */
const struct tune_params arm_9e_tune
=
NULL
, /* Insn extra costs. */
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
5, /* Max cond insns. */
8, /* Memset max inline. */
1, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_TRUE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for the Marvell PJ4. */
const struct tune_params arm_marvell_pj4_tune
=
NULL
, /* Insn extra costs. */
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
5, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_TRUE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for ARMv6T2 cores. */
const struct tune_params arm_v6t2_tune
=
NULL
, /* Insn extra costs. */
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
5, /* Max cond insns. */
8, /* Memset max inline. */
1, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
const struct tune_params arm_cortex_tune
=
&generic_extra_costs
,
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
5, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for the Cortex-A8. */
const struct tune_params arm_cortex_a8_tune
=
&cortexa8_extra_costs
,
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
5, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_TRUE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for the Cortex-A7. */
const struct tune_params arm_cortex_a7_tune
=
&cortexa7_extra_costs
,
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
5, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_TRUE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for the Cortex-A15 (triple-issue; ldrd preferred;
   full sched autoprefetch). */
const struct tune_params arm_cortex_a15_tune
=
&cortexa15_extra_costs
,
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
2, /* Max cond insns. */
8, /* Memset max inline. */
3, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_TRUE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_ALL
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_TRUE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_FULL
/* Tuning parameters for the Cortex-A53 (fuses MOVW/MOVT pairs). */
const struct tune_params arm_cortex_a53_tune
=
&cortexa53_extra_costs
,
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
5, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_TRUE
,
FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for the Cortex-A57 (fuses MOVW/MOVT pairs). */
const struct tune_params arm_cortex_a57_tune
=
&cortexa57_extra_costs
,
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
2, /* Max cond insns. */
8, /* Memset max inline. */
3, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_TRUE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_ALL
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_TRUE
,
FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
tune_params::SCHED_AUTOPREF_FULL
/* Tuning parameters for the APM X-Gene 1 (quad-issue; larger inline
   memset limit). */
const struct tune_params arm_xgene1_tune
=
&xgene1_extra_costs
,
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
2, /* Max cond insns. */
32, /* Memset max inline. */
4, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_TRUE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_ALL
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
less appealing. Set max_insns_skipped to a low value. */
const struct tune_params arm_cortex_a5_tune
=
&cortexa5_extra_costs
,
NULL
, /* Sched adj cost. */
arm_cortex_a5_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
1, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_TRUE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for the Cortex-A9 (only table here that marks
   prefetch as beneficial; has a core-specific sched-adjust-cost hook). */
const struct tune_params arm_cortex_a9_tune
=
&cortexa9_extra_costs
,
cortex_a9_sched_adjust_cost
,
arm_default_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
5, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_BENEFICIAL(4,32,32),
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
/* Tuning parameters for the Cortex-A12 (fuses MOVW/MOVT pairs). */
const struct tune_params arm_cortex_a12_tune
=
&cortexa12_extra_costs
,
NULL
, /* Sched adj cost. */
arm_default_branch_cost
,
&arm_default_vec_cost
, /* Vectorizer costs. */
1, /* Constant limit. */
2, /* Max cond insns. */
8, /* Memset max inline. */
2, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_FALSE
,
tune_params::PREF_LDRD_TRUE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_ALL
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_TRUE
,
FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
tune_params::SCHED_AUTOPREF_OFF
/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
cycle to execute each. An LDR from the constant pool also takes two cycles
to execute, but mildly increases pipelining opportunity (consecutive
loads/stores can be pipelined together, saving one cycle), and may also
improve icache utilisation. Hence we prefer the constant pool for such
const struct tune_params arm_v7m_tune
=
NULL
, /* Sched adj cost. */
arm_cortex_m_branch_cost
,
&arm_default_vec_cost
,
1, /* Constant limit. */
2, /* Max cond insns. */
8, /* Memset max inline. */
1, /* Issue rate. */
ARM_PREFETCH_NOT_BENEFICIAL
,
tune_params::PREF_CONST_POOL_TRUE
,
tune_params::PREF_LDRD_FALSE
,
tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
tune_params::DISPARAGE_FLAGS_NEITHER
,
tune_params::PREF_NEON_64_FALSE
,
tune_params::PREF_NEON_STRINGOPS_FALSE
,
tune_params::FUSE_NOTHING
,
tune_params::SCHED_AUTOPREF_OFF
2115 /* Cortex-M7 tuning. */
2117 const struct tune_params arm_cortex_m7_tune
=
2121 NULL
, /* Sched adj cost. */
2122 arm_cortex_m7_branch_cost
,
2123 &arm_default_vec_cost
,
2124 0, /* Constant limit. */
2125 1, /* Max cond insns. */
2126 8, /* Memset max inline. */
2127 2, /* Issue rate. */
2128 ARM_PREFETCH_NOT_BENEFICIAL
,
2129 tune_params::PREF_CONST_POOL_TRUE
,
2130 tune_params::PREF_LDRD_FALSE
,
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2133 tune_params::DISPARAGE_FLAGS_NEITHER
,
2134 tune_params::PREF_NEON_64_FALSE
,
2135 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2136 tune_params::FUSE_NOTHING
,
2137 tune_params::SCHED_AUTOPREF_OFF
2140 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2141 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2142 const struct tune_params arm_v6m_tune
=
2145 NULL
, /* Insn extra costs. */
2146 NULL
, /* Sched adj cost. */
2147 arm_default_branch_cost
,
2148 &arm_default_vec_cost
, /* Vectorizer costs. */
2149 1, /* Constant limit. */
2150 5, /* Max cond insns. */
2151 8, /* Memset max inline. */
2152 1, /* Issue rate. */
2153 ARM_PREFETCH_NOT_BENEFICIAL
,
2154 tune_params::PREF_CONST_POOL_FALSE
,
2155 tune_params::PREF_LDRD_FALSE
,
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2157 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2158 tune_params::DISPARAGE_FLAGS_NEITHER
,
2159 tune_params::PREF_NEON_64_FALSE
,
2160 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2161 tune_params::FUSE_NOTHING
,
2162 tune_params::SCHED_AUTOPREF_OFF
2165 const struct tune_params arm_fa726te_tune
=
2168 NULL
, /* Insn extra costs. */
2169 fa726te_sched_adjust_cost
,
2170 arm_default_branch_cost
,
2171 &arm_default_vec_cost
,
2172 1, /* Constant limit. */
2173 5, /* Max cond insns. */
2174 8, /* Memset max inline. */
2175 2, /* Issue rate. */
2176 ARM_PREFETCH_NOT_BENEFICIAL
,
2177 tune_params::PREF_CONST_POOL_TRUE
,
2178 tune_params::PREF_LDRD_FALSE
,
2179 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2181 tune_params::DISPARAGE_FLAGS_NEITHER
,
2182 tune_params::PREF_NEON_64_FALSE
,
2183 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2184 tune_params::FUSE_NOTHING
,
2185 tune_params::SCHED_AUTOPREF_OFF
2189 /* Not all of these give usefully different compilation alternatives,
2190 but there is no simple way of generalizing them. */
2191 static const struct processors all_cores
[] =
2194 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2195 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2196 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2197 #include "arm-cores.def"
2199 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
2202 static const struct processors all_architectures
[] =
2204 /* ARM Architectures */
2205 /* We don't specify tuning costs here as it will be figured out
2208 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2209 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2210 #include "arm-arches.def"
2212 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
2216 /* These are populated as commandline arguments are processed, or NULL
2217 if not specified. */
2218 static const struct processors
*arm_selected_arch
;
2219 static const struct processors
*arm_selected_cpu
;
2220 static const struct processors
*arm_selected_tune
;
2222 /* The name of the preprocessor macro to define for this architecture. */
2224 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
2226 /* Available values for -mfpu=. */
2228 static const struct arm_fpu_desc all_fpus
[] =
2230 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2231 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2232 #include "arm-fpus.def"
2237 /* Supported TLS relocations. */
2245 TLS_DESCSEQ
/* GNU scheme */
2248 /* The maximum number of insns to be used when loading a constant. */
2250 arm_constant_limit (bool size_p
)
2252 return size_p
? 1 : current_tune
->constant_limit
;
2255 /* Emit an insn that's a simple single-set. Both the operands must be known
2257 inline static rtx_insn
*
2258 emit_set_insn (rtx x
, rtx y
)
2260 return emit_insn (gen_rtx_SET (x
, y
));
2263 /* Return the number of bits set in VALUE. */
2265 bit_count (unsigned long value
)
2267 unsigned long count
= 0;
2272 value
&= value
- 1; /* Clear the least-significant set bit. */
2282 } arm_fixed_mode_set
;
2284 /* A small helper for setting fixed-point library libfuncs. */
2287 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2288 const char *funcname
, const char *modename
,
2293 if (num_suffix
== 0)
2294 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2296 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2298 set_optab_libfunc (optable
, mode
, buffer
);
2302 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2303 machine_mode from
, const char *funcname
,
2304 const char *toname
, const char *fromname
)
2307 const char *maybe_suffix_2
= "";
2309 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2310 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2311 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2312 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2313 maybe_suffix_2
= "2";
2315 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2318 set_conv_libfunc (optable
, to
, from
, buffer
);
2321 /* Set up library functions unique to ARM. */
2324 arm_init_libfuncs (void)
2326 /* For Linux, we have access to kernel support for atomic operations. */
2327 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2328 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2330 /* There are no special library functions unless we are using the
2335 /* The functions below are described in Section 4 of the "Run-Time
2336 ABI for the ARM architecture", Version 1.0. */
2338 /* Double-precision floating-point arithmetic. Table 2. */
2339 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2340 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2341 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2342 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2343 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2345 /* Double-precision comparisons. Table 3. */
2346 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2347 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2348 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2349 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2350 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2351 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2352 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2354 /* Single-precision floating-point arithmetic. Table 4. */
2355 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2356 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2357 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2358 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2359 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2361 /* Single-precision comparisons. Table 5. */
2362 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2363 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2364 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2365 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2366 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2367 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2368 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2370 /* Floating-point to integer conversions. Table 6. */
2371 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2372 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2373 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2374 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2375 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2376 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2377 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2378 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2380 /* Conversions between floating types. Table 7. */
2381 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2382 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2384 /* Integer to floating-point conversions. Table 8. */
2385 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2386 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2387 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2388 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2389 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2390 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2391 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2392 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2394 /* Long long. Table 9. */
2395 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2396 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2397 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2398 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2399 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2400 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2401 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2402 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2404 /* Integer (32/32->32) division. \S 4.3.1. */
2405 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2406 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2408 /* The divmod functions are designed so that they can be used for
2409 plain division, even though they return both the quotient and the
2410 remainder. The quotient is returned in the usual location (i.e.,
2411 r0 for SImode, {r0, r1} for DImode), just as would be expected
2412 for an ordinary division routine. Because the AAPCS calling
2413 conventions specify that all of { r0, r1, r2, r3 } are
2414 callee-saved registers, there is no need to tell the compiler
2415 explicitly that those registers are clobbered by these
2417 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2418 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2420 /* For SImode division the ABI provides div-without-mod routines,
2421 which are faster. */
2422 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2423 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2425 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2426 divmod libcalls instead. */
2427 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2428 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2429 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2430 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2432 /* Half-precision float operations. The compiler handles all operations
2433 with NULL libfuncs by converting the SFmode. */
2434 switch (arm_fp16_format
)
2436 case ARM_FP16_FORMAT_IEEE
:
2437 case ARM_FP16_FORMAT_ALTERNATIVE
:
2440 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2441 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2443 : "__gnu_f2h_alternative"));
2444 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2445 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2447 : "__gnu_h2f_alternative"));
2450 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2451 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2452 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2453 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2454 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2457 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2458 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2459 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2460 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2461 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2462 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2463 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2470 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2472 const arm_fixed_mode_set fixed_arith_modes
[] =
2493 const arm_fixed_mode_set fixed_conv_modes
[] =
2523 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2525 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2526 "add", fixed_arith_modes
[i
].name
, 3);
2527 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2528 "ssadd", fixed_arith_modes
[i
].name
, 3);
2529 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2530 "usadd", fixed_arith_modes
[i
].name
, 3);
2531 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2532 "sub", fixed_arith_modes
[i
].name
, 3);
2533 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2534 "sssub", fixed_arith_modes
[i
].name
, 3);
2535 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2536 "ussub", fixed_arith_modes
[i
].name
, 3);
2537 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2538 "mul", fixed_arith_modes
[i
].name
, 3);
2539 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2540 "ssmul", fixed_arith_modes
[i
].name
, 3);
2541 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2542 "usmul", fixed_arith_modes
[i
].name
, 3);
2543 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2544 "div", fixed_arith_modes
[i
].name
, 3);
2545 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2546 "udiv", fixed_arith_modes
[i
].name
, 3);
2547 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2548 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2549 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2550 "usdiv", fixed_arith_modes
[i
].name
, 3);
2551 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2552 "neg", fixed_arith_modes
[i
].name
, 2);
2553 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2554 "ssneg", fixed_arith_modes
[i
].name
, 2);
2555 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2556 "usneg", fixed_arith_modes
[i
].name
, 2);
2557 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2558 "ashl", fixed_arith_modes
[i
].name
, 3);
2559 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2560 "ashr", fixed_arith_modes
[i
].name
, 3);
2561 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2562 "lshr", fixed_arith_modes
[i
].name
, 3);
2563 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2564 "ssashl", fixed_arith_modes
[i
].name
, 3);
2565 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2566 "usashl", fixed_arith_modes
[i
].name
, 3);
2567 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2568 "cmp", fixed_arith_modes
[i
].name
, 2);
2571 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2572 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2575 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2576 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2579 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2580 fixed_conv_modes
[j
].mode
, "fract",
2581 fixed_conv_modes
[i
].name
,
2582 fixed_conv_modes
[j
].name
);
2583 arm_set_fixed_conv_libfunc (satfract_optab
,
2584 fixed_conv_modes
[i
].mode
,
2585 fixed_conv_modes
[j
].mode
, "satfract",
2586 fixed_conv_modes
[i
].name
,
2587 fixed_conv_modes
[j
].name
);
2588 arm_set_fixed_conv_libfunc (fractuns_optab
,
2589 fixed_conv_modes
[i
].mode
,
2590 fixed_conv_modes
[j
].mode
, "fractuns",
2591 fixed_conv_modes
[i
].name
,
2592 fixed_conv_modes
[j
].name
);
2593 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2594 fixed_conv_modes
[i
].mode
,
2595 fixed_conv_modes
[j
].mode
, "satfractuns",
2596 fixed_conv_modes
[i
].name
,
2597 fixed_conv_modes
[j
].name
);
2601 if (TARGET_AAPCS_BASED
)
2602 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2605 /* On AAPCS systems, this is the "struct __va_list". */
2606 static GTY(()) tree va_list_type
;
2608 /* Return the type to use as __builtin_va_list. */
2610 arm_build_builtin_va_list (void)
2615 if (!TARGET_AAPCS_BASED
)
2616 return std_build_builtin_va_list ();
2618 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2626 The C Library ABI further reinforces this definition in \S
2629 We must follow this definition exactly. The structure tag
2630 name is visible in C++ mangled names, and thus forms a part
2631 of the ABI. The field name may be used by people who
2632 #include <stdarg.h>. */
2633 /* Create the type. */
2634 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2635 /* Give it the required name. */
2636 va_list_name
= build_decl (BUILTINS_LOCATION
,
2638 get_identifier ("__va_list"),
2640 DECL_ARTIFICIAL (va_list_name
) = 1;
2641 TYPE_NAME (va_list_type
) = va_list_name
;
2642 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2643 /* Create the __ap field. */
2644 ap_field
= build_decl (BUILTINS_LOCATION
,
2646 get_identifier ("__ap"),
2648 DECL_ARTIFICIAL (ap_field
) = 1;
2649 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2650 TYPE_FIELDS (va_list_type
) = ap_field
;
2651 /* Compute its layout. */
2652 layout_type (va_list_type
);
2654 return va_list_type
;
2657 /* Return an expression of type "void *" pointing to the next
2658 available argument in a variable-argument list. VALIST is the
2659 user-level va_list object, of type __builtin_va_list. */
2661 arm_extract_valist_ptr (tree valist
)
2663 if (TREE_TYPE (valist
) == error_mark_node
)
2664 return error_mark_node
;
2666 /* On an AAPCS target, the pointer is stored within "struct
2668 if (TARGET_AAPCS_BASED
)
2670 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2671 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2672 valist
, ap_field
, NULL_TREE
);
2678 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2680 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2682 valist
= arm_extract_valist_ptr (valist
);
2683 std_expand_builtin_va_start (valist
, nextarg
);
2686 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2688 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2691 valist
= arm_extract_valist_ptr (valist
);
2692 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2695 /* Check any incompatible options that the user has specified. */
2697 arm_option_check_internal (struct gcc_options
*opts
)
2699 int flags
= opts
->x_target_flags
;
2701 /* Make sure that the processor choice does not conflict with any of the
2702 other command line choices. */
2703 if (TARGET_ARM_P (flags
) && !(insn_flags
& FL_NOTM
))
2704 error ("target CPU does not support ARM mode");
2706 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2707 from here where no function is being compiled currently. */
2708 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2709 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2711 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2712 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2714 /* If this target is normally configured to use APCS frames, warn if they
2715 are turned off and debugging is turned on. */
2716 if (TARGET_ARM_P (flags
)
2717 && write_symbols
!= NO_DEBUG
2718 && !TARGET_APCS_FRAME
2719 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2720 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2722 /* iWMMXt unsupported under Thumb mode. */
2723 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2724 error ("iWMMXt unsupported under Thumb mode");
2726 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2727 error ("can not use -mtp=cp15 with 16-bit Thumb");
2729 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2731 error ("RTP PIC is incompatible with Thumb");
2735 /* We only support -mslow-flash-data on armv7-m targets. */
2736 if (target_slow_flash_data
2737 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2738 || (TARGET_THUMB1_P (flags
) || flag_pic
|| TARGET_NEON
)))
2739 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2742 /* Set params depending on attributes and optimization options. */
2744 arm_option_params_internal (struct gcc_options
*opts
)
2746 int flags
= opts
->x_target_flags
;
2748 /* If we are not using the default (ARM mode) section anchor offset
2749 ranges, then set the correct ranges now. */
2750 if (TARGET_THUMB1_P (flags
))
2752 /* Thumb-1 LDR instructions cannot have negative offsets.
2753 Permissible positive offset ranges are 5-bit (for byte loads),
2754 6-bit (for halfword loads), or 7-bit (for word loads).
2755 Empirical results suggest a 7-bit anchor range gives the best
2756 overall code size. */
2757 targetm
.min_anchor_offset
= 0;
2758 targetm
.max_anchor_offset
= 127;
2760 else if (TARGET_THUMB2_P (flags
))
2762 /* The minimum is set such that the total size of the block
2763 for a particular anchor is 248 + 1 + 4095 bytes, which is
2764 divisible by eight, ensuring natural spacing of anchors. */
2765 targetm
.min_anchor_offset
= -248;
2766 targetm
.max_anchor_offset
= 4095;
2770 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2771 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2776 /* If optimizing for size, bump the number of instructions that we
2777 are prepared to conditionally execute (even on a StrongARM). */
2778 max_insns_skipped
= 6;
2780 /* For THUMB2, we limit the conditional sequence to one IT block. */
2781 if (TARGET_THUMB2_P (flags
))
2782 max_insns_skipped
= opts
->x_arm_restrict_it
? 1 : 4;
2785 /* When -mrestrict-it is in use tone down the if-conversion. */
2787 = (TARGET_THUMB2_P (opts
->x_target_flags
) && opts
->x_arm_restrict_it
)
2788 ? 1 : current_tune
->max_insns_skipped
;
2791 /* True if -mflip-thumb should next add an attribute for the default
2792 mode, false if it should next add an attribute for the opposite mode. */
2793 static GTY(()) bool thumb_flipper
;
2795 /* Options after initial target override. */
2796 static GTY(()) tree init_optimize
;
2798 /* Reset options between modes that the user has specified. */
2800 arm_option_override_internal (struct gcc_options
*opts
,
2801 struct gcc_options
*opts_set
)
2803 if (TARGET_THUMB_P (opts
->x_target_flags
) && !(insn_flags
& FL_THUMB
))
2805 warning (0, "target CPU does not support THUMB instructions");
2806 opts
->x_target_flags
&= ~MASK_THUMB
;
2809 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2811 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2812 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2815 /* Callee super interworking implies thumb interworking. Adding
2816 this to the flags here simplifies the logic elsewhere. */
2817 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2818 opts
->x_target_flags
|= MASK_INTERWORK
;
2820 /* need to remember initial values so combinaisons of options like
2821 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2822 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2824 if (! opts_set
->x_arm_restrict_it
)
2825 opts
->x_arm_restrict_it
= arm_arch8
;
2827 if (!TARGET_THUMB2_P (opts
->x_target_flags
))
2828 opts
->x_arm_restrict_it
= 0;
2830 /* Don't warn since it's on by default in -O2. */
2831 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2832 opts
->x_flag_schedule_insns
= 0;
2834 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
2836 /* Disable shrink-wrap when optimizing function for size, since it tends to
2837 generate additional returns. */
2838 if (optimize_function_for_size_p (cfun
)
2839 && TARGET_THUMB2_P (opts
->x_target_flags
))
2840 opts
->x_flag_shrink_wrap
= false;
2842 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
2844 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2845 - epilogue_insns - does not accurately model the corresponding insns
2846 emitted in the asm file. In particular, see the comment in thumb_exit
2847 'Find out how many of the (return) argument registers we can corrupt'.
2848 As a consequence, the epilogue may clobber registers without fipa-ra
2849 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2850 TODO: Accurately model clobbers for epilogue_insns and reenable
2852 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2853 opts
->x_flag_ipa_ra
= 0;
2855 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
2857 /* Thumb2 inline assembly code should always use unified syntax.
2858 This will apply to ARM and Thumb1 eventually. */
2859 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
2862 /* Fix up any incompatible options that the user has specified. */
2864 arm_option_override (void)
2866 arm_selected_arch
= NULL
;
2867 arm_selected_cpu
= NULL
;
2868 arm_selected_tune
= NULL
;
2870 if (global_options_set
.x_arm_arch_option
)
2871 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2873 if (global_options_set
.x_arm_cpu_option
)
2875 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2876 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2879 if (global_options_set
.x_arm_tune_option
)
2880 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2882 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2883 SUBTARGET_OVERRIDE_OPTIONS
;
2886 if (arm_selected_arch
)
2888 if (arm_selected_cpu
)
2890 /* Check for conflict between mcpu and march. */
2891 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
2893 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2894 arm_selected_cpu
->name
, arm_selected_arch
->name
);
2895 /* -march wins for code generation.
2896 -mcpu wins for default tuning. */
2897 if (!arm_selected_tune
)
2898 arm_selected_tune
= arm_selected_cpu
;
2900 arm_selected_cpu
= arm_selected_arch
;
2904 arm_selected_arch
= NULL
;
2907 /* Pick a CPU based on the architecture. */
2908 arm_selected_cpu
= arm_selected_arch
;
2911 /* If the user did not specify a processor, choose one for them. */
2912 if (!arm_selected_cpu
)
2914 const struct processors
* sel
;
2915 unsigned int sought
;
2917 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
2918 if (!arm_selected_cpu
->name
)
2920 #ifdef SUBTARGET_CPU_DEFAULT
2921 /* Use the subtarget default CPU if none was specified by
2923 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
2925 /* Default to ARM6. */
2926 if (!arm_selected_cpu
->name
)
2927 arm_selected_cpu
= &all_cores
[arm6
];
2930 sel
= arm_selected_cpu
;
2931 insn_flags
= sel
->flags
;
2933 /* Now check to see if the user has specified some command line
2934 switch that require certain abilities from the cpu. */
2937 if (TARGET_INTERWORK
|| TARGET_THUMB
)
2939 sought
|= (FL_THUMB
| FL_MODE32
);
2941 /* There are no ARM processors that support both APCS-26 and
2942 interworking. Therefore we force FL_MODE26 to be removed
2943 from insn_flags here (if it was set), so that the search
2944 below will always be able to find a compatible processor. */
2945 insn_flags
&= ~FL_MODE26
;
2948 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
2950 /* Try to locate a CPU type that supports all of the abilities
2951 of the default CPU, plus the extra abilities requested by
2953 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2954 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
2957 if (sel
->name
== NULL
)
2959 unsigned current_bit_count
= 0;
2960 const struct processors
* best_fit
= NULL
;
2962 /* Ideally we would like to issue an error message here
2963 saying that it was not possible to find a CPU compatible
2964 with the default CPU, but which also supports the command
2965 line options specified by the programmer, and so they
2966 ought to use the -mcpu=<name> command line option to
2967 override the default CPU type.
2969 If we cannot find a cpu that has both the
2970 characteristics of the default cpu and the given
2971 command line options we scan the array again looking
2972 for a best match. */
2973 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2974 if ((sel
->flags
& sought
) == sought
)
2978 count
= bit_count (sel
->flags
& insn_flags
);
2980 if (count
>= current_bit_count
)
2983 current_bit_count
= count
;
2987 gcc_assert (best_fit
);
2991 arm_selected_cpu
= sel
;
2995 gcc_assert (arm_selected_cpu
);
2996 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2997 if (!arm_selected_tune
)
2998 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
3000 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
3001 insn_flags
= arm_selected_cpu
->flags
;
3002 arm_base_arch
= arm_selected_cpu
->base_arch
;
3004 arm_tune
= arm_selected_tune
->core
;
3005 tune_flags
= arm_selected_tune
->flags
;
3006 current_tune
= arm_selected_tune
->tune
;
3008 /* TBD: Dwarf info for apcs frame is not handled yet. */
3009 if (TARGET_APCS_FRAME
)
3010 flag_shrink_wrap
= false;
3012 /* BPABI targets use linker tricks to allow interworking on cores
3013 without thumb support. */
3014 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
3016 warning (0, "target CPU does not support interworking" );
3017 target_flags
&= ~MASK_INTERWORK
;
3020 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3022 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3023 target_flags
|= MASK_APCS_FRAME
;
3026 if (TARGET_POKE_FUNCTION_NAME
)
3027 target_flags
|= MASK_APCS_FRAME
;
3029 if (TARGET_APCS_REENT
&& flag_pic
)
3030 error ("-fpic and -mapcs-reent are incompatible");
3032 if (TARGET_APCS_REENT
)
3033 warning (0, "APCS reentrant code not supported. Ignored");
3035 if (TARGET_APCS_FLOAT
)
3036 warning (0, "passing floating point arguments in fp regs not yet supported");
3038 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3039 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
3040 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
3041 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
3042 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
3043 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
3044 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
3045 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
3046 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
3047 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3048 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
3049 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
3050 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
3051 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
3052 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
3054 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
3055 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
3056 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
3057 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
3058 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
3059 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
3060 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
3061 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
3062 arm_arch_no_volatile_ce
= (insn_flags
& FL_NO_VOLATILE_CE
) != 0;
3063 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
3064 arm_arch_crc
= (insn_flags
& FL_CRC32
) != 0;
3065 arm_m_profile_small_mul
= (insn_flags
& FL_SMALLMUL
) != 0;
3067 /* V5 code we generate is completely interworking capable, so we turn off
3068 TARGET_INTERWORK here to avoid many tests later on. */
3070 /* XXX However, we must pass the right pre-processor defines to CPP
3071 or GLD can get confused. This is a hack. */
3072 if (TARGET_INTERWORK
)
3073 arm_cpp_interwork
= 1;
3076 target_flags
&= ~MASK_INTERWORK
;
3078 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3079 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3081 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3082 error ("iwmmxt abi requires an iwmmxt capable cpu");
3084 if (!global_options_set
.x_arm_fpu_index
)
3086 const char *target_fpu_name
;
3089 #ifdef FPUTYPE_DEFAULT
3090 target_fpu_name
= FPUTYPE_DEFAULT
;
3092 target_fpu_name
= "vfp";
3095 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
3100 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
3102 switch (arm_fpu_desc
->model
)
3104 case ARM_FP_MODEL_VFP
:
3105 arm_fpu_attr
= FPU_VFP
;
3112 if (TARGET_AAPCS_BASED
)
3114 if (TARGET_CALLER_INTERWORKING
)
3115 error ("AAPCS does not support -mcaller-super-interworking");
3117 if (TARGET_CALLEE_INTERWORKING
)
3118 error ("AAPCS does not support -mcallee-super-interworking");
3121 /* iWMMXt and NEON are incompatible. */
3122 if (TARGET_IWMMXT
&& TARGET_NEON
)
3123 error ("iWMMXt and NEON are incompatible");
3125 /* __fp16 support currently assumes the core has ldrh. */
3126 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3127 sorry ("__fp16 and no ldrh");
3129 /* If soft-float is specified then don't use FPU. */
3130 if (TARGET_SOFT_FLOAT
)
3131 arm_fpu_attr
= FPU_NONE
;
3133 if (TARGET_AAPCS_BASED
)
3135 if (arm_abi
== ARM_ABI_IWMMXT
)
3136 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3137 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
3138 && TARGET_HARD_FLOAT
3140 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3142 arm_pcs_default
= ARM_PCS_AAPCS
;
3146 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
3147 sorry ("-mfloat-abi=hard and VFP");
3149 if (arm_abi
== ARM_ABI_APCS
)
3150 arm_pcs_default
= ARM_PCS_APCS
;
3152 arm_pcs_default
= ARM_PCS_ATPCS
;
3155 /* For arm2/3 there is no need to do any scheduling if we are doing
3156 software floating-point. */
3157 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
3158 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3160 /* Use the cp15 method if it is available. */
3161 if (target_thread_pointer
== TP_AUTO
)
3163 if (arm_arch6k
&& !TARGET_THUMB1
)
3164 target_thread_pointer
= TP_CP15
;
3166 target_thread_pointer
= TP_SOFT
;
3169 /* Override the default structure alignment for AAPCS ABI. */
3170 if (!global_options_set
.x_arm_structure_size_boundary
)
3172 if (TARGET_AAPCS_BASED
)
3173 arm_structure_size_boundary
= 8;
3177 if (arm_structure_size_boundary
!= 8
3178 && arm_structure_size_boundary
!= 32
3179 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3181 if (ARM_DOUBLEWORD_ALIGN
)
3183 "structure size boundary can only be set to 8, 32 or 64");
3185 warning (0, "structure size boundary can only be set to 8 or 32");
3186 arm_structure_size_boundary
3187 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3191 /* If stack checking is disabled, we can use r10 as the PIC register,
3192 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3193 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3195 if (TARGET_VXWORKS_RTP
)
3196 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3197 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3200 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3201 arm_pic_register
= 9;
3203 if (arm_pic_register_string
!= NULL
)
3205 int pic_register
= decode_reg_name (arm_pic_register_string
);
3208 warning (0, "-mpic-register= is useless without -fpic");
3210 /* Prevent the user from choosing an obviously stupid PIC register. */
3211 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3212 || pic_register
== HARD_FRAME_POINTER_REGNUM
3213 || pic_register
== STACK_POINTER_REGNUM
3214 || pic_register
>= PC_REGNUM
3215 || (TARGET_VXWORKS_RTP
3216 && (unsigned int) pic_register
!= arm_pic_register
))
3217 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3219 arm_pic_register
= pic_register
;
3222 if (TARGET_VXWORKS_RTP
3223 && !global_options_set
.x_arm_pic_data_is_text_relative
)
3224 arm_pic_data_is_text_relative
= 0;
3226 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3227 if (fix_cm3_ldrd
== 2)
3229 if (arm_selected_cpu
->core
== cortexm3
)
3235 /* Enable -munaligned-access by default for
3236 - all ARMv6 architecture-based processors
3237 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3238 - ARMv8 architecture-base processors.
3240 Disable -munaligned-access by default for
3241 - all pre-ARMv6 architecture-based processors
3242 - ARMv6-M architecture-based processors. */
3244 if (unaligned_access
== 2)
3246 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
3247 unaligned_access
= 1;
3249 unaligned_access
= 0;
3251 else if (unaligned_access
== 1
3252 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
3254 warning (0, "target CPU does not support unaligned accesses");
3255 unaligned_access
= 0;
3258 /* Hot/Cold partitioning is not currently supported, since we can't
3259 handle literal pool placement in that case. */
3260 if (flag_reorder_blocks_and_partition
)
3262 inform (input_location
,
3263 "-freorder-blocks-and-partition not supported on this architecture");
3264 flag_reorder_blocks_and_partition
= 0;
3265 flag_reorder_blocks
= 1;
3269 /* Hoisting PIC address calculations more aggressively provides a small,
3270 but measurable, size reduction for PIC code. Therefore, we decrease
3271 the bar for unrestricted expression hoisting to the cost of PIC address
3272 calculation, which is 2 instructions. */
3273 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3274 global_options
.x_param_values
,
3275 global_options_set
.x_param_values
);
3277 /* ARM EABI defaults to strict volatile bitfields. */
3278 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3279 && abi_version_at_least(2))
3280 flag_strict_volatile_bitfields
= 1;
3282 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3283 have deemed it beneficial (signified by setting
3284 prefetch.num_slots to 1 or more). */
3285 if (flag_prefetch_loop_arrays
< 0
3288 && current_tune
->prefetch
.num_slots
> 0)
3289 flag_prefetch_loop_arrays
= 1;
3291 /* Set up parameters to be used in prefetching algorithm. Do not
3292 override the defaults unless we are tuning for a core we have
3293 researched values for. */
3294 if (current_tune
->prefetch
.num_slots
> 0)
3295 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3296 current_tune
->prefetch
.num_slots
,
3297 global_options
.x_param_values
,
3298 global_options_set
.x_param_values
);
3299 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3300 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3301 current_tune
->prefetch
.l1_cache_line_size
,
3302 global_options
.x_param_values
,
3303 global_options_set
.x_param_values
);
3304 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3305 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3306 current_tune
->prefetch
.l1_cache_size
,
3307 global_options
.x_param_values
,
3308 global_options_set
.x_param_values
);
3310 /* Use Neon to perform 64-bits operations rather than core
3312 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3313 if (use_neon_for_64bits
== 1)
3314 prefer_neon_for_64bits
= true;
3316 /* Use the alternative scheduling-pressure algorithm by default. */
3317 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3318 global_options
.x_param_values
,
3319 global_options_set
.x_param_values
);
3321 /* Look through ready list and all of queue for instructions
3322 relevant for L2 auto-prefetcher. */
3323 int param_sched_autopref_queue_depth
;
3325 switch (current_tune
->sched_autopref
)
3327 case tune_params::SCHED_AUTOPREF_OFF
:
3328 param_sched_autopref_queue_depth
= -1;
3331 case tune_params::SCHED_AUTOPREF_RANK
:
3332 param_sched_autopref_queue_depth
= 0;
3335 case tune_params::SCHED_AUTOPREF_FULL
:
3336 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3343 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3344 param_sched_autopref_queue_depth
,
3345 global_options
.x_param_values
,
3346 global_options_set
.x_param_values
);
3348 /* Currently, for slow flash data, we just disable literal pools. */
3349 if (target_slow_flash_data
)
3350 arm_disable_literal_pool
= true;
3352 /* Disable scheduling fusion by default if it's not armv7 processor
3353 or doesn't prefer ldrd/strd. */
3354 if (flag_schedule_fusion
== 2
3355 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3356 flag_schedule_fusion
= 0;
3358 /* Need to remember initial options before they are overriden. */
3359 init_optimize
= build_optimization_node (&global_options
);
3361 arm_option_override_internal (&global_options
, &global_options_set
);
3362 arm_option_check_internal (&global_options
);
3363 arm_option_params_internal (&global_options
);
3365 /* Register global variables with the garbage collector. */
3366 arm_add_gc_roots ();
3368 /* Save the initial options in case the user does function specific
3370 target_option_default_node
= target_option_current_node
3371 = build_target_option_node (&global_options
);
3373 /* Init initial mode for testing. */
3374 thumb_flipper
= TARGET_THUMB
;
3378 arm_add_gc_roots (void)
3380 gcc_obstack_init(&minipool_obstack
);
3381 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3384 /* A table of known ARM exception types.
3385 For use with the interrupt function attribute. */
3389 const char *const arg
;
3390 const unsigned long return_value
;
3394 static const isr_attribute_arg isr_attribute_args
[] =
3396 { "IRQ", ARM_FT_ISR
},
3397 { "irq", ARM_FT_ISR
},
3398 { "FIQ", ARM_FT_FIQ
},
3399 { "fiq", ARM_FT_FIQ
},
3400 { "ABORT", ARM_FT_ISR
},
3401 { "abort", ARM_FT_ISR
},
3402 { "ABORT", ARM_FT_ISR
},
3403 { "abort", ARM_FT_ISR
},
3404 { "UNDEF", ARM_FT_EXCEPTION
},
3405 { "undef", ARM_FT_EXCEPTION
},
3406 { "SWI", ARM_FT_EXCEPTION
},
3407 { "swi", ARM_FT_EXCEPTION
},
3408 { NULL
, ARM_FT_NORMAL
}
3411 /* Returns the (interrupt) function type of the current
3412 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3414 static unsigned long
3415 arm_isr_value (tree argument
)
3417 const isr_attribute_arg
* ptr
;
3421 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3423 /* No argument - default to IRQ. */
3424 if (argument
== NULL_TREE
)
3427 /* Get the value of the argument. */
3428 if (TREE_VALUE (argument
) == NULL_TREE
3429 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3430 return ARM_FT_UNKNOWN
;
3432 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3434 /* Check it against the list of known arguments. */
3435 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3436 if (streq (arg
, ptr
->arg
))
3437 return ptr
->return_value
;
3439 /* An unrecognized interrupt type. */
3440 return ARM_FT_UNKNOWN
;
3443 /* Computes the type of the current function. */
3445 static unsigned long
3446 arm_compute_func_type (void)
3448 unsigned long type
= ARM_FT_UNKNOWN
;
3452 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3454 /* Decide if the current function is volatile. Such functions
3455 never return, and many memory cycles can be saved by not storing
3456 register values that will never be needed again. This optimization
3457 was added to speed up context switching in a kernel application. */
3459 && (TREE_NOTHROW (current_function_decl
)
3460 || !(flag_unwind_tables
3462 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3463 && TREE_THIS_VOLATILE (current_function_decl
))
3464 type
|= ARM_FT_VOLATILE
;
3466 if (cfun
->static_chain_decl
!= NULL
)
3467 type
|= ARM_FT_NESTED
;
3469 attr
= DECL_ATTRIBUTES (current_function_decl
);
3471 a
= lookup_attribute ("naked", attr
);
3473 type
|= ARM_FT_NAKED
;
3475 a
= lookup_attribute ("isr", attr
);
3477 a
= lookup_attribute ("interrupt", attr
);
3480 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3482 type
|= arm_isr_value (TREE_VALUE (a
));
3487 /* Returns the type of the current function. */
3490 arm_current_func_type (void)
3492 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3493 cfun
->machine
->func_type
= arm_compute_func_type ();
3495 return cfun
->machine
->func_type
;
3499 arm_allocate_stack_slots_for_args (void)
3501 /* Naked functions should not allocate stack slots for arguments. */
3502 return !IS_NAKED (arm_current_func_type ());
3506 arm_warn_func_return (tree decl
)
3508 /* Naked functions are implemented entirely in assembly, including the
3509 return sequence, so suppress warnings about this. */
3510 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3514 /* Output assembler code for a block containing the constant parts
3515 of a trampoline, leaving space for the variable parts.
3517 On the ARM, (if r8 is the static chain regnum, and remembering that
3518 referencing pc adds an offset of 8) the trampoline looks like:
3521 .word static chain value
3522 .word function's address
3523 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3526 arm_asm_trampoline_template (FILE *f
)
3528 if (TARGET_UNIFIED_ASM
)
3529 fprintf (f
, "\t.syntax unified\n");
3531 fprintf (f
, "\t.syntax divided\n");
3535 fprintf (f
, "\t.arm\n");
3536 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3537 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3539 else if (TARGET_THUMB2
)
3541 fprintf (f
, "\t.thumb\n");
3542 /* The Thumb-2 trampoline is similar to the arm implementation.
3543 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3544 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3545 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3546 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3550 ASM_OUTPUT_ALIGN (f
, 2);
3551 fprintf (f
, "\t.code\t16\n");
3552 fprintf (f
, ".Ltrampoline_start:\n");
3553 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3554 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3555 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3556 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3557 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3558 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3560 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3561 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3564 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3567 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3569 rtx fnaddr
, mem
, a_tramp
;
3571 emit_block_move (m_tramp
, assemble_trampoline_template (),
3572 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3574 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3575 emit_move_insn (mem
, chain_value
);
3577 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3578 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3579 emit_move_insn (mem
, fnaddr
);
3581 a_tramp
= XEXP (m_tramp
, 0);
3582 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3583 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3584 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3587 /* Thumb trampolines should be entered in thumb mode, so set
3588 the bottom bit of the address. */
3591 arm_trampoline_adjust_address (rtx addr
)
3594 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3595 NULL
, 0, OPTAB_LIB_WIDEN
);
3599 /* Return 1 if it is possible to return using a single instruction.
3600 If SIBLING is non-null, this is a test for a return before a sibling
3601 call. SIBLING is the call insn, so we can examine its register usage. */
3604 use_return_insn (int iscond
, rtx sibling
)
3607 unsigned int func_type
;
3608 unsigned long saved_int_regs
;
3609 unsigned HOST_WIDE_INT stack_adjust
;
3610 arm_stack_offsets
*offsets
;
3612 /* Never use a return instruction before reload has run. */
3613 if (!reload_completed
)
3616 func_type
= arm_current_func_type ();
3618 /* Naked, volatile and stack alignment functions need special
3620 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3623 /* So do interrupt functions that use the frame pointer and Thumb
3624 interrupt functions. */
3625 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3628 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3629 && !optimize_function_for_size_p (cfun
))
3632 offsets
= arm_get_frame_offsets ();
3633 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3635 /* As do variadic functions. */
3636 if (crtl
->args
.pretend_args_size
3637 || cfun
->machine
->uses_anonymous_args
3638 /* Or if the function calls __builtin_eh_return () */
3639 || crtl
->calls_eh_return
3640 /* Or if the function calls alloca */
3641 || cfun
->calls_alloca
3642 /* Or if there is a stack adjustment. However, if the stack pointer
3643 is saved on the stack, we can use a pre-incrementing stack load. */
3644 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3645 && stack_adjust
== 4)))
3648 saved_int_regs
= offsets
->saved_regs_mask
;
3650 /* Unfortunately, the insn
3652 ldmib sp, {..., sp, ...}
3654 triggers a bug on most SA-110 based devices, such that the stack
3655 pointer won't be correctly restored if the instruction takes a
3656 page fault. We work around this problem by popping r3 along with
3657 the other registers, since that is never slower than executing
3658 another instruction.
3660 We test for !arm_arch5 here, because code for any architecture
3661 less than this could potentially be run on one of the buggy
3663 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3665 /* Validate that r3 is a call-clobbered register (always true in
3666 the default abi) ... */
3667 if (!call_used_regs
[3])
3670 /* ... that it isn't being used for a return value ... */
3671 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3674 /* ... or for a tail-call argument ... */
3677 gcc_assert (CALL_P (sibling
));
3679 if (find_regno_fusage (sibling
, USE
, 3))
3683 /* ... and that there are no call-saved registers in r0-r2
3684 (always true in the default ABI). */
3685 if (saved_int_regs
& 0x7)
3689 /* Can't be done if interworking with Thumb, and any registers have been
3691 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3694 /* On StrongARM, conditional returns are expensive if they aren't
3695 taken and multiple registers have been stacked. */
3696 if (iscond
&& arm_tune_strongarm
)
3698 /* Conditional return when just the LR is stored is a simple
3699 conditional-load instruction, that's not expensive. */
3700 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3704 && arm_pic_register
!= INVALID_REGNUM
3705 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3709 /* If there are saved registers but the LR isn't saved, then we need
3710 two instructions for the return. */
3711 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3714 /* Can't be done if any of the VFP regs are pushed,
3715 since this also requires an insn. */
3716 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3717 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3718 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3721 if (TARGET_REALLY_IWMMXT
)
3722 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3723 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3729 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3730 shrink-wrapping if possible. This is the case if we need to emit a
3731 prologue, which we can test by looking at the offsets. */
3733 use_simple_return_p (void)
3735 arm_stack_offsets
*offsets
;
3737 offsets
= arm_get_frame_offsets ();
3738 return offsets
->outgoing_args
!= 0;
3741 /* Return TRUE if int I is a valid immediate ARM constant. */
3744 const_ok_for_arm (HOST_WIDE_INT i
)
3748 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3749 be all zero, or all one. */
3750 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3751 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3752 != ((~(unsigned HOST_WIDE_INT
) 0)
3753 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3756 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3758 /* Fast return for 0 and small values. We must do this for zero, since
3759 the code below can't handle that one case. */
3760 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3763 /* Get the number of trailing zeros. */
3764 lowbit
= ffs((int) i
) - 1;
3766 /* Only even shifts are allowed in ARM mode so round down to the
3767 nearest even number. */
3771 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3776 /* Allow rotated constants in ARM mode. */
3778 && ((i
& ~0xc000003f) == 0
3779 || (i
& ~0xf000000f) == 0
3780 || (i
& ~0xfc000003) == 0))
3787 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3790 if (i
== v
|| i
== (v
| (v
<< 8)))
3793 /* Allow repeated pattern 0xXY00XY00. */
3803 /* Return true if I is a valid constant for the operation CODE. */
3805 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3807 if (const_ok_for_arm (i
))
3813 /* See if we can use movw. */
3814 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3817 /* Otherwise, try mvn. */
3818 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3821 /* See if we can use addw or subw. */
3823 && ((i
& 0xfffff000) == 0
3824 || ((-i
) & 0xfffff000) == 0))
3826 /* else fall through. */
3846 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3848 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3854 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3858 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3865 /* Return true if I is a valid di mode constant for the operation CODE. */
3867 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3869 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3870 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3871 rtx hi
= GEN_INT (hi_val
);
3872 rtx lo
= GEN_INT (lo_val
);
3882 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3883 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
3885 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3892 /* Emit a sequence of insns to handle a large constant.
3893 CODE is the code of the operation required, it can be any of SET, PLUS,
3894 IOR, AND, XOR, MINUS;
3895 MODE is the mode in which the operation is being performed;
3896 VAL is the integer to operate on;
3897 SOURCE is the other operand (a register, or a null-pointer for SET);
3898 SUBTARGETS means it is safe to create scratch registers if that will
3899 either produce a simpler sequence, or we will want to cse the values.
3900 Return value is the number of insns emitted. */
3902 /* ??? Tweak this for thumb2. */
3904 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
3905 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
3909 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3910 cond
= COND_EXEC_TEST (PATTERN (insn
));
3914 if (subtargets
|| code
== SET
3915 || (REG_P (target
) && REG_P (source
)
3916 && REGNO (target
) != REGNO (source
)))
3918 /* After arm_reorg has been called, we can't fix up expensive
3919 constants by pushing them into memory so we must synthesize
3920 them in-line, regardless of the cost. This is only likely to
3921 be more costly on chips that have load delay slots and we are
3922 compiling without running the scheduler (so no splitting
3923 occurred before the final instruction emission).
3925 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3927 if (!cfun
->machine
->after_arm_reorg
3929 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
3931 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
3936 /* Currently SET is the only monadic value for CODE, all
3937 the rest are diadic. */
3938 if (TARGET_USE_MOVT
)
3939 arm_emit_movpair (target
, GEN_INT (val
));
3941 emit_set_insn (target
, GEN_INT (val
));
3947 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
3949 if (TARGET_USE_MOVT
)
3950 arm_emit_movpair (temp
, GEN_INT (val
));
3952 emit_set_insn (temp
, GEN_INT (val
));
3954 /* For MINUS, the value is subtracted from, since we never
3955 have subtraction of a constant. */
3957 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
3959 emit_set_insn (target
,
3960 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
3966 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
3970 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3971 ARM/THUMB2 immediates, and add up to VAL.
3972 Thr function return value gives the number of insns required. */
3974 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3975 struct four_ints
*return_sequence
)
3977 int best_consecutive_zeros
= 0;
3981 struct four_ints tmp_sequence
;
3983 /* If we aren't targeting ARM, the best place to start is always at
3984 the bottom, otherwise look more closely. */
3987 for (i
= 0; i
< 32; i
+= 2)
3989 int consecutive_zeros
= 0;
3991 if (!(val
& (3 << i
)))
3993 while ((i
< 32) && !(val
& (3 << i
)))
3995 consecutive_zeros
+= 2;
3998 if (consecutive_zeros
> best_consecutive_zeros
)
4000 best_consecutive_zeros
= consecutive_zeros
;
4001 best_start
= i
- consecutive_zeros
;
4008 /* So long as it won't require any more insns to do so, it's
4009 desirable to emit a small constant (in bits 0...9) in the last
4010 insn. This way there is more chance that it can be combined with
4011 a later addressing insn to form a pre-indexed load or store
4012 operation. Consider:
4014 *((volatile int *)0xe0000100) = 1;
4015 *((volatile int *)0xe0000110) = 2;
4017 We want this to wind up as:
4021 str rB, [rA, #0x100]
4023 str rB, [rA, #0x110]
4025 rather than having to synthesize both large constants from scratch.
4027 Therefore, we calculate how many insns would be required to emit
4028 the constant starting from `best_start', and also starting from
4029 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4030 yield a shorter sequence, we may as well use zero. */
4031 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4033 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
4035 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4036 if (insns2
<= insns1
)
4038 *return_sequence
= tmp_sequence
;
4046 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4048 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4049 struct four_ints
*return_sequence
, int i
)
4051 int remainder
= val
& 0xffffffff;
4054 /* Try and find a way of doing the job in either two or three
4057 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4058 location. We start at position I. This may be the MSB, or
4059 optimial_immediate_sequence may have positioned it at the largest block
4060 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4061 wrapping around to the top of the word when we drop off the bottom.
4062 In the worst case this code should produce no more than four insns.
4064 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4065 constants, shifted to any arbitrary location. We should always start
4070 unsigned int b1
, b2
, b3
, b4
;
4071 unsigned HOST_WIDE_INT result
;
4074 gcc_assert (insns
< 4);
4079 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4080 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4083 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4084 /* We can use addw/subw for the last 12 bits. */
4088 /* Use an 8-bit shifted/rotated immediate. */
4092 result
= remainder
& ((0x0ff << end
)
4093 | ((i
< end
) ? (0xff >> (32 - end
))
4100 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4101 arbitrary shifts. */
4102 i
-= TARGET_ARM
? 2 : 1;
4106 /* Next, see if we can do a better job with a thumb2 replicated
4109 We do it this way around to catch the cases like 0x01F001E0 where
4110 two 8-bit immediates would work, but a replicated constant would
4113 TODO: 16-bit constants that don't clear all the bits, but still win.
4114 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4117 b1
= (remainder
& 0xff000000) >> 24;
4118 b2
= (remainder
& 0x00ff0000) >> 16;
4119 b3
= (remainder
& 0x0000ff00) >> 8;
4120 b4
= remainder
& 0xff;
4124 /* The 8-bit immediate already found clears b1 (and maybe b2),
4125 but must leave b3 and b4 alone. */
4127 /* First try to find a 32-bit replicated constant that clears
4128 almost everything. We can assume that we can't do it in one,
4129 or else we wouldn't be here. */
4130 unsigned int tmp
= b1
& b2
& b3
& b4
;
4131 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4133 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4134 + (tmp
== b3
) + (tmp
== b4
);
4136 && (matching_bytes
>= 3
4137 || (matching_bytes
== 2
4138 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4140 /* At least 3 of the bytes match, and the fourth has at
4141 least as many bits set, or two of the bytes match
4142 and it will only require one more insn to finish. */
4150 /* Second, try to find a 16-bit replicated constant that can
4151 leave three of the bytes clear. If b2 or b4 is already
4152 zero, then we can. If the 8-bit from above would not
4153 clear b2 anyway, then we still win. */
4154 else if (b1
== b3
&& (!b2
|| !b4
4155 || (remainder
& 0x00ff0000 & ~result
)))
4157 result
= remainder
& 0xff00ff00;
4163 /* The 8-bit immediate already found clears b2 (and maybe b3)
4164 and we don't get here unless b1 is alredy clear, but it will
4165 leave b4 unchanged. */
4167 /* If we can clear b2 and b4 at once, then we win, since the
4168 8-bits couldn't possibly reach that far. */
4171 result
= remainder
& 0x00ff00ff;
4177 return_sequence
->i
[insns
++] = result
;
4178 remainder
&= ~result
;
4180 if (code
== SET
|| code
== MINUS
)
4188 /* Emit an instruction with the indicated PATTERN. If COND is
4189 non-NULL, conditionalize the execution of the instruction on COND
4193 emit_constant_insn (rtx cond
, rtx pattern
)
4196 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4197 emit_insn (pattern
);
4200 /* As above, but extra parameter GENERATE which, if clear, suppresses
4204 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4205 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
4210 int final_invert
= 0;
4212 int set_sign_bit_copies
= 0;
4213 int clear_sign_bit_copies
= 0;
4214 int clear_zero_bit_copies
= 0;
4215 int set_zero_bit_copies
= 0;
4216 int insns
= 0, neg_insns
, inv_insns
;
4217 unsigned HOST_WIDE_INT temp1
, temp2
;
4218 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4219 struct four_ints
*immediates
;
4220 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4222 /* Find out which operations are safe for a given CODE. Also do a quick
4223 check for degenerate cases; these can occur when DImode operations
4236 if (remainder
== 0xffffffff)
4239 emit_constant_insn (cond
,
4240 gen_rtx_SET (target
,
4241 GEN_INT (ARM_SIGN_EXTEND (val
))));
4247 if (reload_completed
&& rtx_equal_p (target
, source
))
4251 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4260 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4263 if (remainder
== 0xffffffff)
4265 if (reload_completed
&& rtx_equal_p (target
, source
))
4268 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4277 if (reload_completed
&& rtx_equal_p (target
, source
))
4280 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4284 if (remainder
== 0xffffffff)
4287 emit_constant_insn (cond
,
4288 gen_rtx_SET (target
,
4289 gen_rtx_NOT (mode
, source
)));
4296 /* We treat MINUS as (val - source), since (source - val) is always
4297 passed as (source + (-val)). */
4301 emit_constant_insn (cond
,
4302 gen_rtx_SET (target
,
4303 gen_rtx_NEG (mode
, source
)));
4306 if (const_ok_for_arm (val
))
4309 emit_constant_insn (cond
,
4310 gen_rtx_SET (target
,
4311 gen_rtx_MINUS (mode
, GEN_INT (val
),
4322 /* If we can do it in one insn get out quickly. */
4323 if (const_ok_for_op (val
, code
))
4326 emit_constant_insn (cond
,
4327 gen_rtx_SET (target
,
4329 ? gen_rtx_fmt_ee (code
, mode
, source
,
4335 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4337 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4338 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4342 if (mode
== SImode
&& i
== 16)
4343 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4345 emit_constant_insn (cond
,
4346 gen_zero_extendhisi2
4347 (target
, gen_lowpart (HImode
, source
)));
4349 /* Extz only supports SImode, but we can coerce the operands
4351 emit_constant_insn (cond
,
4352 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4353 gen_lowpart (SImode
, source
),
4354 GEN_INT (i
), const0_rtx
));
4360 /* Calculate a few attributes that may be useful for specific
4362 /* Count number of leading zeros. */
4363 for (i
= 31; i
>= 0; i
--)
4365 if ((remainder
& (1 << i
)) == 0)
4366 clear_sign_bit_copies
++;
4371 /* Count number of leading 1's. */
4372 for (i
= 31; i
>= 0; i
--)
4374 if ((remainder
& (1 << i
)) != 0)
4375 set_sign_bit_copies
++;
4380 /* Count number of trailing zero's. */
4381 for (i
= 0; i
<= 31; i
++)
4383 if ((remainder
& (1 << i
)) == 0)
4384 clear_zero_bit_copies
++;
4389 /* Count number of trailing 1's. */
4390 for (i
= 0; i
<= 31; i
++)
4392 if ((remainder
& (1 << i
)) != 0)
4393 set_zero_bit_copies
++;
4401 /* See if we can do this by sign_extending a constant that is known
4402 to be negative. This is a good, way of doing it, since the shift
4403 may well merge into a subsequent insn. */
4404 if (set_sign_bit_copies
> 1)
4406 if (const_ok_for_arm
4407 (temp1
= ARM_SIGN_EXTEND (remainder
4408 << (set_sign_bit_copies
- 1))))
4412 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4413 emit_constant_insn (cond
,
4414 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4415 emit_constant_insn (cond
,
4416 gen_ashrsi3 (target
, new_src
,
4417 GEN_INT (set_sign_bit_copies
- 1)));
4421 /* For an inverted constant, we will need to set the low bits,
4422 these will be shifted out of harm's way. */
4423 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4424 if (const_ok_for_arm (~temp1
))
4428 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4429 emit_constant_insn (cond
,
4430 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4431 emit_constant_insn (cond
,
4432 gen_ashrsi3 (target
, new_src
,
4433 GEN_INT (set_sign_bit_copies
- 1)));
4439 /* See if we can calculate the value as the difference between two
4440 valid immediates. */
4441 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4443 int topshift
= clear_sign_bit_copies
& ~1;
4445 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4446 & (0xff000000 >> topshift
));
4448 /* If temp1 is zero, then that means the 9 most significant
4449 bits of remainder were 1 and we've caused it to overflow.
4450 When topshift is 0 we don't need to do anything since we
4451 can borrow from 'bit 32'. */
4452 if (temp1
== 0 && topshift
!= 0)
4453 temp1
= 0x80000000 >> (topshift
- 1);
4455 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4457 if (const_ok_for_arm (temp2
))
4461 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4462 emit_constant_insn (cond
,
4463 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4464 emit_constant_insn (cond
,
4465 gen_addsi3 (target
, new_src
,
4473 /* See if we can generate this by setting the bottom (or the top)
4474 16 bits, and then shifting these into the other half of the
4475 word. We only look for the simplest cases, to do more would cost
4476 too much. Be careful, however, not to generate this when the
4477 alternative would take fewer insns. */
4478 if (val
& 0xffff0000)
4480 temp1
= remainder
& 0xffff0000;
4481 temp2
= remainder
& 0x0000ffff;
4483 /* Overlaps outside this range are best done using other methods. */
4484 for (i
= 9; i
< 24; i
++)
4486 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4487 && !const_ok_for_arm (temp2
))
4489 rtx new_src
= (subtargets
4490 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4492 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4493 source
, subtargets
, generate
);
4501 gen_rtx_ASHIFT (mode
, source
,
4508 /* Don't duplicate cases already considered. */
4509 for (i
= 17; i
< 24; i
++)
4511 if (((temp1
| (temp1
>> i
)) == remainder
)
4512 && !const_ok_for_arm (temp1
))
4514 rtx new_src
= (subtargets
4515 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4517 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4518 source
, subtargets
, generate
);
4523 gen_rtx_SET (target
,
4526 gen_rtx_LSHIFTRT (mode
, source
,
4537 /* If we have IOR or XOR, and the constant can be loaded in a
4538 single instruction, and we can find a temporary to put it in,
4539 then this can be done in two instructions instead of 3-4. */
4541 /* TARGET can't be NULL if SUBTARGETS is 0 */
4542 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4544 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4548 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4550 emit_constant_insn (cond
,
4551 gen_rtx_SET (sub
, GEN_INT (val
)));
4552 emit_constant_insn (cond
,
4553 gen_rtx_SET (target
,
4554 gen_rtx_fmt_ee (code
, mode
,
4565 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4566 and the remainder 0s for e.g. 0xfff00000)
4567 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4569 This can be done in 2 instructions by using shifts with mov or mvn.
4574 mvn r0, r0, lsr #12 */
4575 if (set_sign_bit_copies
> 8
4576 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
4580 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4581 rtx shift
= GEN_INT (set_sign_bit_copies
);
4587 gen_rtx_ASHIFT (mode
,
4592 gen_rtx_SET (target
,
4594 gen_rtx_LSHIFTRT (mode
, sub
,
4601 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4603 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4605 For eg. r0 = r0 | 0xfff
4610 if (set_zero_bit_copies
> 8
4611 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4615 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4616 rtx shift
= GEN_INT (set_zero_bit_copies
);
4622 gen_rtx_LSHIFTRT (mode
,
4627 gen_rtx_SET (target
,
4629 gen_rtx_ASHIFT (mode
, sub
,
4635 /* This will never be reached for Thumb2 because orn is a valid
4636 instruction. This is for Thumb1 and the ARM 32 bit cases.
4638 x = y | constant (such that ~constant is a valid constant)
4640 x = ~(~y & ~constant).
4642 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4646 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4647 emit_constant_insn (cond
,
4649 gen_rtx_NOT (mode
, source
)));
4652 sub
= gen_reg_rtx (mode
);
4653 emit_constant_insn (cond
,
4655 gen_rtx_AND (mode
, source
,
4657 emit_constant_insn (cond
,
4658 gen_rtx_SET (target
,
4659 gen_rtx_NOT (mode
, sub
)));
4666 /* See if two shifts will do 2 or more insn's worth of work. */
4667 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4669 HOST_WIDE_INT shift_mask
= ((0xffffffff
4670 << (32 - clear_sign_bit_copies
))
4673 if ((remainder
| shift_mask
) != 0xffffffff)
4675 HOST_WIDE_INT new_val
4676 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4680 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4681 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4682 new_src
, source
, subtargets
, 1);
4687 rtx targ
= subtargets
? NULL_RTX
: target
;
4688 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4689 targ
, source
, subtargets
, 0);
4695 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4696 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4698 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4699 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4705 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4707 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4709 if ((remainder
| shift_mask
) != 0xffffffff)
4711 HOST_WIDE_INT new_val
4712 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4715 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4717 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4718 new_src
, source
, subtargets
, 1);
4723 rtx targ
= subtargets
? NULL_RTX
: target
;
4725 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4726 targ
, source
, subtargets
, 0);
4732 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4733 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4735 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4736 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4748 /* Calculate what the instruction sequences would be if we generated it
4749 normally, negated, or inverted. */
4751 /* AND cannot be split into multiple insns, so invert and use BIC. */
4754 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4757 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4762 if (can_invert
|| final_invert
)
4763 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4768 immediates
= &pos_immediates
;
4770 /* Is the negated immediate sequence more efficient? */
4771 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4774 immediates
= &neg_immediates
;
4779 /* Is the inverted immediate sequence more efficient?
4780 We must allow for an extra NOT instruction for XOR operations, although
4781 there is some chance that the final 'mvn' will get optimized later. */
4782 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4785 immediates
= &inv_immediates
;
4793 /* Now output the chosen sequence as instructions. */
4796 for (i
= 0; i
< insns
; i
++)
4798 rtx new_src
, temp1_rtx
;
4800 temp1
= immediates
->i
[i
];
4802 if (code
== SET
|| code
== MINUS
)
4803 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4804 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4805 new_src
= gen_reg_rtx (mode
);
4811 else if (can_negate
)
4814 temp1
= trunc_int_for_mode (temp1
, mode
);
4815 temp1_rtx
= GEN_INT (temp1
);
4819 else if (code
== MINUS
)
4820 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4822 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4824 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
4829 can_negate
= can_invert
;
4833 else if (code
== MINUS
)
4841 emit_constant_insn (cond
, gen_rtx_SET (target
,
4842 gen_rtx_NOT (mode
, source
)));
4849 /* Canonicalize a comparison so that we are more likely to recognize it.
4850 This can be done for a few constant compares, where we can make the
4851 immediate value easier to load. */
4854 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4855 bool op0_preserve_value
)
4858 unsigned HOST_WIDE_INT i
, maxval
;
4860 mode
= GET_MODE (*op0
);
4861 if (mode
== VOIDmode
)
4862 mode
= GET_MODE (*op1
);
4864 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4866 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4867 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4868 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4869 for GTU/LEU in Thumb mode. */
4873 if (*code
== GT
|| *code
== LE
4874 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4876 /* Missing comparison. First try to use an available
4878 if (CONST_INT_P (*op1
))
4886 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4888 *op1
= GEN_INT (i
+ 1);
4889 *code
= *code
== GT
? GE
: LT
;
4895 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4896 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4898 *op1
= GEN_INT (i
+ 1);
4899 *code
= *code
== GTU
? GEU
: LTU
;
4908 /* If that did not work, reverse the condition. */
4909 if (!op0_preserve_value
)
4911 std::swap (*op0
, *op1
);
4912 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4918 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4919 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4920 to facilitate possible combining with a cmp into 'ands'. */
4922 && GET_CODE (*op0
) == ZERO_EXTEND
4923 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4924 && GET_MODE (XEXP (*op0
, 0)) == QImode
4925 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4926 && subreg_lowpart_p (XEXP (*op0
, 0))
4927 && *op1
== const0_rtx
)
4928 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4931 /* Comparisons smaller than DImode. Only adjust comparisons against
4932 an out-of-range constant. */
4933 if (!CONST_INT_P (*op1
)
4934 || const_ok_for_arm (INTVAL (*op1
))
4935 || const_ok_for_arm (- INTVAL (*op1
)))
4949 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4951 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
4952 *code
= *code
== GT
? GE
: LT
;
4960 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4962 *op1
= GEN_INT (i
- 1);
4963 *code
= *code
== GE
? GT
: LE
;
4970 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4971 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4973 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
4974 *code
= *code
== GTU
? GEU
: LTU
;
4982 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4984 *op1
= GEN_INT (i
- 1);
4985 *code
= *code
== GEU
? GTU
: LEU
;
4996 /* Define how to find the value returned by a function. */
4999 arm_function_value(const_tree type
, const_tree func
,
5000 bool outgoing ATTRIBUTE_UNUSED
)
5003 int unsignedp ATTRIBUTE_UNUSED
;
5004 rtx r ATTRIBUTE_UNUSED
;
5006 mode
= TYPE_MODE (type
);
5008 if (TARGET_AAPCS_BASED
)
5009 return aapcs_allocate_return_reg (mode
, type
, func
);
5011 /* Promote integer types. */
5012 if (INTEGRAL_TYPE_P (type
))
5013 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5015 /* Promotes small structs returned in a register to full-word size
5016 for big-endian AAPCS. */
5017 if (arm_return_in_msb (type
))
5019 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5020 if (size
% UNITS_PER_WORD
!= 0)
5022 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5023 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5027 return arm_libcall_value_1 (mode
);
5030 /* libcall hashtable helpers. */
5032 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5034 static inline hashval_t
hash (const rtx_def
*);
5035 static inline bool equal (const rtx_def
*, const rtx_def
*);
5036 static inline void remove (rtx_def
*);
5040 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5042 return rtx_equal_p (p1
, p2
);
5046 libcall_hasher::hash (const rtx_def
*p1
)
5048 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5051 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5054 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5056 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5060 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5062 static bool init_done
= false;
5063 static libcall_table_type
*libcall_htab
= NULL
;
5069 libcall_htab
= new libcall_table_type (31);
5070 add_libcall (libcall_htab
,
5071 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5072 add_libcall (libcall_htab
,
5073 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5074 add_libcall (libcall_htab
,
5075 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5076 add_libcall (libcall_htab
,
5077 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5079 add_libcall (libcall_htab
,
5080 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5081 add_libcall (libcall_htab
,
5082 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5083 add_libcall (libcall_htab
,
5084 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5085 add_libcall (libcall_htab
,
5086 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5088 add_libcall (libcall_htab
,
5089 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5090 add_libcall (libcall_htab
,
5091 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5092 add_libcall (libcall_htab
,
5093 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5094 add_libcall (libcall_htab
,
5095 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5096 add_libcall (libcall_htab
,
5097 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5098 add_libcall (libcall_htab
,
5099 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5100 add_libcall (libcall_htab
,
5101 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5102 add_libcall (libcall_htab
,
5103 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5105 /* Values from double-precision helper functions are returned in core
5106 registers if the selected core only supports single-precision
5107 arithmetic, even if we are using the hard-float ABI. The same is
5108 true for single-precision helpers, but we will never be using the
5109 hard-float ABI on a CPU which doesn't support single-precision
5110 operations in hardware. */
5111 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5112 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5113 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5114 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5115 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5116 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5117 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5118 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5119 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5120 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5121 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5122 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5124 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5128 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5132 arm_libcall_value_1 (machine_mode mode
)
5134 if (TARGET_AAPCS_BASED
)
5135 return aapcs_libcall_value (mode
);
5136 else if (TARGET_IWMMXT_ABI
5137 && arm_vector_mode_supported_p (mode
))
5138 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5140 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5143 /* Define how to find the value returned by a library function
5144 assuming the value has mode MODE. */
5147 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5149 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5150 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5152 /* The following libcalls return their result in integer registers,
5153 even though they return a floating point value. */
5154 if (arm_libcall_uses_aapcs_base (libcall
))
5155 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5159 return arm_libcall_value_1 (mode
);
5162 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5165 arm_function_value_regno_p (const unsigned int regno
)
5167 if (regno
== ARG_REGISTER (1)
5169 && TARGET_AAPCS_BASED
5171 && TARGET_HARD_FLOAT
5172 && regno
== FIRST_VFP_REGNUM
)
5173 || (TARGET_IWMMXT_ABI
5174 && regno
== FIRST_IWMMXT_REGNUM
))
5180 /* Determine the amount of memory needed to store the possible return
5181 registers of an untyped call. */
5183 arm_apply_result_size (void)
5189 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
5191 if (TARGET_IWMMXT_ABI
)
5198 /* Decide whether TYPE should be returned in memory (true)
5199 or in a register (false). FNTYPE is the type of the function making
5202 arm_return_in_memory (const_tree type
, const_tree fntype
)
5206 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5208 if (TARGET_AAPCS_BASED
)
5210 /* Simple, non-aggregate types (ie not including vectors and
5211 complex) are always returned in a register (or registers).
5212 We don't care about which register here, so we can short-cut
5213 some of the detail. */
5214 if (!AGGREGATE_TYPE_P (type
)
5215 && TREE_CODE (type
) != VECTOR_TYPE
5216 && TREE_CODE (type
) != COMPLEX_TYPE
)
5219 /* Any return value that is no larger than one word can be
5221 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5224 /* Check any available co-processors to see if they accept the
5225 type as a register candidate (VFP, for example, can return
5226 some aggregates in consecutive registers). These aren't
5227 available if the call is variadic. */
5228 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5231 /* Vector values should be returned using ARM registers, not
5232 memory (unless they're over 16 bytes, which will break since
5233 we only have four call-clobbered registers to play with). */
5234 if (TREE_CODE (type
) == VECTOR_TYPE
)
5235 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5237 /* The rest go in memory. */
5241 if (TREE_CODE (type
) == VECTOR_TYPE
)
5242 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5244 if (!AGGREGATE_TYPE_P (type
) &&
5245 (TREE_CODE (type
) != VECTOR_TYPE
))
5246 /* All simple types are returned in registers. */
5249 if (arm_abi
!= ARM_ABI_APCS
)
5251 /* ATPCS and later return aggregate types in memory only if they are
5252 larger than a word (or are variable size). */
5253 return (size
< 0 || size
> UNITS_PER_WORD
);
5256 /* For the arm-wince targets we choose to be compatible with Microsoft's
5257 ARM and Thumb compilers, which always return aggregates in memory. */
5259 /* All structures/unions bigger than one word are returned in memory.
5260 Also catch the case where int_size_in_bytes returns -1. In this case
5261 the aggregate is either huge or of variable size, and in either case
5262 we will want to return it via memory and not in a register. */
5263 if (size
< 0 || size
> UNITS_PER_WORD
)
5266 if (TREE_CODE (type
) == RECORD_TYPE
)
5270 /* For a struct the APCS says that we only return in a register
5271 if the type is 'integer like' and every addressable element
5272 has an offset of zero. For practical purposes this means
5273 that the structure can have at most one non bit-field element
5274 and that this element must be the first one in the structure. */
5276 /* Find the first field, ignoring non FIELD_DECL things which will
5277 have been created by C++. */
5278 for (field
= TYPE_FIELDS (type
);
5279 field
&& TREE_CODE (field
) != FIELD_DECL
;
5280 field
= DECL_CHAIN (field
))
5284 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5286 /* Check that the first field is valid for returning in a register. */
5288 /* ... Floats are not allowed */
5289 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5292 /* ... Aggregates that are not themselves valid for returning in
5293 a register are not allowed. */
5294 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5297 /* Now check the remaining fields, if any. Only bitfields are allowed,
5298 since they are not addressable. */
5299 for (field
= DECL_CHAIN (field
);
5301 field
= DECL_CHAIN (field
))
5303 if (TREE_CODE (field
) != FIELD_DECL
)
5306 if (!DECL_BIT_FIELD_TYPE (field
))
5313 if (TREE_CODE (type
) == UNION_TYPE
)
5317 /* Unions can be returned in registers if every element is
5318 integral, or can be returned in an integer register. */
5319 for (field
= TYPE_FIELDS (type
);
5321 field
= DECL_CHAIN (field
))
5323 if (TREE_CODE (field
) != FIELD_DECL
)
5326 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5329 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5335 #endif /* not ARM_WINCE */
5337 /* Return all other types in memory. */
5341 const struct pcs_attribute_arg
5345 } pcs_attribute_args
[] =
5347 {"aapcs", ARM_PCS_AAPCS
},
5348 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5350 /* We could recognize these, but changes would be needed elsewhere
5351 * to implement them. */
5352 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5353 {"atpcs", ARM_PCS_ATPCS
},
5354 {"apcs", ARM_PCS_APCS
},
5356 {NULL
, ARM_PCS_UNKNOWN
}
5360 arm_pcs_from_attribute (tree attr
)
5362 const struct pcs_attribute_arg
*ptr
;
5365 /* Get the value of the argument. */
5366 if (TREE_VALUE (attr
) == NULL_TREE
5367 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5368 return ARM_PCS_UNKNOWN
;
5370 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5372 /* Check it against the list of known arguments. */
5373 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5374 if (streq (arg
, ptr
->arg
))
5377 /* An unrecognized interrupt type. */
5378 return ARM_PCS_UNKNOWN
;
5381 /* Get the PCS variant to use for this call. TYPE is the function's type
5382 specification, DECL is the specific declartion. DECL may be null if
5383 the call could be indirect or if this is a library call. */
5385 arm_get_pcs_model (const_tree type
, const_tree decl
)
5387 bool user_convention
= false;
5388 enum arm_pcs user_pcs
= arm_pcs_default
;
5393 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5396 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5397 user_convention
= true;
5400 if (TARGET_AAPCS_BASED
)
5402 /* Detect varargs functions. These always use the base rules
5403 (no argument is ever a candidate for a co-processor
5405 bool base_rules
= stdarg_p (type
);
5407 if (user_convention
)
5409 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5410 sorry ("non-AAPCS derived PCS variant");
5411 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5412 error ("variadic functions must use the base AAPCS variant");
5416 return ARM_PCS_AAPCS
;
5417 else if (user_convention
)
5419 else if (decl
&& flag_unit_at_a_time
)
5421 /* Local functions never leak outside this compilation unit,
5422 so we are free to use whatever conventions are
5424 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5425 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5427 return ARM_PCS_AAPCS_LOCAL
;
5430 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5431 sorry ("PCS variant");
5433 /* For everything else we use the target's default. */
5434 return arm_pcs_default
;
5439 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5440 const_tree fntype ATTRIBUTE_UNUSED
,
5441 rtx libcall ATTRIBUTE_UNUSED
,
5442 const_tree fndecl ATTRIBUTE_UNUSED
)
5444 /* Record the unallocated VFP registers. */
5445 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5446 pcum
->aapcs_vfp_reg_alloc
= 0;
5449 /* Walk down the type tree of TYPE counting consecutive base elements.
5450 If *MODEP is VOIDmode, then set it to the first valid floating point
5451 type. If a non-floating point type is found, or if a floating point
5452 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5453 otherwise return the count in the sub-tree. */
5455 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5460 switch (TREE_CODE (type
))
5463 mode
= TYPE_MODE (type
);
5464 if (mode
!= DFmode
&& mode
!= SFmode
)
5467 if (*modep
== VOIDmode
)
5476 mode
= TYPE_MODE (TREE_TYPE (type
));
5477 if (mode
!= DFmode
&& mode
!= SFmode
)
5480 if (*modep
== VOIDmode
)
5489 /* Use V2SImode and V4SImode as representatives of all 64-bit
5490 and 128-bit vector types, whether or not those modes are
5491 supported with the present options. */
5492 size
= int_size_in_bytes (type
);
5505 if (*modep
== VOIDmode
)
5508 /* Vector modes are considered to be opaque: two vectors are
5509 equivalent for the purposes of being homogeneous aggregates
5510 if they are the same size. */
5519 tree index
= TYPE_DOMAIN (type
);
5521 /* Can't handle incomplete types nor sizes that are not
5523 if (!COMPLETE_TYPE_P (type
)
5524 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5527 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5530 || !TYPE_MAX_VALUE (index
)
5531 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5532 || !TYPE_MIN_VALUE (index
)
5533 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5537 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5538 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5540 /* There must be no padding. */
5541 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5553 /* Can't handle incomplete types nor sizes that are not
5555 if (!COMPLETE_TYPE_P (type
)
5556 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5559 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5561 if (TREE_CODE (field
) != FIELD_DECL
)
5564 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5570 /* There must be no padding. */
5571 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5578 case QUAL_UNION_TYPE
:
5580 /* These aren't very interesting except in a degenerate case. */
5585 /* Can't handle incomplete types nor sizes that are not
5587 if (!COMPLETE_TYPE_P (type
)
5588 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5591 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5593 if (TREE_CODE (field
) != FIELD_DECL
)
5596 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5599 count
= count
> sub_count
? count
: sub_count
;
5602 /* There must be no padding. */
5603 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5616 /* Return true if PCS_VARIANT should use VFP registers. */
5618 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5620 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5622 static bool seen_thumb1_vfp
= false;
5624 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5626 sorry ("Thumb-1 hard-float VFP ABI");
5627 /* sorry() is not immediately fatal, so only display this once. */
5628 seen_thumb1_vfp
= true;
5634 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5637 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5638 (TARGET_VFP_DOUBLE
|| !is_double
));
5641 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5642 suitable for passing or returning in VFP registers for the PCS
5643 variant selected. If it is, then *BASE_MODE is updated to contain
5644 a machine mode describing each element of the argument's type and
5645 *COUNT to hold the number of such elements. */
5647 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5648 machine_mode mode
, const_tree type
,
5649 machine_mode
*base_mode
, int *count
)
5651 machine_mode new_mode
= VOIDmode
;
5653 /* If we have the type information, prefer that to working things
5654 out from the mode. */
5657 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5659 if (ag_count
> 0 && ag_count
<= 4)
5664 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5665 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5666 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5671 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5674 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5680 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5683 *base_mode
= new_mode
;
5688 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5689 machine_mode mode
, const_tree type
)
5691 int count ATTRIBUTE_UNUSED
;
5692 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5694 if (!use_vfp_abi (pcs_variant
, false))
5696 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5701 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5704 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5707 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5708 &pcum
->aapcs_vfp_rmode
,
5709 &pcum
->aapcs_vfp_rcount
);
5713 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5714 const_tree type ATTRIBUTE_UNUSED
)
5716 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5717 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5720 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5721 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5723 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5725 || (mode
== TImode
&& ! TARGET_NEON
)
5726 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5729 int rcount
= pcum
->aapcs_vfp_rcount
;
5731 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5735 /* Avoid using unsupported vector modes. */
5736 if (rmode
== V2SImode
)
5738 else if (rmode
== V4SImode
)
5745 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5746 for (i
= 0; i
< rcount
; i
++)
5748 rtx tmp
= gen_rtx_REG (rmode
,
5749 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5750 tmp
= gen_rtx_EXPR_LIST
5752 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5753 XVECEXP (par
, 0, i
) = tmp
;
5756 pcum
->aapcs_reg
= par
;
5759 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5766 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5768 const_tree type ATTRIBUTE_UNUSED
)
5770 if (!use_vfp_abi (pcs_variant
, false))
5773 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5776 machine_mode ag_mode
;
5781 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5786 if (ag_mode
== V2SImode
)
5788 else if (ag_mode
== V4SImode
)
5794 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5795 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5796 for (i
= 0; i
< count
; i
++)
5798 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5799 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5800 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5801 XVECEXP (par
, 0, i
) = tmp
;
5807 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5811 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5812 machine_mode mode ATTRIBUTE_UNUSED
,
5813 const_tree type ATTRIBUTE_UNUSED
)
5815 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5816 pcum
->aapcs_vfp_reg_alloc
= 0;
5820 #define AAPCS_CP(X) \
5822 aapcs_ ## X ## _cum_init, \
5823 aapcs_ ## X ## _is_call_candidate, \
5824 aapcs_ ## X ## _allocate, \
5825 aapcs_ ## X ## _is_return_candidate, \
5826 aapcs_ ## X ## _allocate_return_reg, \
5827 aapcs_ ## X ## _advance \
5830 /* Table of co-processors that can be used to pass arguments in
5831 registers. Idealy no arugment should be a candidate for more than
5832 one co-processor table entry, but the table is processed in order
5833 and stops after the first match. If that entry then fails to put
5834 the argument into a co-processor register, the argument will go on
5838 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5839 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5841 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5842 BLKmode) is a candidate for this co-processor's registers; this
5843 function should ignore any position-dependent state in
5844 CUMULATIVE_ARGS and only use call-type dependent information. */
5845 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5847 /* Return true if the argument does get a co-processor register; it
5848 should set aapcs_reg to an RTX of the register allocated as is
5849 required for a return from FUNCTION_ARG. */
5850 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5852 /* Return true if a result of mode MODE (or type TYPE if MODE is
5853 BLKmode) is can be returned in this co-processor's registers. */
5854 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
5856 /* Allocate and return an RTX element to hold the return type of a
5857 call, this routine must not fail and will only be called if
5858 is_return_candidate returned true with the same parameters. */
5859 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
5861 /* Finish processing this argument and prepare to start processing
5863 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5864 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5872 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5877 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5878 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5885 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5887 /* We aren't passed a decl, so we can't check that a call is local.
5888 However, it isn't clear that that would be a win anyway, since it
5889 might limit some tail-calling opportunities. */
5890 enum arm_pcs pcs_variant
;
5894 const_tree fndecl
= NULL_TREE
;
5896 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5899 fntype
= TREE_TYPE (fntype
);
5902 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5905 pcs_variant
= arm_pcs_default
;
5907 if (pcs_variant
!= ARM_PCS_AAPCS
)
5911 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5912 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
5921 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
5924 /* We aren't passed a decl, so we can't check that a call is local.
5925 However, it isn't clear that that would be a win anyway, since it
5926 might limit some tail-calling opportunities. */
5927 enum arm_pcs pcs_variant
;
5928 int unsignedp ATTRIBUTE_UNUSED
;
5932 const_tree fndecl
= NULL_TREE
;
5934 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5937 fntype
= TREE_TYPE (fntype
);
5940 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5943 pcs_variant
= arm_pcs_default
;
5945 /* Promote integer types. */
5946 if (type
&& INTEGRAL_TYPE_P (type
))
5947 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5949 if (pcs_variant
!= ARM_PCS_AAPCS
)
5953 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5954 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5956 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5960 /* Promotes small structs returned in a register to full-word size
5961 for big-endian AAPCS. */
5962 if (type
&& arm_return_in_msb (type
))
5964 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5965 if (size
% UNITS_PER_WORD
!= 0)
5967 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5968 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5972 return gen_rtx_REG (mode
, R0_REGNUM
);
5976 aapcs_libcall_value (machine_mode mode
)
5978 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5979 && GET_MODE_SIZE (mode
) <= 4)
5982 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
5985 /* Lay out a function argument using the AAPCS rules. The rule
5986 numbers referred to here are those in the AAPCS. */
5988 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5989 const_tree type
, bool named
)
5994 /* We only need to do this once per argument. */
5995 if (pcum
->aapcs_arg_processed
)
5998 pcum
->aapcs_arg_processed
= true;
6000 /* Special case: if named is false then we are handling an incoming
6001 anonymous argument which is on the stack. */
6005 /* Is this a potential co-processor register candidate? */
6006 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6008 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6009 pcum
->aapcs_cprc_slot
= slot
;
6011 /* We don't have to apply any of the rules from part B of the
6012 preparation phase, these are handled elsewhere in the
6017 /* A Co-processor register candidate goes either in its own
6018 class of registers or on the stack. */
6019 if (!pcum
->aapcs_cprc_failed
[slot
])
6021 /* C1.cp - Try to allocate the argument to co-processor
6023 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6026 /* C2.cp - Put the argument on the stack and note that we
6027 can't assign any more candidates in this slot. We also
6028 need to note that we have allocated stack space, so that
6029 we won't later try to split a non-cprc candidate between
6030 core registers and the stack. */
6031 pcum
->aapcs_cprc_failed
[slot
] = true;
6032 pcum
->can_split
= false;
6035 /* We didn't get a register, so this argument goes on the
6037 gcc_assert (pcum
->can_split
== false);
6042 /* C3 - For double-word aligned arguments, round the NCRN up to the
6043 next even number. */
6044 ncrn
= pcum
->aapcs_ncrn
;
6045 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
6048 nregs
= ARM_NUM_REGS2(mode
, type
);
6050 /* Sigh, this test should really assert that nregs > 0, but a GCC
6051 extension allows empty structs and then gives them empty size; it
6052 then allows such a structure to be passed by value. For some of
6053 the code below we have to pretend that such an argument has
6054 non-zero size so that we 'locate' it correctly either in
6055 registers or on the stack. */
6056 gcc_assert (nregs
>= 0);
6058 nregs2
= nregs
? nregs
: 1;
6060 /* C4 - Argument fits entirely in core registers. */
6061 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6063 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6064 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6068 /* C5 - Some core registers left and there are no arguments already
6069 on the stack: split this argument between the remaining core
6070 registers and the stack. */
6071 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6073 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6074 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6075 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6079 /* C6 - NCRN is set to 4. */
6080 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6082 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6086 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6087 for a call to a function whose data type is FNTYPE.
6088 For a library call, FNTYPE is NULL. */
6090 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6092 tree fndecl ATTRIBUTE_UNUSED
)
6094 /* Long call handling. */
6096 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6098 pcum
->pcs_variant
= arm_pcs_default
;
6100 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6102 if (arm_libcall_uses_aapcs_base (libname
))
6103 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6105 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6106 pcum
->aapcs_reg
= NULL_RTX
;
6107 pcum
->aapcs_partial
= 0;
6108 pcum
->aapcs_arg_processed
= false;
6109 pcum
->aapcs_cprc_slot
= -1;
6110 pcum
->can_split
= true;
6112 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6116 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6118 pcum
->aapcs_cprc_failed
[i
] = false;
6119 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6127 /* On the ARM, the offset starts at 0. */
6129 pcum
->iwmmxt_nregs
= 0;
6130 pcum
->can_split
= true;
6132 /* Varargs vectors are treated the same as long long.
6133 named_count avoids having to change the way arm handles 'named' */
6134 pcum
->named_count
= 0;
6137 if (TARGET_REALLY_IWMMXT
&& fntype
)
6141 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6143 fn_arg
= TREE_CHAIN (fn_arg
))
6144 pcum
->named_count
+= 1;
6146 if (! pcum
->named_count
)
6147 pcum
->named_count
= INT_MAX
;
6151 /* Return true if mode/type need doubleword alignment. */
6153 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6156 return PARM_BOUNDARY
< GET_MODE_ALIGNMENT (mode
);
6158 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6159 if (!AGGREGATE_TYPE_P (type
))
6160 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6162 /* Array types: Use member alignment of element type. */
6163 if (TREE_CODE (type
) == ARRAY_TYPE
)
6164 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6166 /* Record/aggregate types: Use greatest member alignment of any member. */
6167 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6168 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6175 /* Determine where to put an argument to a function.
6176 Value is zero to push the argument on the stack,
6177 or a hard register in which to store the argument.
6179 MODE is the argument's machine mode.
6180 TYPE is the data type of the argument (as a tree).
6181 This is null for libcalls where that information may
6183 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6184 the preceding args and about the function being called.
6185 NAMED is nonzero if this argument is a named parameter
6186 (otherwise it is an extra parameter matching an ellipsis).
6188 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6189 other arguments are passed on the stack. If (NAMED == 0) (which happens
6190 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6191 defined), say it is passed in the stack (function_prologue will
6192 indeed make it pass in the stack if necessary). */
6195 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6196 const_tree type
, bool named
)
6198 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6201 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6202 a call insn (op3 of a call_value insn). */
6203 if (mode
== VOIDmode
)
6206 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6208 aapcs_layout_arg (pcum
, mode
, type
, named
);
6209 return pcum
->aapcs_reg
;
6212 /* Varargs vectors are treated the same as long long.
6213 named_count avoids having to change the way arm handles 'named' */
6214 if (TARGET_IWMMXT_ABI
6215 && arm_vector_mode_supported_p (mode
)
6216 && pcum
->named_count
> pcum
->nargs
+ 1)
6218 if (pcum
->iwmmxt_nregs
<= 9)
6219 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6222 pcum
->can_split
= false;
6227 /* Put doubleword aligned quantities in even register pairs. */
6229 && ARM_DOUBLEWORD_ALIGN
6230 && arm_needs_doubleword_align (mode
, type
))
6233 /* Only allow splitting an arg between regs and memory if all preceding
6234 args were allocated to regs. For args passed by reference we only count
6235 the reference pointer. */
6236 if (pcum
->can_split
)
6239 nregs
= ARM_NUM_REGS2 (mode
, type
);
6241 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6244 return gen_rtx_REG (mode
, pcum
->nregs
);
6248 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6250 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
6251 ? DOUBLEWORD_ALIGNMENT
6256 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6257 tree type
, bool named
)
6259 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6260 int nregs
= pcum
->nregs
;
6262 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6264 aapcs_layout_arg (pcum
, mode
, type
, named
);
6265 return pcum
->aapcs_partial
;
6268 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6271 if (NUM_ARG_REGS
> nregs
6272 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6274 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6279 /* Update the data in PCUM to advance over an argument
6280 of mode MODE and data type TYPE.
6281 (TYPE is null for libcalls where that information may not be available.) */
6284 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6285 const_tree type
, bool named
)
6287 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6289 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6291 aapcs_layout_arg (pcum
, mode
, type
, named
);
6293 if (pcum
->aapcs_cprc_slot
>= 0)
6295 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6297 pcum
->aapcs_cprc_slot
= -1;
6300 /* Generic stuff. */
6301 pcum
->aapcs_arg_processed
= false;
6302 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6303 pcum
->aapcs_reg
= NULL_RTX
;
6304 pcum
->aapcs_partial
= 0;
6309 if (arm_vector_mode_supported_p (mode
)
6310 && pcum
->named_count
> pcum
->nargs
6311 && TARGET_IWMMXT_ABI
)
6312 pcum
->iwmmxt_nregs
+= 1;
6314 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6318 /* Variable sized types are passed by reference. This is a GCC
6319 extension to the ARM ABI. */
6322 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6323 machine_mode mode ATTRIBUTE_UNUSED
,
6324 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6326 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6329 /* Encode the current state of the #pragma [no_]long_calls. */
6332 OFF
, /* No #pragma [no_]long_calls is in effect. */
6333 LONG
, /* #pragma long_calls is in effect. */
6334 SHORT
/* #pragma no_long_calls is in effect. */
6337 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6340 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6342 arm_pragma_long_calls
= LONG
;
6346 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6348 arm_pragma_long_calls
= SHORT
;
6352 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6354 arm_pragma_long_calls
= OFF
;
6357 /* Handle an attribute requiring a FUNCTION_DECL;
6358 arguments as in struct attribute_spec.handler. */
6360 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6361 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6363 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6365 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6367 *no_add_attrs
= true;
6373 /* Handle an "interrupt" or "isr" attribute;
6374 arguments as in struct attribute_spec.handler. */
6376 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6381 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6383 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6385 *no_add_attrs
= true;
6387 /* FIXME: the argument if any is checked for type attributes;
6388 should it be checked for decl ones? */
6392 if (TREE_CODE (*node
) == FUNCTION_TYPE
6393 || TREE_CODE (*node
) == METHOD_TYPE
)
6395 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6397 warning (OPT_Wattributes
, "%qE attribute ignored",
6399 *no_add_attrs
= true;
6402 else if (TREE_CODE (*node
) == POINTER_TYPE
6403 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6404 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6405 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6407 *node
= build_variant_type_copy (*node
);
6408 TREE_TYPE (*node
) = build_type_attribute_variant
6410 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6411 *no_add_attrs
= true;
6415 /* Possibly pass this attribute on from the type to a decl. */
6416 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6417 | (int) ATTR_FLAG_FUNCTION_NEXT
6418 | (int) ATTR_FLAG_ARRAY_NEXT
))
6420 *no_add_attrs
= true;
6421 return tree_cons (name
, args
, NULL_TREE
);
6425 warning (OPT_Wattributes
, "%qE attribute ignored",
6434 /* Handle a "pcs" attribute; arguments as in struct
6435 attribute_spec.handler. */
6437 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6438 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6440 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6442 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6443 *no_add_attrs
= true;
6448 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6449 /* Handle the "notshared" attribute. This attribute is another way of
6450 requesting hidden visibility. ARM's compiler supports
6451 "__declspec(notshared)"; we support the same thing via an
6455 arm_handle_notshared_attribute (tree
*node
,
6456 tree name ATTRIBUTE_UNUSED
,
6457 tree args ATTRIBUTE_UNUSED
,
6458 int flags ATTRIBUTE_UNUSED
,
6461 tree decl
= TYPE_NAME (*node
);
6465 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6466 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6467 *no_add_attrs
= false;
6473 /* Return 0 if the attributes for two types are incompatible, 1 if they
6474 are compatible, and 2 if they are nearly compatible (which causes a
6475 warning to be generated). */
6477 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6481 /* Check for mismatch of non-default calling convention. */
6482 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6485 /* Check for mismatched call attributes. */
6486 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6487 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6488 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6489 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6491 /* Only bother to check if an attribute is defined. */
6492 if (l1
| l2
| s1
| s2
)
6494 /* If one type has an attribute, the other must have the same attribute. */
6495 if ((l1
!= l2
) || (s1
!= s2
))
6498 /* Disallow mixed attributes. */
6499 if ((l1
& s2
) || (l2
& s1
))
6503 /* Check for mismatched ISR attribute. */
6504 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6506 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6507 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6509 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6516 /* Assigns default attributes to newly defined type. This is used to
6517 set short_call/long_call attributes for function types of
6518 functions defined inside corresponding #pragma scopes. */
6520 arm_set_default_type_attributes (tree type
)
6522 /* Add __attribute__ ((long_call)) to all functions, when
6523 inside #pragma long_calls or __attribute__ ((short_call)),
6524 when inside #pragma no_long_calls. */
6525 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6527 tree type_attr_list
, attr_name
;
6528 type_attr_list
= TYPE_ATTRIBUTES (type
);
6530 if (arm_pragma_long_calls
== LONG
)
6531 attr_name
= get_identifier ("long_call");
6532 else if (arm_pragma_long_calls
== SHORT
)
6533 attr_name
= get_identifier ("short_call");
6537 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6538 TYPE_ATTRIBUTES (type
) = type_attr_list
;
6542 /* Return true if DECL is known to be linked into section SECTION. */
6545 arm_function_in_section_p (tree decl
, section
*section
)
6547 /* We can only be certain about the prevailing symbol definition. */
6548 if (!decl_binds_to_current_def_p (decl
))
6551 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6552 if (!DECL_SECTION_NAME (decl
))
6554 /* Make sure that we will not create a unique section for DECL. */
6555 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
6559 return function_section (decl
) == section
;
6562 /* Return nonzero if a 32-bit "long_call" should be generated for
6563 a call from the current function to DECL. We generate a long_call
6566 a. has an __attribute__((long call))
6567 or b. is within the scope of a #pragma long_calls
6568 or c. the -mlong-calls command line switch has been specified
6570 However we do not generate a long call if the function:
6572 d. has an __attribute__ ((short_call))
6573 or e. is inside the scope of a #pragma no_long_calls
6574 or f. is defined in the same section as the current function. */
6577 arm_is_long_call_p (tree decl
)
6582 return TARGET_LONG_CALLS
;
6584 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6585 if (lookup_attribute ("short_call", attrs
))
6588 /* For "f", be conservative, and only cater for cases in which the
6589 whole of the current function is placed in the same section. */
6590 if (!flag_reorder_blocks_and_partition
6591 && TREE_CODE (decl
) == FUNCTION_DECL
6592 && arm_function_in_section_p (decl
, current_function_section ()))
6595 if (lookup_attribute ("long_call", attrs
))
6598 return TARGET_LONG_CALLS
;
6601 /* Return nonzero if it is ok to make a tail-call to DECL. */
6603 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6605 unsigned long func_type
;
6607 if (cfun
->machine
->sibcall_blocked
)
6610 /* Never tailcall something if we are generating code for Thumb-1. */
6614 /* The PIC register is live on entry to VxWorks PLT entries, so we
6615 must make the call before restoring the PIC register. */
6616 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6619 /* If we are interworking and the function is not declared static
6620 then we can't tail-call it unless we know that it exists in this
6621 compilation unit (since it might be a Thumb routine). */
6622 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6623 && !TREE_ASM_WRITTEN (decl
))
6626 func_type
= arm_current_func_type ();
6627 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6628 if (IS_INTERRUPT (func_type
))
6631 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6633 /* Check that the return value locations are the same. For
6634 example that we aren't returning a value from the sibling in
6635 a VFP register but then need to transfer it to a core
6639 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
6640 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6642 if (!rtx_equal_p (a
, b
))
6646 /* Never tailcall if function may be called with a misaligned SP. */
6647 if (IS_STACKALIGN (func_type
))
6650 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6651 references should become a NOP. Don't convert such calls into
6653 if (TARGET_AAPCS_BASED
6654 && arm_abi
== ARM_ABI_AAPCS
6656 && DECL_WEAK (decl
))
6659 /* Everything else is ok. */
6664 /* Addressing mode support functions. */
6666 /* Return nonzero if X is a legitimate immediate operand when compiling
6667 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6669 legitimate_pic_operand_p (rtx x
)
6671 if (GET_CODE (x
) == SYMBOL_REF
6672 || (GET_CODE (x
) == CONST
6673 && GET_CODE (XEXP (x
, 0)) == PLUS
6674 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6680 /* Record that the current function needs a PIC register. Initialize
6681 cfun->machine->pic_reg if we have not already done so. */
6684 require_pic_register (void)
6686 /* A lot of the logic here is made obscure by the fact that this
6687 routine gets called as part of the rtx cost estimation process.
6688 We don't want those calls to affect any assumptions about the real
6689 function; and further, we can't call entry_of_function() until we
6690 start the real expansion process. */
6691 if (!crtl
->uses_pic_offset_table
)
6693 gcc_assert (can_create_pseudo_p ());
6694 if (arm_pic_register
!= INVALID_REGNUM
6695 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6697 if (!cfun
->machine
->pic_reg
)
6698 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6700 /* Play games to avoid marking the function as needing pic
6701 if we are being called as part of the cost-estimation
6703 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6704 crtl
->uses_pic_offset_table
= 1;
6708 rtx_insn
*seq
, *insn
;
6710 if (!cfun
->machine
->pic_reg
)
6711 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6713 /* Play games to avoid marking the function as needing pic
6714 if we are being called as part of the cost-estimation
6716 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6718 crtl
->uses_pic_offset_table
= 1;
6721 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6722 && arm_pic_register
> LAST_LO_REGNUM
)
6723 emit_move_insn (cfun
->machine
->pic_reg
,
6724 gen_rtx_REG (Pmode
, arm_pic_register
));
6726 arm_load_pic_register (0UL);
6731 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6733 INSN_LOCATION (insn
) = prologue_location
;
6735 /* We can be called during expansion of PHI nodes, where
6736 we can't yet emit instructions directly in the final
6737 insn stream. Queue the insns on the entry edge, they will
6738 be committed after everything else is expanded. */
6739 insert_insn_on_edge (seq
,
6740 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6747 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
6749 if (GET_CODE (orig
) == SYMBOL_REF
6750 || GET_CODE (orig
) == LABEL_REF
)
6756 gcc_assert (can_create_pseudo_p ());
6757 reg
= gen_reg_rtx (Pmode
);
6760 /* VxWorks does not impose a fixed gap between segments; the run-time
6761 gap can be different from the object-file gap. We therefore can't
6762 use GOTOFF unless we are absolutely sure that the symbol is in the
6763 same segment as the GOT. Unfortunately, the flexibility of linker
6764 scripts means that we can't be sure of that in general, so assume
6765 that GOTOFF is never valid on VxWorks. */
6766 if ((GET_CODE (orig
) == LABEL_REF
6767 || (GET_CODE (orig
) == SYMBOL_REF
&&
6768 SYMBOL_REF_LOCAL_P (orig
)))
6770 && arm_pic_data_is_text_relative
)
6771 insn
= arm_pic_static_addr (orig
, reg
);
6777 /* If this function doesn't have a pic register, create one now. */
6778 require_pic_register ();
6780 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6782 /* Make the MEM as close to a constant as possible. */
6783 mem
= SET_SRC (pat
);
6784 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6785 MEM_READONLY_P (mem
) = 1;
6786 MEM_NOTRAP_P (mem
) = 1;
6788 insn
= emit_insn (pat
);
6791 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6793 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6797 else if (GET_CODE (orig
) == CONST
)
6801 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6802 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6805 /* Handle the case where we have: const (UNSPEC_TLS). */
6806 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6807 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6810 /* Handle the case where we have:
6811 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6813 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6814 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6815 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6817 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6823 gcc_assert (can_create_pseudo_p ());
6824 reg
= gen_reg_rtx (Pmode
);
6827 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6829 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6830 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6831 base
== reg
? 0 : reg
);
6833 if (CONST_INT_P (offset
))
6835 /* The base register doesn't really matter, we only want to
6836 test the index for the appropriate mode. */
6837 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6839 gcc_assert (can_create_pseudo_p ());
6840 offset
= force_reg (Pmode
, offset
);
6843 if (CONST_INT_P (offset
))
6844 return plus_constant (Pmode
, base
, INTVAL (offset
));
6847 if (GET_MODE_SIZE (mode
) > 4
6848 && (GET_MODE_CLASS (mode
) == MODE_INT
6849 || TARGET_SOFT_FLOAT
))
6851 emit_insn (gen_addsi3 (reg
, base
, offset
));
6855 return gen_rtx_PLUS (Pmode
, base
, offset
);
/* NOTE(review): extraction-garbled fragment of thumb_find_work_register.
   Statements are split across physical lines and interior lines are
   missing (the embedded original line numbers jump, e.g. 6873 -> 6876,
   and the `return reg;` bodies of the search loops, braces and the
   return type line are absent).  Text preserved verbatim below; do not
   attempt to compile.  Purpose (from the visible header comment): pick
   a scratch register usable in a Thumb function prologue, given the
   mask of registers that will be pushed.  */
6862 /* Find a spare register to use during the prolog of a function. */
6865 thumb_find_work_register (unsigned long pushed_regs_mask
)
6869 /* Check the argument registers first as these are call-used. The
6870 register allocation order means that sometimes r3 might be used
6871 but earlier argument registers might not, so check them all. */
6872 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6873 if (!df_regs_ever_live_p (reg
))
6876 /* Before going on to check the call-saved registers we can try a couple
6877 more ways of deducing that r3 is available. The first is when we are
6878 pushing anonymous arguments onto the stack and we have less than 4
6879 registers worth of fixed arguments(*). In this case r3 will be part of
6880 the variable argument list and so we can be sure that it will be
6881 pushed right at the start of the function. Hence it will be available
6882 for the rest of the prologue.
6883 (*): ie crtl->args.pretend_args_size is greater than 0. */
6884 if (cfun
->machine
->uses_anonymous_args
6885 && crtl
->args
.pretend_args_size
> 0)
6886 return LAST_ARG_REGNUM
;
6888 /* The other case is when we have fixed arguments but less than 4 registers
6889 worth. In this case r3 might be used in the body of the function, but
6890 it is not being used to convey an argument into the function. In theory
6891 we could just check crtl->args.size to see how many bytes are
6892 being passed in argument registers, but it seems that it is unreliable.
6893 Sometimes it will have the value 0 when in fact arguments are being
6894 passed. (See testcase execute/20021111-1.c for an example). So we also
6895 check the args_info.nregs field as well. The problem with this field is
6896 that it makes no allowances for arguments that are passed to the
6897 function but which are not used. Hence we could miss an opportunity
6898 when a function has an unused argument in r3. But it is better to be
6899 safe than to be sorry. */
6900 if (! cfun
->machine
->uses_anonymous_args
6901 && crtl
->args
.size
>= 0
6902 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6903 && (TARGET_AAPCS_BASED
6904 ? crtl
->args
.info
.aapcs_ncrn
< 4
6905 : crtl
->args
.info
.nregs
< 4))
6906 return LAST_ARG_REGNUM
;
6908 /* Otherwise look for a call-saved register that is going to be pushed. */
6909 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6910 if (pushed_regs_mask
& (1 << reg
))
/* NOTE(review): the `return reg;` body of the loop above (original
   line 6911) and the TARGET_THUMB2 guard are among the elided lines.  */
6915 /* Thumb-2 can use high regs. */
6916 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6917 if (pushed_regs_mask
& (1 << reg
))
6920 /* Something went wrong - thumb_compute_save_reg_mask()
6921 should have arranged for a suitable register to be pushed. */
/* Monotonically increasing counter; visible uses below feed
   GEN_INT (pic_labelno++) into UNSPEC_PIC_LABEL vectors, so this
   numbers the synthetic PIC labels that never appear in the code
   stream.  (Declaration split across lines by extraction.)  */
6925 static GTY(()) int pic_labelno
;
/* NOTE(review): extraction-garbled fragment of arm_load_pic_register.
   Statements are split across physical lines; interior lines (braces,
   the early `return`, the TARGET_32BIT branch structure, the final
   emit_use, and part of the UNSPEC construction at original line 6965)
   are missing.  Text preserved verbatim; not compilable as-is.
   Purpose: emit the instruction sequence that materializes the PIC
   base register, with distinct paths for VxWorks RTP, 32-bit
   ARM/Thumb-2, and Thumb-1.  */
6927 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6931 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6933 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6935 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6938 gcc_assert (flag_pic
);
6940 pic_reg
= cfun
->machine
->pic_reg
;
6941 if (TARGET_VXWORKS_RTP
)
6943 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6944 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6945 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6947 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6949 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6950 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6954 /* We use an UNSPEC rather than a LABEL_REF because this label
6955 never appears in the code stream. */
6957 labelno
= GEN_INT (pic_labelno
++);
6958 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6959 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6961 /* On the ARM the PC register contains 'dot + 8' at the time of the
6962 addition, on the Thumb it is 'dot + 4'. */
6963 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
/* NOTE(review): the UNSPEC code argument of the call below (original
   line 6965) is among the elided lines.  */
6964 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6966 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6970 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6972 else /* TARGET_THUMB1 */
6974 if (arm_pic_register
!= INVALID_REGNUM
6975 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6977 /* We will have pushed the pic register, so we should always be
6978 able to find a work register. */
6979 pic_tmp
= gen_rtx_REG (SImode
,
6980 thumb_find_work_register (saved_regs
));
6981 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6982 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6983 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6985 else if (arm_pic_register
!= INVALID_REGNUM
6986 && arm_pic_register
> LAST_LO_REGNUM
6987 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
6989 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
))
;
6990 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
6991 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
6994 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6998 /* Need to emit this whether or not we obey regdecls,
6999 since setjmp/longjmp can cause life info to screw up. */
/* NOTE(review): extraction-garbled fragment of arm_pic_static_addr.
   Statements are split across lines; the return type, braces and the
   trailing `return insn;` are among the elided lines.  Text preserved
   verbatim.  Purpose (per the visible header): build a pc-relative
   UNSPEC_SYMBOL_OFFSET expression for ORIG and emit the load into REG.  */
7003 /* Generate code to load the address of a static var when flag_pic is set. */
7005 arm_pic_static_addr (rtx orig
, rtx reg
)
7007 rtx l1
, labelno
, offset_rtx
, insn
;
7009 gcc_assert (flag_pic
);
7011 /* We use an UNSPEC rather than a LABEL_REF because this label
7012 never appears in the code stream. */
7013 labelno
= GEN_INT (pic_labelno
++);
7014 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7015 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7017 /* On the ARM the PC register contains 'dot + 8' at the time of the
7018 addition, on the Thumb it is 'dot + 4'. */
7019 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7020 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7021 UNSPEC_SYMBOL_OFFSET
);
7022 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7024 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
/* NOTE(review): garbled fragment of arm_address_register_rtx_p.  The
   REG_P check, the `regno = REGNO (x)` assignment, the strict_p branch
   and braces are elided; what remains are the two return expressions.
   Text preserved verbatim.  */
7028 /* Return nonzero if X is valid as an ARM state addressing register. */
7030 arm_address_register_rtx_p (rtx x
, int strict_p
)
/* Strict path: presumably taken when strict_p — TODO confirm against
   the elided lines.  */
7040 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7042 return (regno
<= LAST_ARM_REGNUM
7043 || regno
>= FIRST_PSEUDO_REGISTER
7044 || regno
== FRAME_POINTER_REGNUM
7045 || regno
== ARG_POINTER_REGNUM
);
/* NOTE(review): garbled fragment of pcrel_constant_p; the return type,
   braces and final `return FALSE;` are elided.  Text preserved
   verbatim.  */
7048 /* Return TRUE if this rtx is the difference of a symbol and a label,
7049 and will reduce to a PC-relative relocation in the object file.
7050 Expressions like this can be left alone when generating PIC, rather
7051 than forced through the GOT. */
7053 pcrel_constant_p (rtx x
)
7055 if (GET_CODE (x
) == MINUS
)
7056 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
/* NOTE(review): garbled fragment of will_be_in_index_register; the
   return type and braces are elided.  Text preserved verbatim.  */
7061 /* Return true if X will surely end up in an index register after next
7064 will_be_in_index_register (const_rtx x
)
7066 /* arm.md: calculate_pic_address will split this into a register. */
7067 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
/* NOTE(review): extraction-garbled fragment of
   arm_legitimate_address_outer_p.  Conditions are split mid-expression
   and many interior lines (strict_p parameter line, `return 1`/`return 0`
   bodies, several sub-conditions such as the LDRD addend check at
   original lines 7096-7103) are missing.  Text preserved verbatim.
   Purpose: classify X as a valid ARM-state address for MODE, given the
   OUTER rtx code context.  */
7070 /* Return nonzero if X is a valid ARM state address operand. */
7072 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7076 enum rtx_code code
= GET_CODE (x
);
7078 if (arm_address_register_rtx_p (x
, strict_p
))
7081 use_ldrd
= (TARGET_LDRD
7083 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
7085 if (code
== POST_INC
|| code
== PRE_DEC
7086 || ((code
== PRE_INC
|| code
== POST_DEC
)
7087 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7088 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7090 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7091 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7092 && GET_CODE (XEXP (x
, 1)) == PLUS
7093 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7095 rtx addend
= XEXP (XEXP (x
, 1), 1);
7097 /* Don't allow ldrd post increment by register because it's hard
7098 to fixup invalid register choices. */
7100 && GET_CODE (x
) == POST_MODIFY
7104 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7105 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7108 /* After reload constants split into minipools will have addresses
7109 from a LABEL_REF. */
7110 else if (reload_completed
7111 && (code
== LABEL_REF
7113 && GET_CODE (XEXP (x
, 0)) == PLUS
7114 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7115 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7118 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7121 else if (code
== PLUS
)
7123 rtx xop0
= XEXP (x
, 0);
7124 rtx xop1
= XEXP (x
, 1);
7126 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7127 && ((CONST_INT_P (xop1
)
7128 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7129 || (!strict_p
&& will_be_in_index_register (xop1
))))
7130 || (arm_address_register_rtx_p (xop1
, strict_p
)
7131 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7135 /* Reload currently can't handle MINUS, so disable this for now */
7136 else if (GET_CODE (x
) == MINUS
)
7138 rtx xop0
= XEXP (x
, 0);
7139 rtx xop1
= XEXP (x
, 1);
7141 return (arm_address_register_rtx_p (xop0
, strict_p
)
7142 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7146 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7147 && code
== SYMBOL_REF
7148 && CONSTANT_POOL_ADDRESS_P (x
)
7150 && symbol_mentioned_p (get_pool_constant (x
))
7151 && ! pcrel_constant_p (get_pool_constant (x
))))
/* NOTE(review): extraction-garbled fragment of
   thumb2_legitimate_address_p.  Statements are split mid-expression
   and interior lines (bool declaration of use_ldrd, `return 1/0`
   bodies, braces) are missing.  Text preserved verbatim.  Purpose:
   Thumb-2 counterpart of arm_legitimate_address_outer_p; note the
   visible Thumb-2 restriction that auto-increment addends must be
   CONST_INT.  */
7157 /* Return nonzero if X is a valid Thumb-2 address operand. */
7159 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7162 enum rtx_code code
= GET_CODE (x
);
7164 if (arm_address_register_rtx_p (x
, strict_p
))
7167 use_ldrd
= (TARGET_LDRD
7169 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
7171 if (code
== POST_INC
|| code
== PRE_DEC
7172 || ((code
== PRE_INC
|| code
== POST_DEC
)
7173 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7174 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7176 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7177 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7178 && GET_CODE (XEXP (x
, 1)) == PLUS
7179 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7181 /* Thumb-2 only has autoincrement by constant. */
7182 rtx addend
= XEXP (XEXP (x
, 1), 1);
7183 HOST_WIDE_INT offset
;
7185 if (!CONST_INT_P (addend
))
7188 offset
= INTVAL(addend
);
7189 if (GET_MODE_SIZE (mode
) <= 4)
7190 return (offset
> -256 && offset
< 256);
7192 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7193 && (offset
& 3) == 0);
7196 /* After reload constants split into minipools will have addresses
7197 from a LABEL_REF. */
7198 else if (reload_completed
7199 && (code
== LABEL_REF
7201 && GET_CODE (XEXP (x
, 0)) == PLUS
7202 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7203 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7206 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7209 else if (code
== PLUS
)
7211 rtx xop0
= XEXP (x
, 0);
7212 rtx xop1
= XEXP (x
, 1);
7214 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7215 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7216 || (!strict_p
&& will_be_in_index_register (xop1
))))
7217 || (arm_address_register_rtx_p (xop1
, strict_p
)
7218 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7221 /* Normally we can assign constant values to target registers without
7222 the help of constant pool. But there are cases we have to use constant
7224 1) assign a label to register.
7225 2) sign-extend a 8bit value to 32bit and then assign to register.
7227 Constant pool access in format:
7228 (set (reg r0) (mem (symbol_ref (".LC0"))))
7229 will cause the use of literal pool (later in function arm_reorg).
7230 So here we mark such format as an invalid format, then the compiler
7231 will adjust it into:
7232 (set (reg r0) (symbol_ref (".LC0")))
7233 (set (reg r0) (mem (reg r0))).
7234 No extra register is required, and (mem (reg r0)) won't cause the use
7235 of literal pools. */
7236 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
7237 && CONSTANT_POOL_ADDRESS_P (x
))
7240 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7241 && code
== SYMBOL_REF
7242 && CONSTANT_POOL_ADDRESS_P (x
)
7244 && symbol_mentioned_p (get_pool_constant (x
))
7245 && ! pcrel_constant_p (get_pool_constant (x
))))
/* NOTE(review): extraction-garbled fragment of arm_legitimate_index_p.
   Statements are split mid-expression; interior lines (strict_p
   parameter, braces, `return 1/0` bodies, parts of the shift/MULT
   conditions around original lines 7313-7335 and 7341-7351) are
   missing.  Text preserved verbatim.  Purpose: decide whether INDEX is
   a valid ARM-state index for MODE in the context OUTER, covering
   coprocessor, NEON, iWMMXt, LDRD and plain core-register ranges.  */
7251 /* Return nonzero if INDEX is valid for an address index operand in
7254 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7257 HOST_WIDE_INT range
;
7258 enum rtx_code code
= GET_CODE (index
);
7260 /* Standard coprocessor addressing modes. */
7261 if (TARGET_HARD_FLOAT
7263 && (mode
== SFmode
|| mode
== DFmode
))
7264 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7265 && INTVAL (index
) > -1024
7266 && (INTVAL (index
) & 3) == 0);
7268 /* For quad modes, we restrict the constant offset to be slightly less
7269 than what the instruction format permits. We do this because for
7270 quad mode moves, we will actually decompose them into two separate
7271 double-mode reads or writes. INDEX must therefore be a valid
7272 (double-mode) offset and so should INDEX+8. */
7273 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7274 return (code
== CONST_INT
7275 && INTVAL (index
) < 1016
7276 && INTVAL (index
) > -1024
7277 && (INTVAL (index
) & 3) == 0);
7279 /* We have no such constraint on double mode offsets, so we permit the
7280 full range of the instruction format. */
7281 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7282 return (code
== CONST_INT
7283 && INTVAL (index
) < 1024
7284 && INTVAL (index
) > -1024
7285 && (INTVAL (index
) & 3) == 0);
7287 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7288 return (code
== CONST_INT
7289 && INTVAL (index
) < 1024
7290 && INTVAL (index
) > -1024
7291 && (INTVAL (index
) & 3) == 0);
7293 if (arm_address_register_rtx_p (index
, strict_p
)
7294 && (GET_MODE_SIZE (mode
) <= 4))
7297 if (mode
== DImode
|| mode
== DFmode
)
7299 if (code
== CONST_INT
)
7301 HOST_WIDE_INT val
= INTVAL (index
);
/* NOTE(review): the condition distinguishing the two ranges below
   (original lines 7302-7305, presumably a TARGET_LDRD test — confirm
   against upstream) is elided.  */
7304 return val
> -256 && val
< 256;
7306 return val
> -4096 && val
< 4092;
7309 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7312 if (GET_MODE_SIZE (mode
) <= 4
7316 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7320 rtx xiop0
= XEXP (index
, 0);
7321 rtx xiop1
= XEXP (index
, 1);
7323 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7324 && power_of_two_operand (xiop1
, SImode
))
7325 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7326 && power_of_two_operand (xiop0
, SImode
)));
7328 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7329 || code
== ASHIFT
|| code
== ROTATERT
)
7331 rtx op
= XEXP (index
, 1);
7333 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7336 && INTVAL (op
) <= 31);
7340 /* For ARM v4 we may be doing a sign-extend operation during the
7346 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7352 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7354 return (code
== CONST_INT
7355 && INTVAL (index
) < range
7356 && INTVAL (index
) > -range
);
/* NOTE(review): garbled fragment of thumb2_index_mul_operand; the
   return type, braces, the HOST_WIDE_INT declaration/assignment of
   `val` and the early `return false` are elided.  Text preserved
   verbatim.  */
7359 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7360 index operand. i.e. 1, 2, 4 or 8. */
7362 thumb2_index_mul_operand (rtx op
)
7366 if (!CONST_INT_P (op
))
7370 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
/* NOTE(review): extraction-garbled fragment of
   thumb2_legitimate_index_p.  Statements split mid-expression;
   interior lines (braces, `return 0/1` bodies, the MULT condition
   around original lines 7436-7442) are missing.  Text preserved
   verbatim.  Purpose: Thumb-2 counterpart of arm_legitimate_index_p.  */
7373 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7375 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7377 enum rtx_code code
= GET_CODE (index
);
7379 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7380 /* Standard coprocessor addressing modes. */
7381 if (TARGET_HARD_FLOAT
7383 && (mode
== SFmode
|| mode
== DFmode
))
7384 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7385 /* Thumb-2 allows only > -256 index range for it's core register
7386 load/stores. Since we allow SF/DF in core registers, we have
7387 to use the intersection between -256~4096 (core) and -1024~1024
7389 && INTVAL (index
) > -256
7390 && (INTVAL (index
) & 3) == 0);
7392 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7394 /* For DImode assume values will usually live in core regs
7395 and only allow LDRD addressing modes. */
7396 if (!TARGET_LDRD
|| mode
!= DImode
)
7397 return (code
== CONST_INT
7398 && INTVAL (index
) < 1024
7399 && INTVAL (index
) > -1024
7400 && (INTVAL (index
) & 3) == 0);
7403 /* For quad modes, we restrict the constant offset to be slightly less
7404 than what the instruction format permits. We do this because for
7405 quad mode moves, we will actually decompose them into two separate
7406 double-mode reads or writes. INDEX must therefore be a valid
7407 (double-mode) offset and so should INDEX+8. */
7408 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7409 return (code
== CONST_INT
7410 && INTVAL (index
) < 1016
7411 && INTVAL (index
) > -1024
7412 && (INTVAL (index
) & 3) == 0);
7414 /* We have no such constraint on double mode offsets, so we permit the
7415 full range of the instruction format. */
7416 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7417 return (code
== CONST_INT
7418 && INTVAL (index
) < 1024
7419 && INTVAL (index
) > -1024
7420 && (INTVAL (index
) & 3) == 0);
7422 if (arm_address_register_rtx_p (index
, strict_p
)
7423 && (GET_MODE_SIZE (mode
) <= 4))
7426 if (mode
== DImode
|| mode
== DFmode
)
7428 if (code
== CONST_INT
)
7430 HOST_WIDE_INT val
= INTVAL (index
);
7431 /* ??? Can we assume ldrd for thumb2? */
7432 /* Thumb-2 ldrd only has reg+const addressing modes. */
7433 /* ldrd supports offsets of +-1020.
7434 However the ldr fallback does not. */
7435 return val
> -256 && val
< 256 && (val
& 3) == 0;
/* NOTE(review): the `else return 0;` and the MULT branch header
   (original lines 7436-7442) are among the elided lines.  */
7443 rtx xiop0
= XEXP (index
, 0);
7444 rtx xiop1
= XEXP (index
, 1);
7446 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7447 && thumb2_index_mul_operand (xiop1
))
7448 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7449 && thumb2_index_mul_operand (xiop0
)));
7451 else if (code
== ASHIFT
)
7453 rtx op
= XEXP (index
, 1);
7455 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7458 && INTVAL (op
) <= 3);
7461 return (code
== CONST_INT
7462 && INTVAL (index
) < 4096
7463 && INTVAL (index
) > -256);
/* NOTE(review): garbled fragment of thumb1_base_register_rtx_p.  The
   REG_P test, `regno = REGNO (x)`, the strict_p branch and braces are
   elided; the two return expressions remain.  Text preserved
   verbatim.  */
7466 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7468 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
7478 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7480 return (regno
<= LAST_LO_REGNUM
7481 || regno
> LAST_VIRTUAL_REGISTER
7482 || regno
== FRAME_POINTER_REGNUM
7483 || (GET_MODE_SIZE (mode
) >= 4
7484 && (regno
== STACK_POINTER_REGNUM
7485 || regno
>= FIRST_PSEUDO_REGISTER
7486 || x
== hard_frame_pointer_rtx
7487 || x
== arg_pointer_rtx
)));
/* NOTE(review): garbled fragment of thumb1_index_register_rtx_p — a
   thin wrapper delegating to thumb1_base_register_rtx_p with QImode;
   return type and braces elided.  Text preserved verbatim.  */
7490 /* Return nonzero if x is a legitimate index register. This is the case
7491 for any base register that can access a QImode object. */
7493 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
7495 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
/* NOTE(review): extraction-garbled fragment of
   thumb1_legitimate_address_p.  Statements split mid-expression;
   interior lines (`return 0/1` bodies, braces) are missing.  Text
   preserved verbatim.  Purpose: classify X as a valid Thumb-1 address
   for MODE, per the long visible header comment.  */
7498 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7500 The AP may be eliminated to either the SP or the FP, so we use the
7501 least common denominator, e.g. SImode, and offsets from 0 to 64.
7503 ??? Verify whether the above is the right approach.
7505 ??? Also, the FP may be eliminated to the SP, so perhaps that
7506 needs special handling also.
7508 ??? Look at how the mips16 port solves this problem. It probably uses
7509 better ways to solve some of these problems.
7511 Although it is not incorrect, we don't accept QImode and HImode
7512 addresses based on the frame pointer or arg pointer until the
7513 reload pass starts. This is so that eliminating such addresses
7514 into stack based ones won't produce impossible code. */
7516 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7518 /* ??? Not clear if this is right. Experiment. */
7519 if (GET_MODE_SIZE (mode
) < 4
7520 && !(reload_in_progress
|| reload_completed
)
7521 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7522 || reg_mentioned_p (arg_pointer_rtx
, x
)
7523 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7524 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7525 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7526 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7529 /* Accept any base register. SP only in SImode or larger. */
7530 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7533 /* This is PC relative data before arm_reorg runs. */
7534 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7535 && GET_CODE (x
) == SYMBOL_REF
7536 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7539 /* This is PC relative data after arm_reorg runs. */
7540 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7542 && (GET_CODE (x
) == LABEL_REF
7543 || (GET_CODE (x
) == CONST
7544 && GET_CODE (XEXP (x
, 0)) == PLUS
7545 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7546 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7549 /* Post-inc indexing only supported for SImode and larger. */
7550 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7551 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7554 else if (GET_CODE (x
) == PLUS
)
7556 /* REG+REG address can be any two index registers. */
7557 /* We disallow FRAME+REG addressing since we know that FRAME
7558 will be replaced with STACK, and SP relative addressing only
7559 permits SP+OFFSET. */
7560 if (GET_MODE_SIZE (mode
) <= 4
7561 && XEXP (x
, 0) != frame_pointer_rtx
7562 && XEXP (x
, 1) != frame_pointer_rtx
7563 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7564 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7565 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7568 /* REG+const has 5-7 bit offset for non-SP registers. */
7569 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7570 || XEXP (x
, 0) == arg_pointer_rtx
)
7571 && CONST_INT_P (XEXP (x
, 1))
7572 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7575 /* REG+const has 10-bit offset for SP, but only SImode and
7576 larger is supported. */
7577 /* ??? Should probably check for DI/DFmode overflow here
7578 just like GO_IF_LEGITIMATE_OFFSET does. */
7579 else if (REG_P (XEXP (x
, 0))
7580 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7581 && GET_MODE_SIZE (mode
) >= 4
7582 && CONST_INT_P (XEXP (x
, 1))
7583 && INTVAL (XEXP (x
, 1)) >= 0
7584 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7585 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7588 else if (REG_P (XEXP (x
, 0))
7589 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7590 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7591 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7592 && REGNO (XEXP (x
, 0))
7593 <= LAST_VIRTUAL_POINTER_REGISTER
))
7594 && GET_MODE_SIZE (mode
) >= 4
7595 && CONST_INT_P (XEXP (x
, 1))
7596 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7600 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7601 && GET_MODE_SIZE (mode
) == 4
7602 && GET_CODE (x
) == SYMBOL_REF
7603 && CONSTANT_POOL_ADDRESS_P (x
)
7605 && symbol_mentioned_p (get_pool_constant (x
))
7606 && ! pcrel_constant_p (get_pool_constant (x
))))
/* NOTE(review): garbled fragment of thumb_legitimate_offset_p.  The
   switch's case labels, braces and the default (>= 4 byte) case's full
   condition are partially elided; the per-size range checks remain.
   Text preserved verbatim.  */
7612 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7613 instruction of mode MODE. */
7615 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
7617 switch (GET_MODE_SIZE (mode
))
7620 return val
>= 0 && val
< 32;
7623 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7627 && (val
+ GET_MODE_SIZE (mode
)) <= 128
/* NOTE(review): garbled fragment of arm_legitimate_address_p — the
   target-hook dispatcher over the three sub-target predicates.  The
   return type, braces and the TARGET_ARM guard before the first return
   are elided.  Text preserved verbatim.  */
7633 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
7636 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7637 else if (TARGET_THUMB2
)
7638 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7639 else /* if (TARGET_THUMB1) */
7640 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
/* NOTE(review): garbled fragment of arm_preferred_reload_class; the
   body (the TARGET_32BIT branch and both return statements) is almost
   entirely elided — only the GENERAL_REGS test survives.  Text
   preserved verbatim.  */
7643 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7645 Given an rtx X being reloaded into a reg required to be
7646 in class CLASS, return the class of reg to actually use.
7647 In general this is just CLASS, but for the Thumb core registers and
7648 immediate constants we prefer a LO_REGS class or a subset. */
7651 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7657 if (rclass
== GENERAL_REGS
)
/* NOTE(review): garbled fragment — lazily-initialized cache of the
   __tls_get_addr SYMBOL_REF plus its accessor; the accessor's return
   type and braces are elided.  Text preserved verbatim.  */
7664 /* Build the SYMBOL_REF for __tls_get_addr. */
7666 static GTY(()) rtx tls_get_addr_libfunc
;
7669 get_tls_get_addr (void)
7671 if (!tls_get_addr_libfunc
)
7672 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7673 return tls_get_addr_libfunc
;
/* NOTE(review): garbled fragment of arm_load_tp.  Emits code to read
   the thread pointer into TARGET (allocating a pseudo when TARGET is
   null), via the hard-TP instruction or the soft __aeabi_read_tp-style
   helper whose result lands in r0.  The return type, braces, the
   TARGET_HARD_TP guard and `return target;` are elided.  Text
   preserved verbatim.  */
7677 arm_load_tp (rtx target
)
7680 target
= gen_reg_rtx (SImode
);
7684 /* Can return in any reg. */
7685 emit_insn (gen_load_tp_hard (target
));
7689 /* Always returned in r0. Immediately copy the result into a pseudo,
7690 otherwise other uses of r0 (e.g. setting up function arguments) may
7691 clobber the value. */
7695 emit_insn (gen_load_tp_soft ());
7697 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
7698 emit_move_insn (target
, tmp
);
/* NOTE(review): garbled fragment of load_tls_operand.  Wraps X in a
   CONST and moves it into REG (allocating a pseudo when REG is null);
   the return type, braces, `rtx tmp;` declaration and `return reg;`
   are elided.  Text preserved verbatim.  */
7704 load_tls_operand (rtx x
, rtx reg
)
7708 if (reg
== NULL_RTX
)
7709 reg
= gen_reg_rtx (SImode
);
7711 tmp
= gen_rtx_CONST (SImode
, x
);
7713 emit_move_insn (reg
, tmp
);
/* NOTE(review): garbled fragment of arm_call_tls_get_addr.  Builds the
   pc-relative TLS UNSPEC operand, emits the dot-plus-eight/four add,
   then calls __tls_get_addr, storing the call's value through VALUEP
   and returning the captured insn sequence.  Return type, braces, the
   start_sequence/end_sequence pair, the UNSPEC code argument after
   original line 7732, the TARGET_ARM guard, and `return insns;` are
   elided.  Text preserved verbatim.  */
7719 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7721 rtx insns
, label
, labelno
, sum
;
7723 gcc_assert (reloc
!= TLS_DESCSEQ
);
7726 labelno
= GEN_INT (pic_labelno
++);
7727 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7728 label
= gen_rtx_CONST (VOIDmode
, label
);
7730 sum
= gen_rtx_UNSPEC (Pmode
,
7731 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7732 GEN_INT (TARGET_ARM
? 8 : 4)),
7734 reg
= load_tls_operand (sum
, reg
);
7737 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7739 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7741 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7742 LCT_PURE
, /* LCT_CONST? */
7743 Pmode
, 1, reg
, Pmode
);
7745 insns
= get_insns ();
/* NOTE(review): garbled fragment of arm_tls_descseq_addr.  Emits the
   GNU2 TLS descriptor sequence: loads the 4-element TLS_DESCSEQ
   UNSPEC into r0, emits the tlscall, then copies the r0 result into a
   fresh pseudo.  Return type, braces, the UNSPEC code after original
   line 7759, insn-emission details around the call, and `return reg;`
   are elided.  Text preserved verbatim.  */
7752 arm_tls_descseq_addr (rtx x
, rtx reg
)
7754 rtx labelno
= GEN_INT (pic_labelno
++);
7755 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7756 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7757 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7758 gen_rtx_CONST (VOIDmode
, label
),
7759 GEN_INT (!TARGET_ARM
)),
7761 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
7763 emit_insn (gen_tlscall (x
, labelno
));
7765 reg
= gen_reg_rtx (SImode
);
7767 gcc_assert (REGNO (reg
) != R0_REGNUM
);
7769 emit_move_insn (reg
, reg0
);
/* NOTE(review): extraction-garbled fragment of legitimize_tls_address.
   Dispatches on SYMBOL_REF_TLS_MODEL (x): global-dynamic and
   local-dynamic (with GNU2 descriptor variants), initial-exec and
   local-exec.  The `switch` header, braces, several `return dest;`
   statements, UNSPEC code arguments (after original lines 7816, 7822,
   7836, 7858) and the default case are elided.  Text preserved
   verbatim.  */
7775 legitimize_tls_address (rtx x
, rtx reg
)
7777 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7778 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7782 case TLS_MODEL_GLOBAL_DYNAMIC
:
7783 if (TARGET_GNU2_TLS
)
7785 reg
= arm_tls_descseq_addr (x
, reg
);
7787 tp
= arm_load_tp (NULL_RTX
);
7789 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7793 /* Original scheme */
7794 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7795 dest
= gen_reg_rtx (Pmode
);
7796 emit_libcall_block (insns
, dest
, ret
, x
);
7800 case TLS_MODEL_LOCAL_DYNAMIC
:
7801 if (TARGET_GNU2_TLS
)
7803 reg
= arm_tls_descseq_addr (x
, reg
);
7805 tp
= arm_load_tp (NULL_RTX
);
7807 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7811 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7813 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7814 share the LDM result with other LD model accesses. */
7815 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7817 dest
= gen_reg_rtx (Pmode
);
7818 emit_libcall_block (insns
, dest
, ret
, eqv
);
7820 /* Load the addend. */
7821 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7822 GEN_INT (TLS_LDO32
)),
7824 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7825 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7829 case TLS_MODEL_INITIAL_EXEC
:
7830 labelno
= GEN_INT (pic_labelno
++);
7831 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7832 label
= gen_rtx_CONST (VOIDmode
, label
);
7833 sum
= gen_rtx_UNSPEC (Pmode
,
7834 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7835 GEN_INT (TARGET_ARM
? 8 : 4)),
7837 reg
= load_tls_operand (sum
, reg
);
7840 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7841 else if (TARGET_THUMB2
)
7842 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7845 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7846 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7849 tp
= arm_load_tp (NULL_RTX
);
7851 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7853 case TLS_MODEL_LOCAL_EXEC
:
7854 tp
= arm_load_tp (NULL_RTX
);
7856 reg
= gen_rtx_UNSPEC (Pmode
,
7857 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7859 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7861 return gen_rtx_PLUS (Pmode
, tp
, reg
);
/* NOTE(review): extraction-garbled fragment of arm_legitimize_address
   (TARGET_LEGITIMIZE_ADDRESS worker for ARM state).  Handles TLS
   references, Thumb delegation, PLUS/MINUS operand forcing, large
   constant-offset splitting, and a final PIC pass.  Braces, several
   declarations (`rtx addend`, `base_reg`, `val`, `bits`), the `n =
   INTVAL (xop1)` assignment, flag_pic guard and `return x;` are
   elided.  Text preserved verbatim.  */
7868 /* Try machine-dependent ways of modifying an illegitimate address
7869 to be legitimate. If we find one, return the new, valid address. */
7871 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
7873 if (arm_tls_referenced_p (x
))
7877 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
7879 addend
= XEXP (XEXP (x
, 0), 1);
7880 x
= XEXP (XEXP (x
, 0), 0);
7883 if (GET_CODE (x
) != SYMBOL_REF
)
7886 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
7888 x
= legitimize_tls_address (x
, NULL_RTX
);
7892 x
= gen_rtx_PLUS (SImode
, x
, addend
);
7901 /* TODO: legitimize_address for Thumb2. */
7904 return thumb_legitimize_address (x
, orig_x
, mode
);
7907 if (GET_CODE (x
) == PLUS
)
7909 rtx xop0
= XEXP (x
, 0);
7910 rtx xop1
= XEXP (x
, 1);
7912 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7913 xop0
= force_reg (SImode
, xop0
);
7915 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7916 && !symbol_mentioned_p (xop1
))
7917 xop1
= force_reg (SImode
, xop1
);
7919 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7920 && CONST_INT_P (xop1
))
7922 HOST_WIDE_INT n
, low_n
;
7926 /* VFP addressing modes actually allow greater offsets, but for
7927 now we just stick with the lowest common denominator. */
7929 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7941 low_n
= ((mode
) == TImode
? 0
7942 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7946 base_reg
= gen_reg_rtx (SImode
);
7947 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7948 emit_move_insn (base_reg
, val
);
7949 x
= plus_constant (Pmode
, base_reg
, low_n
);
7951 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7952 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7955 /* XXX We don't allow MINUS any more -- see comment in
7956 arm_legitimate_address_outer_p (). */
7957 else if (GET_CODE (x
) == MINUS
)
7959 rtx xop0
= XEXP (x
, 0);
7960 rtx xop1
= XEXP (x
, 1);
7962 if (CONSTANT_P (xop0
))
7963 xop0
= force_reg (SImode
, xop0
);
7965 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
7966 xop1
= force_reg (SImode
, xop1
);
7968 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7969 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
7972 /* Make sure to take full advantage of the pre-indexed addressing mode
7973 with absolute addresses which often allows for the base register to
7974 be factorized for multiple adjacent memory references, and it might
7975 even allows for the mini pool to be avoided entirely. */
7976 else if (CONST_INT_P (x
) && optimize
> 0)
7979 HOST_WIDE_INT mask
, base
, index
;
7982 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7983 use a 8-bit index. So let's use a 12-bit index for SImode only and
7984 hope that arm_gen_constant will enable ldrb to use more bits. */
7985 bits
= (mode
== SImode
) ? 12 : 8;
7986 mask
= (1 << bits
) - 1;
7987 base
= INTVAL (x
) & ~mask
;
7988 index
= INTVAL (x
) & mask
;
7989 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
7991 /* It'll most probably be more efficient to generate the base
7992 with more bits set and use a negative index instead. */
7996 base_reg
= force_reg (SImode
, GEN_INT (base
));
7997 x
= plus_constant (Pmode
, base_reg
, index
);
8002 /* We need to find and carefully transform any SYMBOL and LABEL
8003 references; so go back to the original address expression. */
8004 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8006 if (new_x
!= orig_x
)
/* NOTE(review): extraction-garbled fragment of
   thumb_legitimize_address.  Splits out-of-range constant offsets into
   a biased base plus small offset, forces awkward operands into
   registers, and finishes with a PIC pass.  Braces, `if (optimize_size
   ...)` structure details, the flag_pic guard and `return x;` are
   elided.  Text preserved verbatim.  */
8014 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8015 to be legitimate. If we find one, return the new, valid address. */
8017 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8019 if (GET_CODE (x
) == PLUS
8020 && CONST_INT_P (XEXP (x
, 1))
8021 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8022 || INTVAL (XEXP (x
, 1)) < 0))
8024 rtx xop0
= XEXP (x
, 0);
8025 rtx xop1
= XEXP (x
, 1);
8026 HOST_WIDE_INT offset
= INTVAL (xop1
);
8028 /* Try and fold the offset into a biasing of the base register and
8029 then offsetting that. Don't do this when optimizing for space
8030 since it can cause too many CSEs. */
8031 if (optimize_size
&& offset
>= 0
8032 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8034 HOST_WIDE_INT delta
;
8037 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8038 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8039 delta
= 31 * GET_MODE_SIZE (mode
);
8041 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8043 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8045 x
= plus_constant (Pmode
, xop0
, delta
);
8047 else if (offset
< 0 && offset
> -256)
8048 /* Small negative offsets are best done with a subtract before the
8049 dereference, forcing these into a register normally takes two
8051 x
= force_operand (x
, NULL_RTX
);
8054 /* For the remaining cases, force the constant into a register. */
8055 xop1
= force_reg (SImode
, xop1
);
8056 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8059 else if (GET_CODE (x
) == PLUS
8060 && s_register_operand (XEXP (x
, 1), SImode
)
8061 && !s_register_operand (XEXP (x
, 0), SImode
))
8063 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8065 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8070 /* We need to find and carefully transform any SYMBOL and LABEL
8071 references; so go back to the original address expression. */
8072 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8074 if (new_x
!= orig_x
)
8081 /* Return TRUE if X contains any TLS symbol references. */
8084 arm_tls_referenced_p (rtx x
)
8086 if (! TARGET_HAVE_TLS
)
8089 subrtx_iterator::array_type array
;
8090 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8092 const_rtx x
= *iter
;
8093 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8096 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8097 TLS offsets, not real symbol references. */
8098 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8099 iter
.skip_subrtxes ();
8104 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8106 On the ARM, allow any integer (invalid ones are removed later by insn
8107 patterns), nice doubles and symbol_refs which refer to the function's
8110 When generating pic allow anything. */
8113 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8115 return flag_pic
|| !label_mentioned_p (x
);
8119 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8121 return (CONST_INT_P (x
)
8122 || CONST_DOUBLE_P (x
)
8123 || CONSTANT_ADDRESS_P (x
)
8128 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8130 return (!arm_cannot_force_const_mem (mode
, x
)
8132 ? arm_legitimate_constant_p_1 (mode
, x
)
8133 : thumb_legitimate_constant_p (mode
, x
)));
8136 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8139 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8143 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8145 split_const (x
, &base
, &offset
);
8146 if (GET_CODE (base
) == SYMBOL_REF
8147 && !offset_within_block_p (base
, INTVAL (offset
)))
8150 return arm_tls_referenced_p (x
);
8153 #define REG_OR_SUBREG_REG(X) \
8155 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8157 #define REG_OR_SUBREG_RTX(X) \
8158 (REG_P (X) ? (X) : SUBREG_REG (X))
8161 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8163 machine_mode mode
= GET_MODE (x
);
8172 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8179 return COSTS_N_INSNS (1);
8182 if (CONST_INT_P (XEXP (x
, 1)))
8185 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8192 return COSTS_N_INSNS (2) + cycles
;
8194 return COSTS_N_INSNS (1) + 16;
8197 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8199 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8200 return (COSTS_N_INSNS (words
)
8201 + 4 * ((MEM_P (SET_SRC (x
)))
8202 + MEM_P (SET_DEST (x
))));
8207 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8209 if (thumb_shiftable_const (INTVAL (x
)))
8210 return COSTS_N_INSNS (2);
8211 return COSTS_N_INSNS (3);
8213 else if ((outer
== PLUS
|| outer
== COMPARE
)
8214 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8216 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8217 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8218 return COSTS_N_INSNS (1);
8219 else if (outer
== AND
)
8222 /* This duplicates the tests in the andsi3 expander. */
8223 for (i
= 9; i
<= 31; i
++)
8224 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8225 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8226 return COSTS_N_INSNS (2);
8228 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8229 || outer
== LSHIFTRT
)
8231 return COSTS_N_INSNS (2);
8237 return COSTS_N_INSNS (3);
8255 /* XXX another guess. */
8256 /* Memory costs quite a lot for the first word, but subsequent words
8257 load at the equivalent of a single insn each. */
8258 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8259 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8264 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8270 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8271 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8277 return total
+ COSTS_N_INSNS (1);
8279 /* Assume a two-shift sequence. Increase the cost slightly so
8280 we prefer actual shifts over an extend operation. */
8281 return total
+ 1 + COSTS_N_INSNS (2);
8289 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8291 machine_mode mode
= GET_MODE (x
);
8292 enum rtx_code subcode
;
8294 enum rtx_code code
= GET_CODE (x
);
8300 /* Memory costs quite a lot for the first word, but subsequent words
8301 load at the equivalent of a single insn each. */
8302 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8309 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8310 *total
= COSTS_N_INSNS (2);
8311 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8312 *total
= COSTS_N_INSNS (4);
8314 *total
= COSTS_N_INSNS (20);
8318 if (REG_P (XEXP (x
, 1)))
8319 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8320 else if (!CONST_INT_P (XEXP (x
, 1)))
8321 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8327 *total
+= COSTS_N_INSNS (4);
8332 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8333 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8336 *total
+= COSTS_N_INSNS (3);
8340 *total
+= COSTS_N_INSNS (1);
8341 /* Increase the cost of complex shifts because they aren't any faster,
8342 and reduce dual issue opportunities. */
8343 if (arm_tune_cortex_a9
8344 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8352 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8353 if (CONST_INT_P (XEXP (x
, 0))
8354 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8356 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8360 if (CONST_INT_P (XEXP (x
, 1))
8361 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8363 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8370 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8372 if (TARGET_HARD_FLOAT
8374 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8376 *total
= COSTS_N_INSNS (1);
8377 if (CONST_DOUBLE_P (XEXP (x
, 0))
8378 && arm_const_double_rtx (XEXP (x
, 0)))
8380 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8384 if (CONST_DOUBLE_P (XEXP (x
, 1))
8385 && arm_const_double_rtx (XEXP (x
, 1)))
8387 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8393 *total
= COSTS_N_INSNS (20);
8397 *total
= COSTS_N_INSNS (1);
8398 if (CONST_INT_P (XEXP (x
, 0))
8399 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8401 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8405 subcode
= GET_CODE (XEXP (x
, 1));
8406 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8407 || subcode
== LSHIFTRT
8408 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8410 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8411 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8415 /* A shift as a part of RSB costs no more than RSB itself. */
8416 if (GET_CODE (XEXP (x
, 0)) == MULT
8417 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8419 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8420 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8425 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8427 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8428 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8432 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8433 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8435 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8436 if (REG_P (XEXP (XEXP (x
, 1), 0))
8437 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8438 *total
+= COSTS_N_INSNS (1);
8446 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8447 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8448 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8450 *total
= COSTS_N_INSNS (1);
8451 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8453 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8457 /* MLA: All arguments must be registers. We filter out
8458 multiplication by a power of two, so that we fall down into
8460 if (GET_CODE (XEXP (x
, 0)) == MULT
8461 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8463 /* The cost comes from the cost of the multiply. */
8467 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8469 if (TARGET_HARD_FLOAT
8471 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8473 *total
= COSTS_N_INSNS (1);
8474 if (CONST_DOUBLE_P (XEXP (x
, 1))
8475 && arm_const_double_rtx (XEXP (x
, 1)))
8477 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8484 *total
= COSTS_N_INSNS (20);
8488 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8489 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8491 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8492 if (REG_P (XEXP (XEXP (x
, 0), 0))
8493 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8494 *total
+= COSTS_N_INSNS (1);
8500 case AND
: case XOR
: case IOR
:
8502 /* Normally the frame registers will be spilt into reg+const during
8503 reload, so it is a bad idea to combine them with other instructions,
8504 since then they might not be moved outside of loops. As a compromise
8505 we allow integration with ops that have a constant as their second
8507 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8508 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8509 && !CONST_INT_P (XEXP (x
, 1)))
8510 *total
= COSTS_N_INSNS (1);
8514 *total
+= COSTS_N_INSNS (2);
8515 if (CONST_INT_P (XEXP (x
, 1))
8516 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8518 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8525 *total
+= COSTS_N_INSNS (1);
8526 if (CONST_INT_P (XEXP (x
, 1))
8527 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8529 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8532 subcode
= GET_CODE (XEXP (x
, 0));
8533 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8534 || subcode
== LSHIFTRT
8535 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8537 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8538 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8543 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8545 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8546 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8550 if (subcode
== UMIN
|| subcode
== UMAX
8551 || subcode
== SMIN
|| subcode
== SMAX
)
8553 *total
= COSTS_N_INSNS (3);
8560 /* This should have been handled by the CPU specific routines. */
8564 if (arm_arch3m
&& mode
== SImode
8565 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8566 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8567 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8568 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8569 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8570 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8572 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8575 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8579 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8581 if (TARGET_HARD_FLOAT
8583 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8585 *total
= COSTS_N_INSNS (1);
8588 *total
= COSTS_N_INSNS (2);
8594 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8595 if (mode
== SImode
&& code
== NOT
)
8597 subcode
= GET_CODE (XEXP (x
, 0));
8598 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8599 || subcode
== LSHIFTRT
8600 || subcode
== ROTATE
|| subcode
== ROTATERT
8602 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8604 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8605 /* Register shifts cost an extra cycle. */
8606 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8607 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8616 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8618 *total
= COSTS_N_INSNS (4);
8622 operand
= XEXP (x
, 0);
8624 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8625 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8626 && REG_P (XEXP (operand
, 0))
8627 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8628 *total
+= COSTS_N_INSNS (1);
8629 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8630 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8634 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8636 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8642 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8643 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8645 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8651 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8652 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8654 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8674 /* SCC insns. In the case where the comparison has already been
8675 performed, then they cost 2 instructions. Otherwise they need
8676 an additional comparison before them. */
8677 *total
= COSTS_N_INSNS (2);
8678 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8685 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8691 *total
+= COSTS_N_INSNS (1);
8692 if (CONST_INT_P (XEXP (x
, 1))
8693 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8695 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8699 subcode
= GET_CODE (XEXP (x
, 0));
8700 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8701 || subcode
== LSHIFTRT
8702 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8704 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8705 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8710 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8712 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8713 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8723 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8724 if (!CONST_INT_P (XEXP (x
, 1))
8725 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8726 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8730 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8732 if (TARGET_HARD_FLOAT
8734 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8736 *total
= COSTS_N_INSNS (1);
8739 *total
= COSTS_N_INSNS (20);
8742 *total
= COSTS_N_INSNS (1);
8744 *total
+= COSTS_N_INSNS (3);
8750 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8752 rtx op
= XEXP (x
, 0);
8753 machine_mode opmode
= GET_MODE (op
);
8756 *total
+= COSTS_N_INSNS (1);
8758 if (opmode
!= SImode
)
8762 /* If !arm_arch4, we use one of the extendhisi2_mem
8763 or movhi_bytes patterns for HImode. For a QImode
8764 sign extension, we first zero-extend from memory
8765 and then perform a shift sequence. */
8766 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8767 *total
+= COSTS_N_INSNS (2);
8770 *total
+= COSTS_N_INSNS (1);
8772 /* We don't have the necessary insn, so we need to perform some
8774 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8775 /* An and with constant 255. */
8776 *total
+= COSTS_N_INSNS (1);
8778 /* A shift sequence. Increase costs slightly to avoid
8779 combining two shifts into an extend operation. */
8780 *total
+= COSTS_N_INSNS (2) + 1;
8786 switch (GET_MODE (XEXP (x
, 0)))
8793 *total
= COSTS_N_INSNS (1);
8803 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8807 if (const_ok_for_arm (INTVAL (x
))
8808 || const_ok_for_arm (~INTVAL (x
)))
8809 *total
= COSTS_N_INSNS (1);
8811 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8812 INTVAL (x
), NULL_RTX
,
8819 *total
= COSTS_N_INSNS (3);
8823 *total
= COSTS_N_INSNS (1);
8827 *total
= COSTS_N_INSNS (1);
8828 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8832 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8833 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8834 *total
= COSTS_N_INSNS (1);
8836 *total
= COSTS_N_INSNS (4);
8840 /* The vec_extract patterns accept memory operands that require an
8841 address reload. Account for the cost of that reload to give the
8842 auto-inc-dec pass an incentive to try to replace them. */
8843 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8844 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8846 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8847 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8848 *total
+= COSTS_N_INSNS (1);
8851 /* Likewise for the vec_set patterns. */
8852 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8853 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8854 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8856 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8857 *total
= rtx_cost (mem
, code
, 0, speed
);
8858 if (!neon_vector_mem_operand (mem
, 2, true))
8859 *total
+= COSTS_N_INSNS (1);
8865 /* We cost this as high as our memory costs to allow this to
8866 be hoisted from loops. */
8867 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8869 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8875 && TARGET_HARD_FLOAT
8877 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8878 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8879 *total
= COSTS_N_INSNS (1);
8881 *total
= COSTS_N_INSNS (4);
8885 *total
= COSTS_N_INSNS (4);
8890 /* Estimates the size cost of thumb1 instructions.
8891 For now most of the code is copied from thumb1_rtx_costs. We need more
8892 fine grain tuning when we have more related test cases. */
8894 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8896 machine_mode mode
= GET_MODE (x
);
8905 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8909 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8910 defined by RTL expansion, especially for the expansion of
8912 if ((GET_CODE (XEXP (x
, 0)) == MULT
8913 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8914 || (GET_CODE (XEXP (x
, 1)) == MULT
8915 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8916 return COSTS_N_INSNS (2);
8917 /* On purpose fall through for normal RTX. */
8921 return COSTS_N_INSNS (1);
8924 if (CONST_INT_P (XEXP (x
, 1)))
8926 /* Thumb1 mul instruction can't operate on const. We must Load it
8927 into a register first. */
8928 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8929 /* For the targets which have a very small and high-latency multiply
8930 unit, we prefer to synthesize the mult with up to 5 instructions,
8931 giving a good balance between size and performance. */
8932 if (arm_arch6m
&& arm_m_profile_small_mul
)
8933 return COSTS_N_INSNS (5);
8935 return COSTS_N_INSNS (1) + const_size
;
8937 return COSTS_N_INSNS (1);
8940 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8942 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8943 return COSTS_N_INSNS (words
)
8944 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x
))
8945 || satisfies_constraint_K (SET_SRC (x
))
8946 /* thumb1_movdi_insn. */
8947 || ((words
> 1) && MEM_P (SET_SRC (x
))));
8952 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8953 return COSTS_N_INSNS (1);
8954 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8955 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8956 return COSTS_N_INSNS (2);
8957 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8958 if (thumb_shiftable_const (INTVAL (x
)))
8959 return COSTS_N_INSNS (2);
8960 return COSTS_N_INSNS (3);
8962 else if ((outer
== PLUS
|| outer
== COMPARE
)
8963 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8965 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8966 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8967 return COSTS_N_INSNS (1);
8968 else if (outer
== AND
)
8971 /* This duplicates the tests in the andsi3 expander. */
8972 for (i
= 9; i
<= 31; i
++)
8973 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8974 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8975 return COSTS_N_INSNS (2);
8977 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8978 || outer
== LSHIFTRT
)
8980 return COSTS_N_INSNS (2);
8986 return COSTS_N_INSNS (3);
9000 return COSTS_N_INSNS (1);
9003 return (COSTS_N_INSNS (1)
9005 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9006 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9007 ? COSTS_N_INSNS (1) : 0));
9011 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9016 /* XXX still guessing. */
9017 switch (GET_MODE (XEXP (x
, 0)))
9020 return (1 + (mode
== DImode
? 4 : 0)
9021 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9024 return (4 + (mode
== DImode
? 4 : 0)
9025 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9028 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9039 /* RTX costs when optimizing for size. */
9041 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9044 machine_mode mode
= GET_MODE (x
);
9047 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9051 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9055 /* A memory access costs 1 insn if the mode is small, or the address is
9056 a single register, otherwise it costs one insn per word. */
9057 if (REG_P (XEXP (x
, 0)))
9058 *total
= COSTS_N_INSNS (1);
9060 && GET_CODE (XEXP (x
, 0)) == PLUS
9061 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9062 /* This will be split into two instructions.
9063 See arm.md:calculate_pic_address. */
9064 *total
= COSTS_N_INSNS (2);
9066 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9073 /* Needs a libcall, so it costs about this. */
9074 *total
= COSTS_N_INSNS (2);
9078 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9080 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9088 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9090 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9093 else if (mode
== SImode
)
9095 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9096 /* Slightly disparage register shifts, but not by much. */
9097 if (!CONST_INT_P (XEXP (x
, 1)))
9098 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
9102 /* Needs a libcall. */
9103 *total
= COSTS_N_INSNS (2);
9107 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9108 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9110 *total
= COSTS_N_INSNS (1);
9116 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
9117 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
9119 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
9120 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
9121 || subcode1
== ROTATE
|| subcode1
== ROTATERT
9122 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
9123 || subcode1
== ASHIFTRT
)
9125 /* It's just the cost of the two operands. */
9130 *total
= COSTS_N_INSNS (1);
9134 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9138 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9139 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9141 *total
= COSTS_N_INSNS (1);
9145 /* A shift as a part of ADD costs nothing. */
9146 if (GET_CODE (XEXP (x
, 0)) == MULT
9147 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
9149 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
9150 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
9151 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
9156 case AND
: case XOR
: case IOR
:
9159 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9161 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9162 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9163 || (code
== AND
&& subcode
== NOT
))
9165 /* It's just the cost of the two operands. */
9171 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9175 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9179 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9180 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9182 *total
= COSTS_N_INSNS (1);
9188 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9197 if (cc_register (XEXP (x
, 0), VOIDmode
))
9200 *total
= COSTS_N_INSNS (1);
9204 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9205 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9206 *total
= COSTS_N_INSNS (1);
9208 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9213 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9216 if (const_ok_for_arm (INTVAL (x
)))
9217 /* A multiplication by a constant requires another instruction
9218 to load the constant to a register. */
9219 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9221 else if (const_ok_for_arm (~INTVAL (x
)))
9222 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9223 else if (const_ok_for_arm (-INTVAL (x
)))
9225 if (outer_code
== COMPARE
|| outer_code
== PLUS
9226 || outer_code
== MINUS
)
9229 *total
= COSTS_N_INSNS (1);
9232 *total
= COSTS_N_INSNS (2);
9238 *total
= COSTS_N_INSNS (2);
9242 *total
= COSTS_N_INSNS (4);
9247 && TARGET_HARD_FLOAT
9248 && outer_code
== SET
9249 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9250 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9251 *total
= COSTS_N_INSNS (1);
9253 *total
= COSTS_N_INSNS (4);
9258 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9259 cost of these slightly. */
9260 *total
= COSTS_N_INSNS (1) + 1;
9267 if (mode
!= VOIDmode
)
9268 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9270 *total
= COSTS_N_INSNS (4); /* How knows? */
9275 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9276 operand, then return the operand that is being shifted. If the shift
9277 is not by a constant, then set SHIFT_REG to point to the operand.
9278 Return NULL if OP is not a shifter operand. */
9280 shifter_op_p (rtx op
, rtx
*shift_reg
)
9282 enum rtx_code code
= GET_CODE (op
);
9284 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9285 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9286 return XEXP (op
, 0);
9287 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9288 return XEXP (op
, 0);
9289 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9290 || code
== ASHIFTRT
)
9292 if (!CONST_INT_P (XEXP (op
, 1)))
9293 *shift_reg
= XEXP (op
, 1);
9294 return XEXP (op
, 0);
9301 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9303 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9304 rtx_code code
= GET_CODE (x
);
9305 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9307 switch (XINT (x
, 1))
9309 case UNSPEC_UNALIGNED_LOAD
:
9310 /* We can only do unaligned loads into the integer unit, and we can't
9312 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9314 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9315 + extra_cost
->ldst
.load_unaligned
);
9318 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9319 ADDR_SPACE_GENERIC
, speed_p
);
9323 case UNSPEC_UNALIGNED_STORE
:
9324 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9326 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9327 + extra_cost
->ldst
.store_unaligned
);
9329 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
9331 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9332 ADDR_SPACE_GENERIC
, speed_p
);
9342 *cost
= COSTS_N_INSNS (1);
9344 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9348 *cost
= COSTS_N_INSNS (2);
9354 /* Cost of a libcall. We assume one insn per argument, an amount for the
9355 call (one insn for -Os) and then one for processing the result. */
9356 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9358 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9361 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9362 if (shift_op != NULL \
9363 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9368 *cost += extra_cost->alu.arith_shift_reg; \
9369 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9372 *cost += extra_cost->alu.arith_shift; \
9374 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9375 + rtx_cost (XEXP (x, 1 - IDX), \
9382 /* RTX costs. Make an estimate of the cost of executing the operation
9383 X, which is contained with an operation with code OUTER_CODE.
9384 SPEED_P indicates whether the cost desired is the performance cost,
9385 or the size cost. The estimate is stored in COST and the return
9386 value is TRUE if the cost calculation is final, or FALSE if the
9387 caller should recurse through the operands of X to add additional
9390 We currently make no attempt to model the size savings of Thumb-2
9391 16-bit instructions. At the normal points in compilation where
9392 this code is called we have no measure of whether the condition
9393 flags are live or not, and thus no realistic way to determine what
9394 the size will eventually be. */
9396 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9397 const struct cpu_cost_table
*extra_cost
,
9398 int *cost
, bool speed_p
)
9400 machine_mode mode
= GET_MODE (x
);
9405 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9407 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9415 /* SET RTXs don't have a mode so we get it from the destination. */
9416 mode
= GET_MODE (SET_DEST (x
));
9418 if (REG_P (SET_SRC (x
))
9419 && REG_P (SET_DEST (x
)))
9421 /* Assume that most copies can be done with a single insn,
9422 unless we don't have HW FP, in which case everything
9423 larger than word mode will require two insns. */
9424 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9425 && GET_MODE_SIZE (mode
) > 4)
9428 /* Conditional register moves can be encoded
9429 in 16 bits in Thumb mode. */
9430 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9436 if (CONST_INT_P (SET_SRC (x
)))
9438 /* Handle CONST_INT here, since the value doesn't have a mode
9439 and we would otherwise be unable to work out the true cost. */
9440 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9442 /* Slightly lower the cost of setting a core reg to a constant.
9443 This helps break up chains and allows for better scheduling. */
9444 if (REG_P (SET_DEST (x
))
9445 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9448 /* Immediate moves with an immediate in the range [0, 255] can be
9449 encoded in 16 bits in Thumb mode. */
9450 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9451 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9453 goto const_int_cost
;
9459 /* A memory access costs 1 insn if the mode is small, or the address is
9460 a single register, otherwise it costs one insn per word. */
9461 if (REG_P (XEXP (x
, 0)))
9462 *cost
= COSTS_N_INSNS (1);
9464 && GET_CODE (XEXP (x
, 0)) == PLUS
9465 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9466 /* This will be split into two instructions.
9467 See arm.md:calculate_pic_address. */
9468 *cost
= COSTS_N_INSNS (2);
9470 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9472 /* For speed optimizations, add the costs of the address and
9473 accessing memory. */
9476 *cost
+= (extra_cost
->ldst
.load
9477 + arm_address_cost (XEXP (x
, 0), mode
,
9478 ADDR_SPACE_GENERIC
, speed_p
));
9480 *cost
+= extra_cost
->ldst
.load
;
9486 /* Calculations of LDM costs are complex. We assume an initial cost
9487 (ldm_1st) which will load the number of registers mentioned in
9488 ldm_regs_per_insn_1st registers; then each additional
9489 ldm_regs_per_insn_subsequent registers cost one more insn. The
9490 formula for N regs is thus:
9492 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9493 + ldm_regs_per_insn_subsequent - 1)
9494 / ldm_regs_per_insn_subsequent).
9496 Additional costs may also be added for addressing. A similar
9497 formula is used for STM. */
9499 bool is_ldm
= load_multiple_operation (x
, SImode
);
9500 bool is_stm
= store_multiple_operation (x
, SImode
);
9502 *cost
= COSTS_N_INSNS (1);
9504 if (is_ldm
|| is_stm
)
9508 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9509 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9510 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9511 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9512 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9513 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9514 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9516 *cost
+= regs_per_insn_1st
9517 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9518 + regs_per_insn_sub
- 1)
9519 / regs_per_insn_sub
);
9528 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9529 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9530 *cost
= COSTS_N_INSNS (speed_p
9531 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9532 else if (mode
== SImode
&& TARGET_IDIV
)
9533 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9535 *cost
= LIBCALL_COST (2);
9536 return false; /* All arguments must be in registers. */
9540 *cost
= LIBCALL_COST (2);
9541 return false; /* All arguments must be in registers. */
9544 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9546 *cost
= (COSTS_N_INSNS (2)
9547 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9549 *cost
+= extra_cost
->alu
.shift_reg
;
9557 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9559 *cost
= (COSTS_N_INSNS (3)
9560 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9562 *cost
+= 2 * extra_cost
->alu
.shift
;
9565 else if (mode
== SImode
)
9567 *cost
= (COSTS_N_INSNS (1)
9568 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9569 /* Slightly disparage register shifts at -Os, but not by much. */
9570 if (!CONST_INT_P (XEXP (x
, 1)))
9571 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9572 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9575 else if (GET_MODE_CLASS (mode
) == MODE_INT
9576 && GET_MODE_SIZE (mode
) < 4)
9580 *cost
= (COSTS_N_INSNS (1)
9581 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9582 /* Slightly disparage register shifts at -Os, but not by
9584 if (!CONST_INT_P (XEXP (x
, 1)))
9585 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9586 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9588 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9590 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9592 /* Can use SBFX/UBFX. */
9593 *cost
= COSTS_N_INSNS (1);
9595 *cost
+= extra_cost
->alu
.bfx
;
9596 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9600 *cost
= COSTS_N_INSNS (2);
9601 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9604 if (CONST_INT_P (XEXP (x
, 1)))
9605 *cost
+= 2 * extra_cost
->alu
.shift
;
9607 *cost
+= (extra_cost
->alu
.shift
9608 + extra_cost
->alu
.shift_reg
);
9611 /* Slightly disparage register shifts. */
9612 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9617 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9618 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9621 if (CONST_INT_P (XEXP (x
, 1)))
9622 *cost
+= (2 * extra_cost
->alu
.shift
9623 + extra_cost
->alu
.log_shift
);
9625 *cost
+= (extra_cost
->alu
.shift
9626 + extra_cost
->alu
.shift_reg
9627 + extra_cost
->alu
.log_shift_reg
);
9633 *cost
= LIBCALL_COST (2);
9641 *cost
= COSTS_N_INSNS (1);
9643 *cost
+= extra_cost
->alu
.rev
;
9650 /* No rev instruction available. Look at arm_legacy_rev
9651 and thumb_legacy_rev for the form of RTL used then. */
9654 *cost
= COSTS_N_INSNS (10);
9658 *cost
+= 6 * extra_cost
->alu
.shift
;
9659 *cost
+= 3 * extra_cost
->alu
.logical
;
9664 *cost
= COSTS_N_INSNS (5);
9668 *cost
+= 2 * extra_cost
->alu
.shift
;
9669 *cost
+= extra_cost
->alu
.arith_shift
;
9670 *cost
+= 2 * extra_cost
->alu
.logical
;
9678 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9679 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9681 *cost
= COSTS_N_INSNS (1);
9682 if (GET_CODE (XEXP (x
, 0)) == MULT
9683 || GET_CODE (XEXP (x
, 1)) == MULT
)
9685 rtx mul_op0
, mul_op1
, sub_op
;
9688 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9690 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9692 mul_op0
= XEXP (XEXP (x
, 0), 0);
9693 mul_op1
= XEXP (XEXP (x
, 0), 1);
9694 sub_op
= XEXP (x
, 1);
9698 mul_op0
= XEXP (XEXP (x
, 1), 0);
9699 mul_op1
= XEXP (XEXP (x
, 1), 1);
9700 sub_op
= XEXP (x
, 0);
9703 /* The first operand of the multiply may be optionally
9705 if (GET_CODE (mul_op0
) == NEG
)
9706 mul_op0
= XEXP (mul_op0
, 0);
9708 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9709 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9710 + rtx_cost (sub_op
, code
, 0, speed_p
));
9716 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9722 rtx shift_by_reg
= NULL
;
9726 *cost
= COSTS_N_INSNS (1);
9728 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9729 if (shift_op
== NULL
)
9731 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9732 non_shift_op
= XEXP (x
, 0);
9735 non_shift_op
= XEXP (x
, 1);
9737 if (shift_op
!= NULL
)
9739 if (shift_by_reg
!= NULL
)
9742 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9743 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9746 *cost
+= extra_cost
->alu
.arith_shift
;
9748 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9749 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9754 && GET_CODE (XEXP (x
, 1)) == MULT
)
9758 *cost
+= extra_cost
->mult
[0].add
;
9759 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9760 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9761 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9765 if (CONST_INT_P (XEXP (x
, 0)))
9767 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9768 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9770 *cost
= COSTS_N_INSNS (insns
);
9772 *cost
+= insns
* extra_cost
->alu
.arith
;
9773 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9777 *cost
+= extra_cost
->alu
.arith
;
9782 if (GET_MODE_CLASS (mode
) == MODE_INT
9783 && GET_MODE_SIZE (mode
) < 4)
9785 rtx shift_op
, shift_reg
;
9788 /* We check both sides of the MINUS for shifter operands since,
9789 unlike PLUS, it's not commutative. */
9791 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9792 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9794 /* Slightly disparage, as we might need to widen the result. */
9795 *cost
= 1 + COSTS_N_INSNS (1);
9797 *cost
+= extra_cost
->alu
.arith
;
9799 if (CONST_INT_P (XEXP (x
, 0)))
9801 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9810 *cost
= COSTS_N_INSNS (2);
9812 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9814 rtx op1
= XEXP (x
, 1);
9817 *cost
+= 2 * extra_cost
->alu
.arith
;
9819 if (GET_CODE (op1
) == ZERO_EXTEND
)
9820 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9822 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9823 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9827 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9830 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9831 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9833 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9836 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9837 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9840 *cost
+= (extra_cost
->alu
.arith
9841 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9842 ? extra_cost
->alu
.arith
9843 : extra_cost
->alu
.arith_shift
));
9844 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9845 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9846 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9851 *cost
+= 2 * extra_cost
->alu
.arith
;
9857 *cost
= LIBCALL_COST (2);
9861 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9862 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9864 *cost
= COSTS_N_INSNS (1);
9865 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9867 rtx mul_op0
, mul_op1
, add_op
;
9870 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9872 mul_op0
= XEXP (XEXP (x
, 0), 0);
9873 mul_op1
= XEXP (XEXP (x
, 0), 1);
9874 add_op
= XEXP (x
, 1);
9876 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9877 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9878 + rtx_cost (add_op
, code
, 0, speed_p
));
9884 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9887 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9889 *cost
= LIBCALL_COST (2);
9893 /* Narrow modes can be synthesized in SImode, but the range
9894 of useful sub-operations is limited. Check for shift operations
9895 on one of the operands. Only left shifts can be used in the
9897 if (GET_MODE_CLASS (mode
) == MODE_INT
9898 && GET_MODE_SIZE (mode
) < 4)
9900 rtx shift_op
, shift_reg
;
9903 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9905 if (CONST_INT_P (XEXP (x
, 1)))
9907 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9908 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9910 *cost
= COSTS_N_INSNS (insns
);
9912 *cost
+= insns
* extra_cost
->alu
.arith
;
9913 /* Slightly penalize a narrow operation as the result may
9915 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9919 /* Slightly penalize a narrow operation as the result may
9921 *cost
= 1 + COSTS_N_INSNS (1);
9923 *cost
+= extra_cost
->alu
.arith
;
9930 rtx shift_op
, shift_reg
;
9932 *cost
= COSTS_N_INSNS (1);
9934 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9935 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9937 /* UXTA[BH] or SXTA[BH]. */
9939 *cost
+= extra_cost
->alu
.extend_arith
;
9940 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9942 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9947 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9948 if (shift_op
!= NULL
)
9953 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9954 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9957 *cost
+= extra_cost
->alu
.arith_shift
;
9959 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9960 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9963 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9965 rtx mul_op
= XEXP (x
, 0);
9967 *cost
= COSTS_N_INSNS (1);
9969 if (TARGET_DSP_MULTIPLY
9970 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9971 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9972 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9973 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9974 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9975 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9976 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9977 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9978 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9979 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9980 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9981 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9986 *cost
+= extra_cost
->mult
[0].extend_add
;
9987 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9988 SIGN_EXTEND
, 0, speed_p
)
9989 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9990 SIGN_EXTEND
, 0, speed_p
)
9991 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9996 *cost
+= extra_cost
->mult
[0].add
;
9997 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
9998 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
9999 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10002 if (CONST_INT_P (XEXP (x
, 1)))
10004 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10005 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10007 *cost
= COSTS_N_INSNS (insns
);
10009 *cost
+= insns
* extra_cost
->alu
.arith
;
10010 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
10014 *cost
+= extra_cost
->alu
.arith
;
10019 if (mode
== DImode
)
10022 && GET_CODE (XEXP (x
, 0)) == MULT
10023 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10024 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10025 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10026 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10028 *cost
= COSTS_N_INSNS (1);
10030 *cost
+= extra_cost
->mult
[1].extend_add
;
10031 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
10032 ZERO_EXTEND
, 0, speed_p
)
10033 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
10034 ZERO_EXTEND
, 0, speed_p
)
10035 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10039 *cost
= COSTS_N_INSNS (2);
10041 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10042 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10045 *cost
+= (extra_cost
->alu
.arith
10046 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10047 ? extra_cost
->alu
.arith
10048 : extra_cost
->alu
.arith_shift
));
10050 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
10052 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10057 *cost
+= 2 * extra_cost
->alu
.arith
;
10062 *cost
= LIBCALL_COST (2);
10065 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10067 *cost
= COSTS_N_INSNS (1);
10069 *cost
+= extra_cost
->alu
.rev
;
10073 /* Fall through. */
10074 case AND
: case XOR
:
10075 if (mode
== SImode
)
10077 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10078 rtx op0
= XEXP (x
, 0);
10079 rtx shift_op
, shift_reg
;
10081 *cost
= COSTS_N_INSNS (1);
10085 || (code
== IOR
&& TARGET_THUMB2
)))
10086 op0
= XEXP (op0
, 0);
10089 shift_op
= shifter_op_p (op0
, &shift_reg
);
10090 if (shift_op
!= NULL
)
10095 *cost
+= extra_cost
->alu
.log_shift_reg
;
10096 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10099 *cost
+= extra_cost
->alu
.log_shift
;
10101 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10102 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10106 if (CONST_INT_P (XEXP (x
, 1)))
10108 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10109 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10112 *cost
= COSTS_N_INSNS (insns
);
10114 *cost
+= insns
* extra_cost
->alu
.logical
;
10115 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
10120 *cost
+= extra_cost
->alu
.logical
;
10121 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
10122 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10126 if (mode
== DImode
)
10128 rtx op0
= XEXP (x
, 0);
10129 enum rtx_code subcode
= GET_CODE (op0
);
10131 *cost
= COSTS_N_INSNS (2);
10135 || (code
== IOR
&& TARGET_THUMB2
)))
10136 op0
= XEXP (op0
, 0);
10138 if (GET_CODE (op0
) == ZERO_EXTEND
)
10141 *cost
+= 2 * extra_cost
->alu
.logical
;
10143 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
10144 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10147 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10150 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10152 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
10153 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10158 *cost
+= 2 * extra_cost
->alu
.logical
;
10164 *cost
= LIBCALL_COST (2);
10168 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10169 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10171 rtx op0
= XEXP (x
, 0);
10173 *cost
= COSTS_N_INSNS (1);
10175 if (GET_CODE (op0
) == NEG
)
10176 op0
= XEXP (op0
, 0);
10179 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10181 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
10182 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
10185 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10187 *cost
= LIBCALL_COST (2);
10191 if (mode
== SImode
)
10193 *cost
= COSTS_N_INSNS (1);
10194 if (TARGET_DSP_MULTIPLY
10195 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10196 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10197 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10198 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10199 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10200 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10201 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10202 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10203 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10204 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10205 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10206 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10209 /* SMUL[TB][TB]. */
10211 *cost
+= extra_cost
->mult
[0].extend
;
10212 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
10213 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
10217 *cost
+= extra_cost
->mult
[0].simple
;
10221 if (mode
== DImode
)
10224 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10225 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10226 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10227 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10229 *cost
= COSTS_N_INSNS (1);
10231 *cost
+= extra_cost
->mult
[1].extend
;
10232 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
10233 ZERO_EXTEND
, 0, speed_p
)
10234 + rtx_cost (XEXP (XEXP (x
, 1), 0),
10235 ZERO_EXTEND
, 0, speed_p
));
10239 *cost
= LIBCALL_COST (2);
10244 *cost
= LIBCALL_COST (2);
10248 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10249 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10251 *cost
= COSTS_N_INSNS (1);
10253 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10257 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10259 *cost
= LIBCALL_COST (1);
10263 if (mode
== SImode
)
10265 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10267 *cost
= COSTS_N_INSNS (2);
10268 /* Assume the non-flag-changing variant. */
10270 *cost
+= (extra_cost
->alu
.log_shift
10271 + extra_cost
->alu
.arith_shift
);
10272 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
10276 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10277 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10279 *cost
= COSTS_N_INSNS (2);
10280 /* No extra cost for MOV imm and MVN imm. */
10281 /* If the comparison op is using the flags, there's no further
10282 cost, otherwise we need to add the cost of the comparison. */
10283 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10284 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10285 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10287 *cost
+= (COSTS_N_INSNS (1)
10288 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
10290 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
10293 *cost
+= extra_cost
->alu
.arith
;
10297 *cost
= COSTS_N_INSNS (1);
10299 *cost
+= extra_cost
->alu
.arith
;
10303 if (GET_MODE_CLASS (mode
) == MODE_INT
10304 && GET_MODE_SIZE (mode
) < 4)
10306 /* Slightly disparage, as we might need an extend operation. */
10307 *cost
= 1 + COSTS_N_INSNS (1);
10309 *cost
+= extra_cost
->alu
.arith
;
10313 if (mode
== DImode
)
10315 *cost
= COSTS_N_INSNS (2);
10317 *cost
+= 2 * extra_cost
->alu
.arith
;
10322 *cost
= LIBCALL_COST (1);
10326 if (mode
== SImode
)
10329 rtx shift_reg
= NULL
;
10331 *cost
= COSTS_N_INSNS (1);
10332 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10336 if (shift_reg
!= NULL
)
10339 *cost
+= extra_cost
->alu
.log_shift_reg
;
10340 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10343 *cost
+= extra_cost
->alu
.log_shift
;
10344 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
10349 *cost
+= extra_cost
->alu
.logical
;
10352 if (mode
== DImode
)
10354 *cost
= COSTS_N_INSNS (2);
10360 *cost
+= LIBCALL_COST (1);
10365 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10367 *cost
= COSTS_N_INSNS (4);
10370 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
10371 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
10373 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
10374 /* Assume that if one arm of the if_then_else is a register,
10375 that it will be tied with the result and eliminate the
10376 conditional insn. */
10377 if (REG_P (XEXP (x
, 1)))
10379 else if (REG_P (XEXP (x
, 2)))
10385 if (extra_cost
->alu
.non_exec_costs_exec
)
10386 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10388 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10391 *cost
+= op1cost
+ op2cost
;
10397 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10401 machine_mode op0mode
;
10402 /* We'll mostly assume that the cost of a compare is the cost of the
10403 LHS. However, there are some notable exceptions. */
10405 /* Floating point compares are never done as side-effects. */
10406 op0mode
= GET_MODE (XEXP (x
, 0));
10407 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10408 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10410 *cost
= COSTS_N_INSNS (1);
10412 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10414 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10416 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10422 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10424 *cost
= LIBCALL_COST (2);
10428 /* DImode compares normally take two insns. */
10429 if (op0mode
== DImode
)
10431 *cost
= COSTS_N_INSNS (2);
10433 *cost
+= 2 * extra_cost
->alu
.arith
;
10437 if (op0mode
== SImode
)
10442 if (XEXP (x
, 1) == const0_rtx
10443 && !(REG_P (XEXP (x
, 0))
10444 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10445 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10447 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10449 /* Multiply operations that set the flags are often
10450 significantly more expensive. */
10452 && GET_CODE (XEXP (x
, 0)) == MULT
10453 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10454 *cost
+= extra_cost
->mult
[0].flag_setting
;
10457 && GET_CODE (XEXP (x
, 0)) == PLUS
10458 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10459 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10461 *cost
+= extra_cost
->mult
[0].flag_setting
;
10466 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10467 if (shift_op
!= NULL
)
10469 *cost
= COSTS_N_INSNS (1);
10470 if (shift_reg
!= NULL
)
10472 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10474 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10477 *cost
+= extra_cost
->alu
.arith_shift
;
10478 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10479 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10483 *cost
= COSTS_N_INSNS (1);
10485 *cost
+= extra_cost
->alu
.arith
;
10486 if (CONST_INT_P (XEXP (x
, 1))
10487 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10489 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10497 *cost
= LIBCALL_COST (2);
10520 if (outer_code
== SET
)
10522 /* Is it a store-flag operation? */
10523 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10524 && XEXP (x
, 1) == const0_rtx
)
10526 /* Thumb also needs an IT insn. */
10527 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10530 if (XEXP (x
, 1) == const0_rtx
)
10535 /* LSR Rd, Rn, #31. */
10536 *cost
= COSTS_N_INSNS (1);
10538 *cost
+= extra_cost
->alu
.shift
;
10548 *cost
= COSTS_N_INSNS (2);
10552 /* RSBS T1, Rn, Rn, LSR #31
10554 *cost
= COSTS_N_INSNS (2);
10556 *cost
+= extra_cost
->alu
.arith_shift
;
10560 /* RSB Rd, Rn, Rn, ASR #1
10561 LSR Rd, Rd, #31. */
10562 *cost
= COSTS_N_INSNS (2);
10564 *cost
+= (extra_cost
->alu
.arith_shift
10565 + extra_cost
->alu
.shift
);
10571 *cost
= COSTS_N_INSNS (2);
10573 *cost
+= extra_cost
->alu
.shift
;
10577 /* Remaining cases are either meaningless or would take
10578 three insns anyway. */
10579 *cost
= COSTS_N_INSNS (3);
10582 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10587 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10588 if (CONST_INT_P (XEXP (x
, 1))
10589 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10591 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10598 /* Not directly inside a set. If it involves the condition code
10599 register it must be the condition for a branch, cond_exec or
10600 I_T_E operation. Since the comparison is performed elsewhere
10601 this is just the control part which has no additional
10603 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10604 && XEXP (x
, 1) == const0_rtx
)
10612 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10613 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10615 *cost
= COSTS_N_INSNS (1);
10617 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10621 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10623 *cost
= LIBCALL_COST (1);
10627 if (mode
== SImode
)
10629 *cost
= COSTS_N_INSNS (1);
10631 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10635 *cost
= LIBCALL_COST (1);
10639 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10640 && MEM_P (XEXP (x
, 0)))
10642 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10644 if (mode
== DImode
)
10645 *cost
+= COSTS_N_INSNS (1);
10650 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10651 *cost
+= extra_cost
->ldst
.load
;
10653 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10655 if (mode
== DImode
)
10656 *cost
+= extra_cost
->alu
.shift
;
10661 /* Widening from less than 32-bits requires an extend operation. */
10662 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10664 /* We have SXTB/SXTH. */
10665 *cost
= COSTS_N_INSNS (1);
10666 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10668 *cost
+= extra_cost
->alu
.extend
;
10670 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10672 /* Needs two shifts. */
10673 *cost
= COSTS_N_INSNS (2);
10674 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10676 *cost
+= 2 * extra_cost
->alu
.shift
;
10679 /* Widening beyond 32-bits requires one more insn. */
10680 if (mode
== DImode
)
10682 *cost
+= COSTS_N_INSNS (1);
10684 *cost
+= extra_cost
->alu
.shift
;
10691 || GET_MODE (XEXP (x
, 0)) == SImode
10692 || GET_MODE (XEXP (x
, 0)) == QImode
)
10693 && MEM_P (XEXP (x
, 0)))
10695 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10697 if (mode
== DImode
)
10698 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10703 /* Widening from less than 32-bits requires an extend operation. */
10704 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10706 /* UXTB can be a shorter instruction in Thumb2, but it might
10707 be slower than the AND Rd, Rn, #255 alternative. When
10708 optimizing for speed it should never be slower to use
10709 AND, and we don't really model 16-bit vs 32-bit insns
10711 *cost
= COSTS_N_INSNS (1);
10713 *cost
+= extra_cost
->alu
.logical
;
10715 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10717 /* We have UXTB/UXTH. */
10718 *cost
= COSTS_N_INSNS (1);
10719 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10721 *cost
+= extra_cost
->alu
.extend
;
10723 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10725 /* Needs two shifts. It's marginally preferable to use
10726 shifts rather than two BIC instructions as the second
10727 shift may merge with a subsequent insn as a shifter
10729 *cost
= COSTS_N_INSNS (2);
10730 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10732 *cost
+= 2 * extra_cost
->alu
.shift
;
10734 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10735 *cost
= COSTS_N_INSNS (1);
10737 /* Widening beyond 32-bits requires one more insn. */
10738 if (mode
== DImode
)
10740 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10747 /* CONST_INT has no mode, so we cannot tell for sure how many
10748 insns are really going to be needed. The best we can do is
10749 look at the value passed. If it fits in SImode, then assume
10750 that's the mode it will be used for. Otherwise assume it
10751 will be used in DImode. */
10752 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10757 /* Avoid blowing up in arm_gen_constant (). */
10758 if (!(outer_code
== PLUS
10759 || outer_code
== AND
10760 || outer_code
== IOR
10761 || outer_code
== XOR
10762 || outer_code
== MINUS
))
10766 if (mode
== SImode
)
10768 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10769 INTVAL (x
), NULL
, NULL
,
10775 *cost
+= COSTS_N_INSNS (arm_gen_constant
10776 (outer_code
, SImode
, NULL
,
10777 trunc_int_for_mode (INTVAL (x
), SImode
),
10779 + arm_gen_constant (outer_code
, SImode
, NULL
,
10780 INTVAL (x
) >> 32, NULL
,
10792 if (arm_arch_thumb2
&& !flag_pic
)
10793 *cost
= COSTS_N_INSNS (2);
10795 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10798 *cost
= COSTS_N_INSNS (2);
10802 *cost
+= COSTS_N_INSNS (1);
10804 *cost
+= extra_cost
->alu
.arith
;
10810 *cost
= COSTS_N_INSNS (4);
10815 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10816 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10818 if (vfp3_const_double_rtx (x
))
10820 *cost
= COSTS_N_INSNS (1);
10822 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10828 *cost
= COSTS_N_INSNS (1);
10829 if (mode
== DFmode
)
10830 *cost
+= extra_cost
->ldst
.loadd
;
10832 *cost
+= extra_cost
->ldst
.loadf
;
10835 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10839 *cost
= COSTS_N_INSNS (4);
10845 && TARGET_HARD_FLOAT
10846 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10847 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10848 *cost
= COSTS_N_INSNS (1);
10850 *cost
= COSTS_N_INSNS (4);
10855 *cost
= COSTS_N_INSNS (1);
10856 /* When optimizing for size, we prefer constant pool entries to
10857 MOVW/MOVT pairs, so bump the cost of these slightly. */
10863 *cost
= COSTS_N_INSNS (1);
10865 *cost
+= extra_cost
->alu
.clz
;
10869 if (XEXP (x
, 1) == const0_rtx
)
10871 *cost
= COSTS_N_INSNS (1);
10873 *cost
+= extra_cost
->alu
.log_shift
;
10874 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10877 /* Fall through. */
10881 *cost
= COSTS_N_INSNS (2);
10885 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10886 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10887 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10888 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10889 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10890 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10891 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10892 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10895 *cost
= COSTS_N_INSNS (1);
10897 *cost
+= extra_cost
->mult
[1].extend
;
10898 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10900 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10904 *cost
= LIBCALL_COST (1);
10907 case UNSPEC_VOLATILE
:
10909 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10912 /* Reading the PC is like reading any other register. Writing it
10913 is more expensive, but we take that into account elsewhere. */
10918 /* TODO: Simple zero_extract of bottom bits using AND. */
10919 /* Fall through. */
10923 && CONST_INT_P (XEXP (x
, 1))
10924 && CONST_INT_P (XEXP (x
, 2)))
10926 *cost
= COSTS_N_INSNS (1);
10928 *cost
+= extra_cost
->alu
.bfx
;
10929 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10932 /* Without UBFX/SBFX, need to resort to shift operations. */
10933 *cost
= COSTS_N_INSNS (2);
10935 *cost
+= 2 * extra_cost
->alu
.shift
;
10936 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10940 if (TARGET_HARD_FLOAT
)
10942 *cost
= COSTS_N_INSNS (1);
10944 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10945 if (!TARGET_FPU_ARMV8
10946 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10948 /* Pre v8, widening HF->DF is a two-step process, first
10949 widening to SFmode. */
10950 *cost
+= COSTS_N_INSNS (1);
10952 *cost
+= extra_cost
->fp
[0].widen
;
10954 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10958 *cost
= LIBCALL_COST (1);
10961 case FLOAT_TRUNCATE
:
10962 if (TARGET_HARD_FLOAT
)
10964 *cost
= COSTS_N_INSNS (1);
10966 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10967 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10969 /* Vector modes? */
10971 *cost
= LIBCALL_COST (1);
10975 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10977 rtx op0
= XEXP (x
, 0);
10978 rtx op1
= XEXP (x
, 1);
10979 rtx op2
= XEXP (x
, 2);
10981 *cost
= COSTS_N_INSNS (1);
10983 /* vfms or vfnma. */
10984 if (GET_CODE (op0
) == NEG
)
10985 op0
= XEXP (op0
, 0);
10987 /* vfnms or vfnma. */
10988 if (GET_CODE (op2
) == NEG
)
10989 op2
= XEXP (op2
, 0);
10991 *cost
+= rtx_cost (op0
, FMA
, 0, speed_p
);
10992 *cost
+= rtx_cost (op1
, FMA
, 1, speed_p
);
10993 *cost
+= rtx_cost (op2
, FMA
, 2, speed_p
);
10996 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
11001 *cost
= LIBCALL_COST (3);
11006 if (TARGET_HARD_FLOAT
)
11008 if (GET_MODE_CLASS (mode
) == MODE_INT
)
11010 *cost
= COSTS_N_INSNS (1);
11012 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
11013 /* Strip of the 'cost' of rounding towards zero. */
11014 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11015 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
11017 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
11018 /* ??? Increase the cost to deal with transferring from
11019 FP -> CORE registers? */
11022 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11023 && TARGET_FPU_ARMV8
)
11025 *cost
= COSTS_N_INSNS (1);
11027 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11030 /* Vector costs? */
11032 *cost
= LIBCALL_COST (1);
11036 case UNSIGNED_FLOAT
:
11037 if (TARGET_HARD_FLOAT
)
11039 /* ??? Increase the cost to deal with transferring from CORE
11040 -> FP registers? */
11041 *cost
= COSTS_N_INSNS (1);
11043 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11046 *cost
= LIBCALL_COST (1);
11050 *cost
= COSTS_N_INSNS (1);
11055 /* Just a guess. Guess number of instructions in the asm
11056 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11057 though (see PR60663). */
11058 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11059 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11061 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11065 if (mode
!= VOIDmode
)
11066 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11068 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11073 #undef HANDLE_NARROW_SHIFT_ARITH
11075 /* RTX costs when optimizing for size. */
11077 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
11078 int *total
, bool speed
)
11082 if (TARGET_OLD_RTX_COSTS
11083 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
11085 /* Old way. (Deprecated.) */
11087 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
11088 (enum rtx_code
) outer_code
, total
);
11090 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
11091 (enum rtx_code
) outer_code
, total
,
11097 if (current_tune
->insn_extra_cost
)
11098 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11099 (enum rtx_code
) outer_code
,
11100 current_tune
->insn_extra_cost
,
11102 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11103 && current_tune->insn_extra_cost != NULL */
11105 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11106 (enum rtx_code
) outer_code
,
11107 &generic_extra_costs
, total
, speed
);
11110 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
11112 print_rtl_single (dump_file
, x
);
11113 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11114 *total
, result
? "final" : "partial");
11119 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11120 supported on any "slowmul" cores, so it can be ignored. */
11123 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11124 int *total
, bool speed
)
11126 machine_mode mode
= GET_MODE (x
);
11130 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11137 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11140 *total
= COSTS_N_INSNS (20);
11144 if (CONST_INT_P (XEXP (x
, 1)))
11146 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11147 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11148 int cost
, const_ok
= const_ok_for_arm (i
);
11149 int j
, booth_unit_size
;
11151 /* Tune as appropriate. */
11152 cost
= const_ok
? 4 : 8;
11153 booth_unit_size
= 2;
11154 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11156 i
>>= booth_unit_size
;
11160 *total
= COSTS_N_INSNS (cost
);
11161 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
11165 *total
= COSTS_N_INSNS (20);
11169 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
11174 /* RTX cost for cores with a fast multiply unit (M variants). */
11177 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11178 int *total
, bool speed
)
11180 machine_mode mode
= GET_MODE (x
);
11184 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11188 /* ??? should thumb2 use different costs? */
11192 /* There is no point basing this on the tuning, since it is always the
11193 fast variant if it exists at all. */
11195 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11196 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11197 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11199 *total
= COSTS_N_INSNS(2);
11204 if (mode
== DImode
)
11206 *total
= COSTS_N_INSNS (5);
11210 if (CONST_INT_P (XEXP (x
, 1)))
11212 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11213 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11214 int cost
, const_ok
= const_ok_for_arm (i
);
11215 int j
, booth_unit_size
;
11217 /* Tune as appropriate. */
11218 cost
= const_ok
? 4 : 8;
11219 booth_unit_size
= 8;
11220 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11222 i
>>= booth_unit_size
;
11226 *total
= COSTS_N_INSNS(cost
);
11230 if (mode
== SImode
)
11232 *total
= COSTS_N_INSNS (4);
11236 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11238 if (TARGET_HARD_FLOAT
11240 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11242 *total
= COSTS_N_INSNS (1);
11247 /* Requires a lib call */
11248 *total
= COSTS_N_INSNS (20);
11252 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11257 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11258 so it can be ignored. */
11261 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11262 int *total
, bool speed
)
11264 machine_mode mode
= GET_MODE (x
);
11268 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11275 if (GET_CODE (XEXP (x
, 0)) != MULT
)
11276 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11278 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11279 will stall until the multiplication is complete. */
11280 *total
= COSTS_N_INSNS (3);
11284 /* There is no point basing this on the tuning, since it is always the
11285 fast variant if it exists at all. */
11287 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11288 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11289 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11291 *total
= COSTS_N_INSNS (2);
11296 if (mode
== DImode
)
11298 *total
= COSTS_N_INSNS (5);
11302 if (CONST_INT_P (XEXP (x
, 1)))
11304 /* If operand 1 is a constant we can more accurately
11305 calculate the cost of the multiply. The multiplier can
11306 retire 15 bits on the first cycle and a further 12 on the
11307 second. We do, of course, have to load the constant into
11308 a register first. */
11309 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
11310 /* There's a general overhead of one cycle. */
11312 unsigned HOST_WIDE_INT masked_const
;
11314 if (i
& 0x80000000)
11317 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
11319 masked_const
= i
& 0xffff8000;
11320 if (masked_const
!= 0)
11323 masked_const
= i
& 0xf8000000;
11324 if (masked_const
!= 0)
11327 *total
= COSTS_N_INSNS (cost
);
11331 if (mode
== SImode
)
11333 *total
= COSTS_N_INSNS (3);
11337 /* Requires a lib call */
11338 *total
= COSTS_N_INSNS (20);
11342 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11347 /* RTX costs for 9e (and later) cores. */
11350 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11351 int *total
, bool speed
)
11353 machine_mode mode
= GET_MODE (x
);
11360 /* Small multiply: 32 cycles for an integer multiply inst. */
11361 if (arm_arch6m
&& arm_m_profile_small_mul
)
11362 *total
= COSTS_N_INSNS (32);
11364 *total
= COSTS_N_INSNS (3);
11368 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11376 /* There is no point basing this on the tuning, since it is always the
11377 fast variant if it exists at all. */
11379 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11380 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11381 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11383 *total
= COSTS_N_INSNS (2);
11388 if (mode
== DImode
)
11390 *total
= COSTS_N_INSNS (5);
11394 if (mode
== SImode
)
11396 *total
= COSTS_N_INSNS (2);
11400 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11402 if (TARGET_HARD_FLOAT
11404 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11406 *total
= COSTS_N_INSNS (1);
11411 *total
= COSTS_N_INSNS (20);
11415 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11418 /* All address computations that can be done are free, but rtx cost returns
11419 the same for practically all of them. So we weight the different types
11420 of address here in the order (most pref first):
11421 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11423 arm_arm_address_cost (rtx x
)
11425 enum rtx_code c
= GET_CODE (x
);
11427 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11429 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11434 if (CONST_INT_P (XEXP (x
, 1)))
11437 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11447 arm_thumb_address_cost (rtx x
)
11449 enum rtx_code c
= GET_CODE (x
);
11454 && REG_P (XEXP (x
, 0))
11455 && CONST_INT_P (XEXP (x
, 1)))
11462 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11463 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11465 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11468 /* Adjust cost hook for XScale. */
11470 xscale_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11472 /* Some true dependencies can have a higher cost depending
11473 on precisely how certain input operands are used. */
11474 if (REG_NOTE_KIND(link
) == 0
11475 && recog_memoized (insn
) >= 0
11476 && recog_memoized (dep
) >= 0)
11478 int shift_opnum
= get_attr_shift (insn
);
11479 enum attr_type attr_type
= get_attr_type (dep
);
11481 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11482 operand for INSN. If we have a shifted input operand and the
11483 instruction we depend on is another ALU instruction, then we may
11484 have to account for an additional stall. */
11485 if (shift_opnum
!= 0
11486 && (attr_type
== TYPE_ALU_SHIFT_IMM
11487 || attr_type
== TYPE_ALUS_SHIFT_IMM
11488 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11489 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11490 || attr_type
== TYPE_ALU_SHIFT_REG
11491 || attr_type
== TYPE_ALUS_SHIFT_REG
11492 || attr_type
== TYPE_LOGIC_SHIFT_REG
11493 || attr_type
== TYPE_LOGICS_SHIFT_REG
11494 || attr_type
== TYPE_MOV_SHIFT
11495 || attr_type
== TYPE_MVN_SHIFT
11496 || attr_type
== TYPE_MOV_SHIFT_REG
11497 || attr_type
== TYPE_MVN_SHIFT_REG
))
11499 rtx shifted_operand
;
11502 /* Get the shifted operand. */
11503 extract_insn (insn
);
11504 shifted_operand
= recog_data
.operand
[shift_opnum
];
11506 /* Iterate over all the operands in DEP. If we write an operand
11507 that overlaps with SHIFTED_OPERAND, then we have increase the
11508 cost of this dependency. */
11509 extract_insn (dep
);
11510 preprocess_constraints (dep
);
11511 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11513 /* We can ignore strict inputs. */
11514 if (recog_data
.operand_type
[opno
] == OP_IN
)
11517 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11529 /* Adjust cost hook for Cortex A9. */
11531 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11533 switch (REG_NOTE_KIND (link
))
11540 case REG_DEP_OUTPUT
:
11541 if (recog_memoized (insn
) >= 0
11542 && recog_memoized (dep
) >= 0)
11544 if (GET_CODE (PATTERN (insn
)) == SET
)
11547 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11549 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11551 enum attr_type attr_type_insn
= get_attr_type (insn
);
11552 enum attr_type attr_type_dep
= get_attr_type (dep
);
11554 /* By default all dependencies of the form
11557 have an extra latency of 1 cycle because
11558 of the input and output dependency in this
11559 case. However this gets modeled as an true
11560 dependency and hence all these checks. */
11561 if (REG_P (SET_DEST (PATTERN (insn
)))
11562 && REG_P (SET_DEST (PATTERN (dep
)))
11563 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11564 SET_DEST (PATTERN (dep
))))
11566 /* FMACS is a special case where the dependent
11567 instruction can be issued 3 cycles before
11568 the normal latency in case of an output
11570 if ((attr_type_insn
== TYPE_FMACS
11571 || attr_type_insn
== TYPE_FMACD
)
11572 && (attr_type_dep
== TYPE_FMACS
11573 || attr_type_dep
== TYPE_FMACD
))
11575 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11576 *cost
= insn_default_latency (dep
) - 3;
11578 *cost
= insn_default_latency (dep
);
11583 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11584 *cost
= insn_default_latency (dep
) + 1;
11586 *cost
= insn_default_latency (dep
);
11596 gcc_unreachable ();
11602 /* Adjust cost hook for FA726TE. */
11604 fa726te_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11606 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11607 have penalty of 3. */
11608 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11609 && recog_memoized (insn
) >= 0
11610 && recog_memoized (dep
) >= 0
11611 && get_attr_conds (dep
) == CONDS_SET
)
11613 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11614 if (get_attr_conds (insn
) == CONDS_USE
11615 && get_attr_type (insn
) != TYPE_BRANCH
)
11621 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11622 || get_attr_conds (insn
) == CONDS_USE
)
11632 /* Implement TARGET_REGISTER_MOVE_COST.
11634 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11635 it is typically more expensive than a single memory access. We set
11636 the cost to less than two memory accesses so that floating
11637 point to integer conversion does not go through memory. */
11640 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11641 reg_class_t from
, reg_class_t to
)
11645 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11646 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11648 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11649 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11651 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11658 if (from
== HI_REGS
|| to
== HI_REGS
)
11665 /* Implement TARGET_MEMORY_MOVE_COST. */
11668 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11669 bool in ATTRIBUTE_UNUSED
)
11675 if (GET_MODE_SIZE (mode
) < 4)
11678 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11682 /* Vectorizer cost model implementation. */
11684 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11686 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11688 int misalign ATTRIBUTE_UNUSED
)
11692 switch (type_of_cost
)
11695 return current_tune
->vec_costs
->scalar_stmt_cost
;
11698 return current_tune
->vec_costs
->scalar_load_cost
;
11701 return current_tune
->vec_costs
->scalar_store_cost
;
11704 return current_tune
->vec_costs
->vec_stmt_cost
;
11707 return current_tune
->vec_costs
->vec_align_load_cost
;
11710 return current_tune
->vec_costs
->vec_store_cost
;
11712 case vec_to_scalar
:
11713 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11715 case scalar_to_vec
:
11716 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11718 case unaligned_load
:
11719 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11721 case unaligned_store
:
11722 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11724 case cond_branch_taken
:
11725 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11727 case cond_branch_not_taken
:
11728 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11731 case vec_promote_demote
:
11732 return current_tune
->vec_costs
->vec_stmt_cost
;
11734 case vec_construct
:
11735 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11736 return elements
/ 2 + 1;
11739 gcc_unreachable ();
11743 /* Implement targetm.vectorize.add_stmt_cost. */
11746 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11747 struct _stmt_vec_info
*stmt_info
, int misalign
,
11748 enum vect_cost_model_location where
)
11750 unsigned *cost
= (unsigned *) data
;
11751 unsigned retval
= 0;
11753 if (flag_vect_cost_model
)
11755 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11756 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11758 /* Statements in an inner loop relative to the loop being
11759 vectorized are weighted more heavily. The value here is
11760 arbitrary and could potentially be improved with analysis. */
11761 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11762 count
*= 50; /* FIXME. */
11764 retval
= (unsigned) (count
* stmt_cost
);
11765 cost
[where
] += retval
;
11771 /* Return true if and only if this insn can dual-issue only as older. */
11773 cortexa7_older_only (rtx_insn
*insn
)
11775 if (recog_memoized (insn
) < 0)
11778 switch (get_attr_type (insn
))
11780 case TYPE_ALU_DSP_REG
:
11781 case TYPE_ALU_SREG
:
11782 case TYPE_ALUS_SREG
:
11783 case TYPE_LOGIC_REG
:
11784 case TYPE_LOGICS_REG
:
11786 case TYPE_ADCS_REG
:
11791 case TYPE_SHIFT_IMM
:
11792 case TYPE_SHIFT_REG
:
11793 case TYPE_LOAD_BYTE
:
11796 case TYPE_FFARITHS
:
11798 case TYPE_FFARITHD
:
11816 case TYPE_F_STORES
:
11823 /* Return true if and only if this insn can dual-issue as younger. */
11825 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11827 if (recog_memoized (insn
) < 0)
11830 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11834 switch (get_attr_type (insn
))
11837 case TYPE_ALUS_IMM
:
11838 case TYPE_LOGIC_IMM
:
11839 case TYPE_LOGICS_IMM
:
11844 case TYPE_MOV_SHIFT
:
11845 case TYPE_MOV_SHIFT_REG
:
11855 /* Look for an instruction that can dual issue only as an older
11856 instruction, and move it in front of any instructions that can
11857 dual-issue as younger, while preserving the relative order of all
11858 other instructions in the ready list. This is a hueuristic to help
11859 dual-issue in later cycles, by postponing issue of more flexible
11860 instructions. This heuristic may affect dual issue opportunities
11861 in the current cycle. */
11863 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11864 int *n_readyp
, int clock
)
11867 int first_older_only
= -1, first_younger
= -1;
11871 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11875 /* Traverse the ready list from the head (the instruction to issue
11876 first), and looking for the first instruction that can issue as
11877 younger and the first instruction that can dual-issue only as
11879 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11881 rtx_insn
*insn
= ready
[i
];
11882 if (cortexa7_older_only (insn
))
11884 first_older_only
= i
;
11886 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11889 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11893 /* Nothing to reorder because either no younger insn found or insn
11894 that can dual-issue only as older appears before any insn that
11895 can dual-issue as younger. */
11896 if (first_younger
== -1)
11899 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11903 /* Nothing to reorder because no older-only insn in the ready list. */
11904 if (first_older_only
== -1)
11907 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11911 /* Move first_older_only insn before first_younger. */
11913 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11914 INSN_UID(ready
[first_older_only
]),
11915 INSN_UID(ready
[first_younger
]));
11916 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11917 for (i
= first_older_only
; i
< first_younger
; i
++)
11919 ready
[i
] = ready
[i
+1];
11922 ready
[i
] = first_older_only_insn
;
11926 /* Implement TARGET_SCHED_REORDER. */
11928 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11934 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11937 /* Do nothing for other cores. */
11941 return arm_issue_rate ();
11944 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11945 It corrects the value of COST based on the relationship between
11946 INSN and DEP through the dependence LINK. It returns the new
11947 value. There is a per-core adjust_cost hook to adjust scheduler costs
11948 and the per-core hook can choose to completely override the generic
11949 adjust_cost function. Only put bits of code into arm_adjust_cost that
11950 are common across all cores. */
11952 arm_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int cost
)
11956 /* When generating Thumb-1 code, we want to place flag-setting operations
11957 close to a conditional branch which depends on them, so that we can
11958 omit the comparison. */
11960 && REG_NOTE_KIND (link
) == 0
11961 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11962 && recog_memoized (dep
) >= 0
11963 && get_attr_conds (dep
) == CONDS_SET
)
11966 if (current_tune
->sched_adjust_cost
!= NULL
)
11968 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
11972 /* XXX Is this strictly true? */
11973 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
11974 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11977 /* Call insns don't incur a stall, even if they follow a load. */
11978 if (REG_NOTE_KIND (link
) == 0
11982 if ((i_pat
= single_set (insn
)) != NULL
11983 && MEM_P (SET_SRC (i_pat
))
11984 && (d_pat
= single_set (dep
)) != NULL
11985 && MEM_P (SET_DEST (d_pat
)))
11987 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11988 /* This is a load after a store, there is no conflict if the load reads
11989 from a cached area. Assume that loads from the stack, and from the
11990 constant pool are cached, and that others will miss. This is a
11993 if ((GET_CODE (src_mem
) == SYMBOL_REF
11994 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11995 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11996 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11997 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
12005 arm_max_conditional_execute (void)
12007 return max_insns_skipped
;
12011 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12014 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12016 return (optimize
> 0) ? 2 : 0;
12020 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12022 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12025 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12026 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12027 sequences of non-executed instructions in IT blocks probably take the same
12028 amount of time as executed instructions (and the IT instruction itself takes
12029 space in icache). This function was experimentally determined to give good
12030 results on a popular embedded benchmark. */
12033 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
12035 return (TARGET_32BIT
&& speed_p
) ? 1
12036 : arm_default_branch_cost (speed_p
, predictable_p
);
12040 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
12042 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12045 static bool fp_consts_inited
= false;
12047 static REAL_VALUE_TYPE value_fp0
;
12050 init_fp_table (void)
12054 r
= REAL_VALUE_ATOF ("0", DFmode
);
12056 fp_consts_inited
= true;
12059 /* Return TRUE if rtx X is a valid immediate FP constant. */
12061 arm_const_double_rtx (rtx x
)
12065 if (!fp_consts_inited
)
12068 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12069 if (REAL_VALUE_MINUS_ZERO (r
))
12072 if (REAL_VALUES_EQUAL (r
, value_fp0
))
12078 /* VFPv3 has a fairly wide range of representable immediates, formed from
12079 "quarter-precision" floating-point values. These can be evaluated using this
12080 formula (with ^ for exponentiation):
12084 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12085 16 <= n <= 31 and 0 <= r <= 7.
12087 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12089 - A (most-significant) is the sign bit.
12090 - BCD are the exponent (encoded as r XOR 3).
12091 - EFGH are the mantissa (encoded as n - 16).
12094 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12095 fconst[sd] instruction, or -1 if X isn't suitable. */
12097 vfp3_const_double_index (rtx x
)
12099 REAL_VALUE_TYPE r
, m
;
12100 int sign
, exponent
;
12101 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12102 unsigned HOST_WIDE_INT mask
;
12103 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12106 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12109 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12111 /* We can't represent these things, so detect them first. */
12112 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12115 /* Extract sign, exponent and mantissa. */
12116 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12117 r
= real_value_abs (&r
);
12118 exponent
= REAL_EXP (&r
);
12119 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12120 highest (sign) bit, with a fixed binary point at bit point_pos.
12121 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12122 bits for the mantissa, this may fail (low bits would be lost). */
12123 real_ldexp (&m
, &r
, point_pos
- exponent
);
12124 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12125 mantissa
= w
.elt (0);
12126 mant_hi
= w
.elt (1);
12128 /* If there are bits set in the low part of the mantissa, we can't
12129 represent this value. */
12133 /* Now make it so that mantissa contains the most-significant bits, and move
12134 the point_pos to indicate that the least-significant bits have been
12136 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12137 mantissa
= mant_hi
;
12139 /* We can permit four significant bits of mantissa only, plus a high bit
12140 which is always 1. */
12141 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
12142 if ((mantissa
& mask
) != 0)
12145 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12146 mantissa
>>= point_pos
- 5;
12148 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12149 floating-point immediate zero with Neon using an integer-zero load, but
12150 that case is handled elsewhere.) */
12154 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12156 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12157 normalized significands are in the range [1, 2). (Our mantissa is shifted
12158 left 4 places at this point relative to normalized IEEE754 values). GCC
12159 internally uses [0.5, 1) (see real.c), so the exponent returned from
12160 REAL_EXP must be altered. */
12161 exponent
= 5 - exponent
;
12163 if (exponent
< 0 || exponent
> 7)
12166 /* Sign, mantissa and exponent are now in the correct form to plug into the
12167 formula described in the comment above. */
12168 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12171 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12173 vfp3_const_double_rtx (rtx x
)
12178 return vfp3_const_double_index (x
) != -1;
12181 /* Recognize immediates which can be used in various Neon instructions. Legal
12182 immediates are described by the following table (for VMVN variants, the
12183 bitwise inverse of the constant shown is recognized. In either case, VMOV
12184 is output and the correct instruction to use for a given constant is chosen
12185 by the assembler). The constant shown is replicated across all elements of
12186 the destination vector.
12188 insn elems variant constant (binary)
12189 ---- ----- ------- -----------------
12190 vmov i32 0 00000000 00000000 00000000 abcdefgh
12191 vmov i32 1 00000000 00000000 abcdefgh 00000000
12192 vmov i32 2 00000000 abcdefgh 00000000 00000000
12193 vmov i32 3 abcdefgh 00000000 00000000 00000000
12194 vmov i16 4 00000000 abcdefgh
12195 vmov i16 5 abcdefgh 00000000
12196 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12197 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12198 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12199 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12200 vmvn i16 10 00000000 abcdefgh
12201 vmvn i16 11 abcdefgh 00000000
12202 vmov i32 12 00000000 00000000 abcdefgh 11111111
12203 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12204 vmov i32 14 00000000 abcdefgh 11111111 11111111
12205 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12206 vmov i8 16 abcdefgh
12207 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12208 eeeeeeee ffffffff gggggggg hhhhhhhh
12209 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12210 vmov f32 19 00000000 00000000 00000000 00000000
12212 For case 18, B = !b. Representable values are exactly those accepted by
12213 vfp3_const_double_index, but are output as floating-point numbers rather
12216 For case 19, we will change it to vmov.i32 when assembling.
12218 Variants 0-5 (inclusive) may also be used as immediates for the second
12219 operand of VORR/VBIC instructions.
12221 The INVERSE argument causes the bitwise inverse of the given operand to be
12222 recognized instead (used for recognizing legal immediates for the VAND/VORN
12223 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12224 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12225 output, rather than the real insns vbic/vorr).
12227 INVERSE makes no difference to the recognition of float vectors.
12229 The return value is the variant of immediate as shown in the above table, or
12230 -1 if the given value doesn't match any of the listed patterns.
12233 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12234 rtx
*modconst
, int *elementwidth
)
12236 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12238 for (i = 0; i < idx; i += (STRIDE)) \
12243 immtype = (CLASS); \
12244 elsize = (ELSIZE); \
12248 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12249 unsigned int innersize
;
12250 unsigned char bytes
[16];
12251 int immtype
= -1, matches
;
12252 unsigned int invmask
= inverse
? 0xff : 0;
12253 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12257 n_elts
= CONST_VECTOR_NUNITS (op
);
12258 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12263 if (mode
== VOIDmode
)
12265 innersize
= GET_MODE_SIZE (mode
);
12268 /* Vectors of float constants. */
12269 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12271 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12272 REAL_VALUE_TYPE r0
;
12274 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12277 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
12279 for (i
= 1; i
< n_elts
; i
++)
12281 rtx elt
= CONST_VECTOR_ELT (op
, i
);
12282 REAL_VALUE_TYPE re
;
12284 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
12286 if (!REAL_VALUES_EQUAL (r0
, re
))
12291 *modconst
= CONST_VECTOR_ELT (op
, 0);
12296 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12302 /* Splat vector constant out into a byte vector. */
12303 for (i
= 0; i
< n_elts
; i
++)
12305 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12306 unsigned HOST_WIDE_INT elpart
;
12307 unsigned int part
, parts
;
12309 if (CONST_INT_P (el
))
12311 elpart
= INTVAL (el
);
12314 else if (CONST_DOUBLE_P (el
))
12316 elpart
= CONST_DOUBLE_LOW (el
);
12320 gcc_unreachable ();
12322 for (part
= 0; part
< parts
; part
++)
12325 for (byte
= 0; byte
< innersize
; byte
++)
12327 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12328 elpart
>>= BITS_PER_UNIT
;
12330 if (CONST_DOUBLE_P (el
))
12331 elpart
= CONST_DOUBLE_HIGH (el
);
12335 /* Sanity check. */
12336 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12340 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12341 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12343 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12344 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12346 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12347 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12349 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12350 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12352 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12354 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12356 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12357 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12359 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12360 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12362 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12363 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12365 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12366 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12368 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12370 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12372 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12373 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12375 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12376 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12378 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12379 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12381 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12382 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12384 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12386 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12387 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12395 *elementwidth
= elsize
;
12399 unsigned HOST_WIDE_INT imm
= 0;
12401 /* Un-invert bytes of recognized vector, if necessary. */
12403 for (i
= 0; i
< idx
; i
++)
12404 bytes
[i
] ^= invmask
;
12408 /* FIXME: Broken on 32-bit H_W_I hosts. */
12409 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12411 for (i
= 0; i
< 8; i
++)
12412 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12413 << (i
* BITS_PER_UNIT
);
12415 *modconst
= GEN_INT (imm
);
12419 unsigned HOST_WIDE_INT imm
= 0;
12421 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12422 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12424 *modconst
= GEN_INT (imm
);
12432 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12433 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12434 float elements), and a modified constant (whatever should be output for a
12435 VMOV) in *MODCONST. */
12438 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
12439 rtx
*modconst
, int *elementwidth
)
12443 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12449 *modconst
= tmpconst
;
12452 *elementwidth
= tmpwidth
;
12457 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12458 the immediate is valid, write a constant suitable for using as an operand
12459 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12460 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12463 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
12464 rtx
*modconst
, int *elementwidth
)
12468 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12470 if (retval
< 0 || retval
> 5)
12474 *modconst
= tmpconst
;
12477 *elementwidth
= tmpwidth
;
12482 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12483 the immediate is valid, write a constant suitable for using as an operand
12484 to VSHR/VSHL to *MODCONST and the corresponding element width to
12485 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12486 because they have different limitations. */
12489 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12490 rtx
*modconst
, int *elementwidth
,
12493 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12494 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12495 unsigned HOST_WIDE_INT last_elt
= 0;
12496 unsigned HOST_WIDE_INT maxshift
;
12498 /* Split vector constant out into a byte vector. */
12499 for (i
= 0; i
< n_elts
; i
++)
12501 rtx el
= CONST_VECTOR_ELT (op
, i
);
12502 unsigned HOST_WIDE_INT elpart
;
12504 if (CONST_INT_P (el
))
12505 elpart
= INTVAL (el
);
12506 else if (CONST_DOUBLE_P (el
))
12509 gcc_unreachable ();
12511 if (i
!= 0 && elpart
!= last_elt
)
12517 /* Shift less than element size. */
12518 maxshift
= innersize
* 8;
12522 /* Left shift immediate value can be from 0 to <size>-1. */
12523 if (last_elt
>= maxshift
)
12528 /* Right shift immediate value can be from 1 to <size>. */
12529 if (last_elt
== 0 || last_elt
> maxshift
)
12534 *elementwidth
= innersize
* 8;
12537 *modconst
= CONST_VECTOR_ELT (op
, 0);
12542 /* Return a string suitable for output of Neon immediate logic operation
12546 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12547 int inverse
, int quad
)
12549 int width
, is_valid
;
12550 static char templ
[40];
12552 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12554 gcc_assert (is_valid
!= 0);
12557 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12559 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12564 /* Return a string suitable for output of Neon immediate shift operation
12565 (VSHR or VSHL) MNEM. */
12568 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12569 machine_mode mode
, int quad
,
12572 int width
, is_valid
;
12573 static char templ
[40];
12575 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12576 gcc_assert (is_valid
!= 0);
12579 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12581 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12586 /* Output a sequence of pairwise operations to implement a reduction.
12587 NOTE: We do "too much work" here, because pairwise operations work on two
12588 registers-worth of operands in one go. Unfortunately we can't exploit those
12589 extra calculations to do the full operation in fewer steps, I don't think.
12590 Although all vector elements of the result but the first are ignored, we
12591 actually calculate the same result in each of the elements. An alternative
12592 such as initially loading a vector with zero to use as each of the second
12593 operands would use up an additional register and take an extra instruction,
12594 for no particular gain. */
12597 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12598 rtx (*reduc
) (rtx
, rtx
, rtx
))
12600 machine_mode inner
= GET_MODE_INNER (mode
);
12601 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
12604 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12606 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12607 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12612 /* If VALS is a vector constant that can be loaded into a register
12613 using VDUP, generate instructions to do so and return an RTX to
12614 assign to the register. Otherwise return NULL_RTX. */
12617 neon_vdup_constant (rtx vals
)
12619 machine_mode mode
= GET_MODE (vals
);
12620 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12621 int n_elts
= GET_MODE_NUNITS (mode
);
12622 bool all_same
= true;
12626 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12629 for (i
= 0; i
< n_elts
; ++i
)
12631 x
= XVECEXP (vals
, 0, i
);
12632 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12637 /* The elements are not all the same. We could handle repeating
12638 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12639 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12643 /* We can load this constant by using VDUP and a constant in a
12644 single ARM register. This will be cheaper than a vector
12647 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12648 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12651 /* Generate code to load VALS, which is a PARALLEL containing only
12652 constants (for vec_init) or CONST_VECTOR, efficiently into a
12653 register. Returns an RTX to copy into the register, or NULL_RTX
12654 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12657 neon_make_constant (rtx vals
)
12659 machine_mode mode
= GET_MODE (vals
);
12661 rtx const_vec
= NULL_RTX
;
12662 int n_elts
= GET_MODE_NUNITS (mode
);
12666 if (GET_CODE (vals
) == CONST_VECTOR
)
12668 else if (GET_CODE (vals
) == PARALLEL
)
12670 /* A CONST_VECTOR must contain only CONST_INTs and
12671 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12672 Only store valid constants in a CONST_VECTOR. */
12673 for (i
= 0; i
< n_elts
; ++i
)
12675 rtx x
= XVECEXP (vals
, 0, i
);
12676 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12679 if (n_const
== n_elts
)
12680 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12683 gcc_unreachable ();
12685 if (const_vec
!= NULL
12686 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12687 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12689 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12690 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12691 pipeline cycle; creating the constant takes one or two ARM
12692 pipeline cycles. */
12694 else if (const_vec
!= NULL_RTX
)
12695 /* Load from constant pool. On Cortex-A8 this takes two cycles
12696 (for either double or quad vectors). We can not take advantage
12697 of single-cycle VLD1 because we need a PC-relative addressing
12701 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12702 We can not construct an initializer. */
12706 /* Initialize vector TARGET to VALS. */
12709 neon_expand_vector_init (rtx target
, rtx vals
)
12711 machine_mode mode
= GET_MODE (target
);
12712 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12713 int n_elts
= GET_MODE_NUNITS (mode
);
12714 int n_var
= 0, one_var
= -1;
12715 bool all_same
= true;
12719 for (i
= 0; i
< n_elts
; ++i
)
12721 x
= XVECEXP (vals
, 0, i
);
12722 if (!CONSTANT_P (x
))
12723 ++n_var
, one_var
= i
;
12725 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12731 rtx constant
= neon_make_constant (vals
);
12732 if (constant
!= NULL_RTX
)
12734 emit_move_insn (target
, constant
);
12739 /* Splat a single non-constant element if we can. */
12740 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12742 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12743 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
12747 /* One field is non-constant. Load constant then overwrite varying
12748 field. This is more efficient than using the stack. */
12751 rtx copy
= copy_rtx (vals
);
12752 rtx index
= GEN_INT (one_var
);
12754 /* Load constant part of vector, substitute neighboring value for
12755 varying element. */
12756 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12757 neon_expand_vector_init (target
, copy
);
12759 /* Insert variable. */
12760 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12764 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12767 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12770 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12773 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12776 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12779 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12782 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12785 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12788 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12791 gcc_unreachable ();
12796 /* Construct the vector in memory one field at a time
12797 and load the whole vector. */
12798 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12799 for (i
= 0; i
< n_elts
; i
++)
12800 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12801 i
* GET_MODE_SIZE (inner_mode
)),
12802 XVECEXP (vals
, 0, i
));
12803 emit_move_insn (target
, mem
);
12806 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12807 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12808 reported source locations are bogus. */
12811 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12814 HOST_WIDE_INT lane
;
12816 gcc_assert (CONST_INT_P (operand
));
12818 lane
= INTVAL (operand
);
12820 if (lane
< low
|| lane
>= high
)
12824 /* Bounds-check lanes. */
12827 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12829 bounds_check (operand
, low
, high
, "lane out of range");
12832 /* Bounds-check constants. */
12835 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12837 bounds_check (operand
, low
, high
, "constant out of range");
12841 neon_element_bits (machine_mode mode
)
12843 if (mode
== DImode
)
12844 return GET_MODE_BITSIZE (mode
);
12846 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
12850 /* Predicates for `match_operand' and `match_operator'. */
12852 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12853 WB is true if full writeback address modes are allowed and is false
12854 if limited writeback address modes (POST_INC and PRE_DEC) are
12858 arm_coproc_mem_operand (rtx op
, bool wb
)
12862 /* Reject eliminable registers. */
12863 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12864 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12865 || reg_mentioned_p (arg_pointer_rtx
, op
)
12866 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12867 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12868 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12869 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12872 /* Constants are converted into offsets from labels. */
12876 ind
= XEXP (op
, 0);
12878 if (reload_completed
12879 && (GET_CODE (ind
) == LABEL_REF
12880 || (GET_CODE (ind
) == CONST
12881 && GET_CODE (XEXP (ind
, 0)) == PLUS
12882 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12883 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12886 /* Match: (mem (reg)). */
12888 return arm_address_register_rtx_p (ind
, 0);
12890 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12891 acceptable in any case (subject to verification by
12892 arm_address_register_rtx_p). We need WB to be true to accept
12893 PRE_INC and POST_DEC. */
12894 if (GET_CODE (ind
) == POST_INC
12895 || GET_CODE (ind
) == PRE_DEC
12897 && (GET_CODE (ind
) == PRE_INC
12898 || GET_CODE (ind
) == POST_DEC
)))
12899 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12902 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12903 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12904 && GET_CODE (XEXP (ind
, 1)) == PLUS
12905 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12906 ind
= XEXP (ind
, 1);
12911 if (GET_CODE (ind
) == PLUS
12912 && REG_P (XEXP (ind
, 0))
12913 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12914 && CONST_INT_P (XEXP (ind
, 1))
12915 && INTVAL (XEXP (ind
, 1)) > -1024
12916 && INTVAL (XEXP (ind
, 1)) < 1024
12917 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12923 /* Return TRUE if OP is a memory operand which we can load or store a vector
12924 to/from. TYPE is one of the following values:
12925 0 - Vector load/stor (vldr)
12926 1 - Core registers (ldm)
12927 2 - Element/structure loads (vld1)
12930 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12934 /* Reject eliminable registers. */
12935 if (! (reload_in_progress
|| reload_completed
)
12936 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12937 || reg_mentioned_p (arg_pointer_rtx
, op
)
12938 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12939 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12940 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12941 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12944 /* Constants are converted into offsets from labels. */
12948 ind
= XEXP (op
, 0);
12950 if (reload_completed
12951 && (GET_CODE (ind
) == LABEL_REF
12952 || (GET_CODE (ind
) == CONST
12953 && GET_CODE (XEXP (ind
, 0)) == PLUS
12954 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12955 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12958 /* Match: (mem (reg)). */
12960 return arm_address_register_rtx_p (ind
, 0);
12962 /* Allow post-increment with Neon registers. */
12963 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12964 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12965 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12967 /* Allow post-increment by register for VLDn */
12968 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12969 && GET_CODE (XEXP (ind
, 1)) == PLUS
12970 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12977 && GET_CODE (ind
) == PLUS
12978 && REG_P (XEXP (ind
, 0))
12979 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12980 && CONST_INT_P (XEXP (ind
, 1))
12981 && INTVAL (XEXP (ind
, 1)) > -1024
12982 /* For quad modes, we restrict the constant offset to be slightly less
12983 than what the instruction format permits. We have no such constraint
12984 on double mode offsets. (This must match arm_legitimate_index_p.) */
12985 && (INTVAL (XEXP (ind
, 1))
12986 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12987 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12993 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12996 neon_struct_mem_operand (rtx op
)
13000 /* Reject eliminable registers. */
13001 if (! (reload_in_progress
|| reload_completed
)
13002 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13003 || reg_mentioned_p (arg_pointer_rtx
, op
)
13004 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13005 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13006 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13007 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13010 /* Constants are converted into offsets from labels. */
13014 ind
= XEXP (op
, 0);
13016 if (reload_completed
13017 && (GET_CODE (ind
) == LABEL_REF
13018 || (GET_CODE (ind
) == CONST
13019 && GET_CODE (XEXP (ind
, 0)) == PLUS
13020 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13021 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13024 /* Match: (mem (reg)). */
13026 return arm_address_register_rtx_p (ind
, 0);
13028 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13029 if (GET_CODE (ind
) == POST_INC
13030 || GET_CODE (ind
) == PRE_DEC
)
13031 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13036 /* Return true if X is a register that will be eliminated later on. */
13038 arm_eliminable_register (rtx x
)
13040 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
13041 || REGNO (x
) == ARG_POINTER_REGNUM
13042 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
13043 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
13046 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13047 coprocessor registers. Otherwise return NO_REGS. */
13050 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13052 if (mode
== HFmode
)
13054 if (!TARGET_NEON_FP16
)
13055 return GENERAL_REGS
;
13056 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13058 return GENERAL_REGS
;
13061 /* The neon move patterns handle all legitimate vector and struct
13064 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13065 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13066 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13067 || VALID_NEON_STRUCT_MODE (mode
)))
13070 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13073 return GENERAL_REGS
;
13076 /* Values which must be returned in the most-significant end of the return
13080 arm_return_in_msb (const_tree valtype
)
13082 return (TARGET_AAPCS_BASED
13083 && BYTES_BIG_ENDIAN
13084 && (AGGREGATE_TYPE_P (valtype
)
13085 || TREE_CODE (valtype
) == COMPLEX_TYPE
13086 || FIXED_POINT_TYPE_P (valtype
)));
13089 /* Return TRUE if X references a SYMBOL_REF. */
13091 symbol_mentioned_p (rtx x
)
13096 if (GET_CODE (x
) == SYMBOL_REF
)
13099 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13100 are constant offsets, not symbols. */
13101 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13104 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13106 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13112 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13113 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
13116 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
13123 /* Return TRUE if X references a LABEL_REF. */
13125 label_mentioned_p (rtx x
)
13130 if (GET_CODE (x
) == LABEL_REF
)
13133 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13134 instruction, but they are constant offsets, not symbols. */
13135 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13138 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13139 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13145 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13146 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
13149 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
13157 tls_mentioned_p (rtx x
)
13159 switch (GET_CODE (x
))
13162 return tls_mentioned_p (XEXP (x
, 0));
13165 if (XINT (x
, 1) == UNSPEC_TLS
)
13173 /* Must not copy any rtx that uses a pc-relative address. */
13176 arm_cannot_copy_insn_p (rtx_insn
*insn
)
13178 /* The tls call insn cannot be copied, as it is paired with a data
13180 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13183 subrtx_iterator::array_type array
;
13184 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
13186 const_rtx x
= *iter
;
13187 if (GET_CODE (x
) == UNSPEC
13188 && (XINT (x
, 1) == UNSPEC_PIC_BASE
13189 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
13196 minmax_code (rtx x
)
13198 enum rtx_code code
= GET_CODE (x
);
13211 gcc_unreachable ();
13215 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13218 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13219 int *mask
, bool *signed_sat
)
13221 /* The high bound must be a power of two minus one. */
13222 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13226 /* The low bound is either zero (for usat) or one less than the
13227 negation of the high bound (for ssat). */
13228 if (INTVAL (lo_bound
) == 0)
13233 *signed_sat
= false;
13238 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13243 *signed_sat
= true;
13251 /* Return 1 if memory locations are adjacent. */
13253 adjacent_mem_locations (rtx a
, rtx b
)
13255 /* We don't guarantee to preserve the order of these memory refs. */
13256 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13259 if ((REG_P (XEXP (a
, 0))
13260 || (GET_CODE (XEXP (a
, 0)) == PLUS
13261 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13262 && (REG_P (XEXP (b
, 0))
13263 || (GET_CODE (XEXP (b
, 0)) == PLUS
13264 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13266 HOST_WIDE_INT val0
= 0, val1
= 0;
13270 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13272 reg0
= XEXP (XEXP (a
, 0), 0);
13273 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13276 reg0
= XEXP (a
, 0);
13278 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13280 reg1
= XEXP (XEXP (b
, 0), 0);
13281 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13284 reg1
= XEXP (b
, 0);
13286 /* Don't accept any offset that will require multiple
13287 instructions to handle, since this would cause the
13288 arith_adjacentmem pattern to output an overlong sequence. */
13289 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13292 /* Don't allow an eliminable register: register elimination can make
13293 the offset too large. */
13294 if (arm_eliminable_register (reg0
))
13297 val_diff
= val1
- val0
;
13301 /* If the target has load delay slots, then there's no benefit
13302 to using an ldm instruction unless the offset is zero and
13303 we are optimizing for size. */
13304 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13305 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13306 && (val_diff
== 4 || val_diff
== -4));
13309 return ((REGNO (reg0
) == REGNO (reg1
))
13310 && (val_diff
== 4 || val_diff
== -4));
13316 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13317 for load operations, false for store operations. CONSECUTIVE is true
13318 if the register numbers in the operation must be consecutive in the register
13319 bank. RETURN_PC is true if value is to be loaded in PC.
13320 The pattern we are trying to match for load is:
13321 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13322 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13325 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13328 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13329 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13330 3. If consecutive is TRUE, then for kth register being loaded,
13331 REGNO (R_dk) = REGNO (R_d0) + k.
13332 The pattern for store is similar. */
13334 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
13335 bool consecutive
, bool return_pc
)
13337 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13338 rtx reg
, mem
, addr
;
13340 unsigned first_regno
;
13341 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13343 bool addr_reg_in_reglist
= false;
13344 bool update
= false;
13349 /* If not in SImode, then registers must be consecutive
13350 (e.g., VLDM instructions for DFmode). */
13351 gcc_assert ((mode
== SImode
) || consecutive
);
13352 /* Setting return_pc for stores is illegal. */
13353 gcc_assert (!return_pc
|| load
);
13355 /* Set up the increments and the regs per val based on the mode. */
13356 reg_increment
= GET_MODE_SIZE (mode
);
13357 regs_per_val
= reg_increment
/ 4;
13358 offset_adj
= return_pc
? 1 : 0;
13361 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13362 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13365 /* Check if this is a write-back. */
13366 elt
= XVECEXP (op
, 0, offset_adj
);
13367 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13373 /* The offset adjustment must be the number of registers being
13374 popped times the size of a single register. */
13375 if (!REG_P (SET_DEST (elt
))
13376 || !REG_P (XEXP (SET_SRC (elt
), 0))
13377 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13378 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13379 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13380 ((count
- 1 - offset_adj
) * reg_increment
))
13384 i
= i
+ offset_adj
;
13385 base
= base
+ offset_adj
;
13386 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13387 success depends on the type: VLDM can do just one reg,
13388 LDM must do at least two. */
13389 if ((count
<= i
) && (mode
== SImode
))
13392 elt
= XVECEXP (op
, 0, i
- 1);
13393 if (GET_CODE (elt
) != SET
)
13398 reg
= SET_DEST (elt
);
13399 mem
= SET_SRC (elt
);
13403 reg
= SET_SRC (elt
);
13404 mem
= SET_DEST (elt
);
13407 if (!REG_P (reg
) || !MEM_P (mem
))
13410 regno
= REGNO (reg
);
13411 first_regno
= regno
;
13412 addr
= XEXP (mem
, 0);
13413 if (GET_CODE (addr
) == PLUS
)
13415 if (!CONST_INT_P (XEXP (addr
, 1)))
13418 offset
= INTVAL (XEXP (addr
, 1));
13419 addr
= XEXP (addr
, 0);
13425 /* Don't allow SP to be loaded unless it is also the base register. It
13426 guarantees that SP is reset correctly when an LDM instruction
13427 is interrupted. Otherwise, we might end up with a corrupt stack. */
13428 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13431 for (; i
< count
; i
++)
13433 elt
= XVECEXP (op
, 0, i
);
13434 if (GET_CODE (elt
) != SET
)
13439 reg
= SET_DEST (elt
);
13440 mem
= SET_SRC (elt
);
13444 reg
= SET_SRC (elt
);
13445 mem
= SET_DEST (elt
);
13449 || GET_MODE (reg
) != mode
13450 || REGNO (reg
) <= regno
13453 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13454 /* Don't allow SP to be loaded unless it is also the base register. It
13455 guarantees that SP is reset correctly when an LDM instruction
13456 is interrupted. Otherwise, we might end up with a corrupt stack. */
13457 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13459 || GET_MODE (mem
) != mode
13460 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13461 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13462 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13463 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13464 offset
+ (i
- base
) * reg_increment
))
13465 && (!REG_P (XEXP (mem
, 0))
13466 || offset
+ (i
- base
) * reg_increment
!= 0)))
13469 regno
= REGNO (reg
);
13470 if (regno
== REGNO (addr
))
13471 addr_reg_in_reglist
= true;
13476 if (update
&& addr_reg_in_reglist
)
13479 /* For Thumb-1, address register is always modified - either by write-back
13480 or by explicit load. If the pattern does not describe an update,
13481 then the address register must be in the list of loaded registers. */
13483 return update
|| addr_reg_in_reglist
;
13489 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13490 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13491 instruction. ADD_OFFSET is nonzero if the base address register needs
13492 to be modified with an add instruction before we can use it. */
13495 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13496 int nops
, HOST_WIDE_INT add_offset
)
13498 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13499 if the offset isn't small enough. The reason 2 ldrs are faster
13500 is because these ARMs are able to do more than one cache access
13501 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13502 whilst the ARM8 has a double bandwidth cache. This means that
13503 these cores can do both an instruction fetch and a data fetch in
13504 a single cycle, so the trick of calculating the address into a
13505 scratch register (one of the result regs) and then doing a load
13506 multiple actually becomes slower (and no smaller in code size).
13507 That is the transformation
13509 ldr rd1, [rbase + offset]
13510 ldr rd2, [rbase + offset + 4]
13514 add rd1, rbase, offset
13515 ldmia rd1, {rd1, rd2}
13517 produces worse code -- '3 cycles + any stalls on rd2' instead of
13518 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13519 access per cycle, the first sequence could never complete in less
13520 than 6 cycles, whereas the ldm sequence would only take 5 and
13521 would make better use of sequential accesses if not hitting the
13524 We cheat here and test 'arm_ld_sched' which we currently know to
13525 only be true for the ARM8, ARM9 and StrongARM. If this ever
13526 changes, then the test below needs to be reworked. */
13527 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13530 /* XScale has load-store double instructions, but they have stricter
13531 alignment requirements than load-store multiple, so we cannot
13534 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13535 the pipeline until completion.
13543 An ldr instruction takes 1-3 cycles, but does not block the
13552 Best case ldr will always win. However, the more ldr instructions
13553 we issue, the less likely we are to be able to schedule them well.
13554 Using ldr instructions also increases code size.
13556 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13557 for counts of 3 or 4 regs. */
13558 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13563 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13564 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13565 an array ORDER which describes the sequence to use when accessing the
13566 offsets that produces an ascending order. In this sequence, each
13567 offset must be larger by exactly 4 than the previous one. ORDER[0]
13568 must have been filled in with the lowest offset by the caller.
13569 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13570 we use to verify that ORDER produces an ascending order of registers.
13571 Return true if it was possible to construct such an order, false if
13575 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13576 int *unsorted_regs
)
13579 for (i
= 1; i
< nops
; i
++)
13583 order
[i
] = order
[i
- 1];
13584 for (j
= 0; j
< nops
; j
++)
13585 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13587 /* We must find exactly one offset that is higher than the
13588 previous one by 4. */
13589 if (order
[i
] != order
[i
- 1])
13593 if (order
[i
] == order
[i
- 1])
13595 /* The register numbers must be ascending. */
13596 if (unsorted_regs
!= NULL
13597 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13603 /* Used to determine in a peephole whether a sequence of load
13604 instructions can be changed into a load-multiple instruction.
13605 NOPS is the number of separate load instructions we are examining. The
13606 first NOPS entries in OPERANDS are the destination registers, the
13607 next NOPS entries are memory operands. If this function is
13608 successful, *BASE is set to the common base register of the memory
13609 accesses; *LOAD_OFFSET is set to the first memory location's offset
13610 from that base register.
13611 REGS is an array filled in with the destination register numbers.
13612 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13613 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13614 the sequence of registers in REGS matches the loads from ascending memory
13615 locations, and the function verifies that the register numbers are
13616 themselves ascending. If CHECK_REGS is false, the register numbers
13617 are stored in the order they are found in the operands. */
13619 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13620 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13622 int unsorted_regs
[MAX_LDM_STM_OPS
];
13623 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13624 int order
[MAX_LDM_STM_OPS
];
13625 rtx base_reg_rtx
= NULL
;
13629 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13630 easily extended if required. */
13631 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13633 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13635 /* Loop over the operands and check that the memory references are
13636 suitable (i.e. immediate offsets from the same base register). At
13637 the same time, extract the target register, and the memory
13639 for (i
= 0; i
< nops
; i
++)
13644 /* Convert a subreg of a mem into the mem itself. */
13645 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13646 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13648 gcc_assert (MEM_P (operands
[nops
+ i
]));
13650 /* Don't reorder volatile memory references; it doesn't seem worth
13651 looking for the case where the order is ok anyway. */
13652 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13655 offset
= const0_rtx
;
13657 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13658 || (GET_CODE (reg
) == SUBREG
13659 && REG_P (reg
= SUBREG_REG (reg
))))
13660 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13661 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13662 || (GET_CODE (reg
) == SUBREG
13663 && REG_P (reg
= SUBREG_REG (reg
))))
13664 && (CONST_INT_P (offset
13665 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13669 base_reg
= REGNO (reg
);
13670 base_reg_rtx
= reg
;
13671 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13674 else if (base_reg
!= (int) REGNO (reg
))
13675 /* Not addressed from the same base register. */
13678 unsorted_regs
[i
] = (REG_P (operands
[i
])
13679 ? REGNO (operands
[i
])
13680 : REGNO (SUBREG_REG (operands
[i
])));
13682 /* If it isn't an integer register, or if it overwrites the
13683 base register but isn't the last insn in the list, then
13684 we can't do this. */
13685 if (unsorted_regs
[i
] < 0
13686 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13687 || unsorted_regs
[i
] > 14
13688 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13691 /* Don't allow SP to be loaded unless it is also the base
13692 register. It guarantees that SP is reset correctly when
13693 an LDM instruction is interrupted. Otherwise, we might
13694 end up with a corrupt stack. */
13695 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13698 unsorted_offsets
[i
] = INTVAL (offset
);
13699 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13703 /* Not a suitable memory address. */
13707 /* All the useful information has now been extracted from the
13708 operands into unsorted_regs and unsorted_offsets; additionally,
13709 order[0] has been set to the lowest offset in the list. Sort
13710 the offsets into order, verifying that they are adjacent, and
13711 check that the register numbers are ascending. */
13712 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13713 check_regs
? unsorted_regs
: NULL
))
13717 memcpy (saved_order
, order
, sizeof order
);
13723 for (i
= 0; i
< nops
; i
++)
13724 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13726 *load_offset
= unsorted_offsets
[order
[0]];
13730 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13733 if (unsorted_offsets
[order
[0]] == 0)
13734 ldm_case
= 1; /* ldmia */
13735 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13736 ldm_case
= 2; /* ldmib */
13737 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13738 ldm_case
= 3; /* ldmda */
13739 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13740 ldm_case
= 4; /* ldmdb */
13741 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13742 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13747 if (!multiple_operation_profitable_p (false, nops
,
13749 ? unsorted_offsets
[order
[0]] : 0))
13755 /* Used to determine in a peephole whether a sequence of store instructions can
13756 be changed into a store-multiple instruction.
13757 NOPS is the number of separate store instructions we are examining.
13758 NOPS_TOTAL is the total number of instructions recognized by the peephole
13760 The first NOPS entries in OPERANDS are the source registers, the next
13761 NOPS entries are memory operands. If this function is successful, *BASE is
13762 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13763 to the first memory location's offset from that base register. REGS is an
13764 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13765 likewise filled with the corresponding rtx's.
13766 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13767 numbers to an ascending order of stores.
13768 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13769 from ascending memory locations, and the function verifies that the register
13770 numbers are themselves ascending. If CHECK_REGS is false, the register
13771 numbers are stored in the order they are found in the operands. */
13773 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13774 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13775 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13777 int unsorted_regs
[MAX_LDM_STM_OPS
];
13778 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13779 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13780 int order
[MAX_LDM_STM_OPS
];
13782 rtx base_reg_rtx
= NULL
;
13785 /* Write back of base register is currently only supported for Thumb 1. */
13786 int base_writeback
= TARGET_THUMB1
;
13788 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13789 easily extended if required. */
13790 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13792 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13794 /* Loop over the operands and check that the memory references are
13795 suitable (i.e. immediate offsets from the same base register). At
13796 the same time, extract the target register, and the memory
13798 for (i
= 0; i
< nops
; i
++)
13803 /* Convert a subreg of a mem into the mem itself. */
13804 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13805 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13807 gcc_assert (MEM_P (operands
[nops
+ i
]));
13809 /* Don't reorder volatile memory references; it doesn't seem worth
13810 looking for the case where the order is ok anyway. */
13811 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13814 offset
= const0_rtx
;
13816 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13817 || (GET_CODE (reg
) == SUBREG
13818 && REG_P (reg
= SUBREG_REG (reg
))))
13819 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13820 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13821 || (GET_CODE (reg
) == SUBREG
13822 && REG_P (reg
= SUBREG_REG (reg
))))
13823 && (CONST_INT_P (offset
13824 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13826 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13827 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13828 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13832 base_reg
= REGNO (reg
);
13833 base_reg_rtx
= reg
;
13834 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13837 else if (base_reg
!= (int) REGNO (reg
))
13838 /* Not addressed from the same base register. */
13841 /* If it isn't an integer register, then we can't do this. */
13842 if (unsorted_regs
[i
] < 0
13843 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13844 /* The effects are unpredictable if the base register is
13845 both updated and stored. */
13846 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13847 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13848 || unsorted_regs
[i
] > 14)
13851 unsorted_offsets
[i
] = INTVAL (offset
);
13852 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13856 /* Not a suitable memory address. */
13860 /* All the useful information has now been extracted from the
13861 operands into unsorted_regs and unsorted_offsets; additionally,
13862 order[0] has been set to the lowest offset in the list. Sort
13863 the offsets into order, verifying that they are adjacent, and
13864 check that the register numbers are ascending. */
13865 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13866 check_regs
? unsorted_regs
: NULL
))
13870 memcpy (saved_order
, order
, sizeof order
);
13876 for (i
= 0; i
< nops
; i
++)
13878 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13880 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13883 *load_offset
= unsorted_offsets
[order
[0]];
13887 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13890 if (unsorted_offsets
[order
[0]] == 0)
13891 stm_case
= 1; /* stmia */
13892 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13893 stm_case
= 2; /* stmib */
13894 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13895 stm_case
= 3; /* stmda */
13896 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13897 stm_case
= 4; /* stmdb */
13901 if (!multiple_operation_profitable_p (false, nops
, 0))
13907 /* Routines for use in generating RTL. */
13909 /* Generate a load-multiple instruction. COUNT is the number of loads in
13910 the instruction; REGS and MEMS are arrays containing the operands.
13911 BASEREG is the base register to be used in addressing the memory operands.
13912 WBACK_OFFSET is nonzero if the instruction should update the base
13916 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13917 HOST_WIDE_INT wback_offset
)
13922 if (!multiple_operation_profitable_p (false, count
, 0))
13928 for (i
= 0; i
< count
; i
++)
13929 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13931 if (wback_offset
!= 0)
13932 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13934 seq
= get_insns ();
13940 result
= gen_rtx_PARALLEL (VOIDmode
,
13941 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13942 if (wback_offset
!= 0)
13944 XVECEXP (result
, 0, 0)
13945 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13950 for (j
= 0; i
< count
; i
++, j
++)
13951 XVECEXP (result
, 0, i
)
13952 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13957 /* Generate a store-multiple instruction. COUNT is the number of stores in
13958 the instruction; REGS and MEMS are arrays containing the operands.
13959 BASEREG is the base register to be used in addressing the memory operands.
13960 WBACK_OFFSET is nonzero if the instruction should update the base
13964 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13965 HOST_WIDE_INT wback_offset
)
13970 if (GET_CODE (basereg
) == PLUS
)
13971 basereg
= XEXP (basereg
, 0);
13973 if (!multiple_operation_profitable_p (false, count
, 0))
13979 for (i
= 0; i
< count
; i
++)
13980 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13982 if (wback_offset
!= 0)
13983 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13985 seq
= get_insns ();
13991 result
= gen_rtx_PARALLEL (VOIDmode
,
13992 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13993 if (wback_offset
!= 0)
13995 XVECEXP (result
, 0, 0)
13996 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14001 for (j
= 0; i
< count
; i
++, j
++)
14002 XVECEXP (result
, 0, i
)
14003 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
14008 /* Generate either a load-multiple or a store-multiple instruction. This
14009 function can be used in situations where we can start with a single MEM
14010 rtx and adjust its address upwards.
14011 COUNT is the number of operations in the instruction, not counting a
14012 possible update of the base register. REGS is an array containing the
14014 BASEREG is the base register to be used in addressing the memory operands,
14015 which are constructed from BASEMEM.
14016 WRITE_BACK specifies whether the generated instruction should include an
14017 update of the base register.
14018 OFFSETP is used to pass an offset to and from this function; this offset
14019 is not used when constructing the address (instead BASEMEM should have an
14020 appropriate offset in its address), it is used only for setting
14021 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14024 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
14025 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
14027 rtx mems
[MAX_LDM_STM_OPS
];
14028 HOST_WIDE_INT offset
= *offsetp
;
14031 gcc_assert (count
<= MAX_LDM_STM_OPS
);
14033 if (GET_CODE (basereg
) == PLUS
)
14034 basereg
= XEXP (basereg
, 0);
14036 for (i
= 0; i
< count
; i
++)
14038 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
14039 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
14047 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
14048 write_back
? 4 * count
: 0);
14050 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
14051 write_back
? 4 * count
: 0);
14055 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14056 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14058 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
14063 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14064 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14066 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
14070 /* Called from a peephole2 expander to turn a sequence of loads into an
14071 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14072 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14073 is true if we can reorder the registers because they are used commutatively
14075 Returns true iff we could generate a new instruction. */
14078 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
14080 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14081 rtx mems
[MAX_LDM_STM_OPS
];
14082 int i
, j
, base_reg
;
14084 HOST_WIDE_INT offset
;
14085 int write_back
= FALSE
;
14089 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
14090 &base_reg
, &offset
, !sort_regs
);
14096 for (i
= 0; i
< nops
- 1; i
++)
14097 for (j
= i
+ 1; j
< nops
; j
++)
14098 if (regs
[i
] > regs
[j
])
14104 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14108 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
14109 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
14115 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
14116 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
14118 if (!TARGET_THUMB1
)
14120 base_reg
= regs
[0];
14121 base_reg_rtx
= newbase
;
14125 for (i
= 0; i
< nops
; i
++)
14127 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14128 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14131 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14132 write_back
? offset
+ i
* 4 : 0));
14136 /* Called from a peephole2 expander to turn a sequence of stores into an
14137 STM instruction. OPERANDS are the operands found by the peephole matcher;
14138 NOPS indicates how many separate stores we are trying to combine.
14139 Returns true iff we could generate a new instruction. */
14142 gen_stm_seq (rtx
*operands
, int nops
)
14145 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14146 rtx mems
[MAX_LDM_STM_OPS
];
14149 HOST_WIDE_INT offset
;
14150 int write_back
= FALSE
;
14153 bool base_reg_dies
;
14155 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
14156 mem_order
, &base_reg
, &offset
, true);
14161 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14163 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
14166 gcc_assert (base_reg_dies
);
14172 gcc_assert (base_reg_dies
);
14173 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14177 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14179 for (i
= 0; i
< nops
; i
++)
14181 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14182 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14185 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14186 write_back
? offset
+ i
* 4 : 0));
14190 /* Called from a peephole2 expander to turn a sequence of stores that are
14191 preceded by constant loads into an STM instruction. OPERANDS are the
14192 operands found by the peephole matcher; NOPS indicates how many
14193 separate stores we are trying to combine; there are 2 * NOPS
14194 instructions in the peephole.
14195 Returns true iff we could generate a new instruction. */
14198 gen_const_stm_seq (rtx
*operands
, int nops
)
14200 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14201 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14202 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14203 rtx mems
[MAX_LDM_STM_OPS
];
14206 HOST_WIDE_INT offset
;
14207 int write_back
= FALSE
;
14210 bool base_reg_dies
;
14212 HARD_REG_SET allocated
;
14214 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14215 mem_order
, &base_reg
, &offset
, false);
14220 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14222 /* If the same register is used more than once, try to find a free
14224 CLEAR_HARD_REG_SET (allocated
);
14225 for (i
= 0; i
< nops
; i
++)
14227 for (j
= i
+ 1; j
< nops
; j
++)
14228 if (regs
[i
] == regs
[j
])
14230 rtx t
= peep2_find_free_register (0, nops
* 2,
14231 TARGET_THUMB1
? "l" : "r",
14232 SImode
, &allocated
);
14236 regs
[i
] = REGNO (t
);
14240 /* Compute an ordering that maps the register numbers to an ascending
14243 for (i
= 0; i
< nops
; i
++)
14244 if (regs
[i
] < regs
[reg_order
[0]])
14247 for (i
= 1; i
< nops
; i
++)
14249 int this_order
= reg_order
[i
- 1];
14250 for (j
= 0; j
< nops
; j
++)
14251 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14252 && (this_order
== reg_order
[i
- 1]
14253 || regs
[j
] < regs
[this_order
]))
14255 reg_order
[i
] = this_order
;
14258 /* Ensure that registers that must be live after the instruction end
14259 up with the correct value. */
14260 for (i
= 0; i
< nops
; i
++)
14262 int this_order
= reg_order
[i
];
14263 if ((this_order
!= mem_order
[i
]
14264 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14265 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14269 /* Load the constants. */
14270 for (i
= 0; i
< nops
; i
++)
14272 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14273 sorted_regs
[i
] = regs
[reg_order
[i
]];
14274 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14277 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14279 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14282 gcc_assert (base_reg_dies
);
14288 gcc_assert (base_reg_dies
);
14289 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14293 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14295 for (i
= 0; i
< nops
; i
++)
14297 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14298 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14301 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14302 write_back
? offset
+ i
* 4 : 0));
14306 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14307 unaligned copies on processors which support unaligned semantics for those
14308 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14309 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14310 An interleave factor of 1 (the minimum) will perform no interleaving.
14311 Load/store multiple are used for aligned addresses where possible. */
14314 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14315 HOST_WIDE_INT length
,
14316 unsigned int interleave_factor
)
14318 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14319 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14320 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14321 HOST_WIDE_INT i
, j
;
14322 HOST_WIDE_INT remaining
= length
, words
;
14323 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14325 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14326 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14327 HOST_WIDE_INT srcoffset
, dstoffset
;
14328 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14331 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14333 /* Use hard registers if we have aligned source or destination so we can use
14334 load/store multiple with contiguous registers. */
14335 if (dst_aligned
|| src_aligned
)
14336 for (i
= 0; i
< interleave_factor
; i
++)
14337 regs
[i
] = gen_rtx_REG (SImode
, i
);
14339 for (i
= 0; i
< interleave_factor
; i
++)
14340 regs
[i
] = gen_reg_rtx (SImode
);
14342 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14343 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14345 srcoffset
= dstoffset
= 0;
14347 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14348 For copying the last bytes we want to subtract this offset again. */
14349 src_autoinc
= dst_autoinc
= 0;
14351 for (i
= 0; i
< interleave_factor
; i
++)
14354 /* Copy BLOCK_SIZE_BYTES chunks. */
14356 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14359 if (src_aligned
&& interleave_factor
> 1)
14361 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14362 TRUE
, srcbase
, &srcoffset
));
14363 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14367 for (j
= 0; j
< interleave_factor
; j
++)
14369 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14371 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14372 srcoffset
+ j
* UNITS_PER_WORD
);
14373 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14375 srcoffset
+= block_size_bytes
;
14379 if (dst_aligned
&& interleave_factor
> 1)
14381 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14382 TRUE
, dstbase
, &dstoffset
));
14383 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14387 for (j
= 0; j
< interleave_factor
; j
++)
14389 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14391 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14392 dstoffset
+ j
* UNITS_PER_WORD
);
14393 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14395 dstoffset
+= block_size_bytes
;
14398 remaining
-= block_size_bytes
;
14401 /* Copy any whole words left (note these aren't interleaved with any
14402 subsequent halfword/byte load/stores in the interests of simplicity). */
14404 words
= remaining
/ UNITS_PER_WORD
;
14406 gcc_assert (words
< interleave_factor
);
14408 if (src_aligned
&& words
> 1)
14410 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14412 src_autoinc
+= UNITS_PER_WORD
* words
;
14416 for (j
= 0; j
< words
; j
++)
14418 addr
= plus_constant (Pmode
, src
,
14419 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14420 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14421 srcoffset
+ j
* UNITS_PER_WORD
);
14422 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14424 srcoffset
+= words
* UNITS_PER_WORD
;
14427 if (dst_aligned
&& words
> 1)
14429 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14431 dst_autoinc
+= words
* UNITS_PER_WORD
;
14435 for (j
= 0; j
< words
; j
++)
14437 addr
= plus_constant (Pmode
, dst
,
14438 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14439 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14440 dstoffset
+ j
* UNITS_PER_WORD
);
14441 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14443 dstoffset
+= words
* UNITS_PER_WORD
;
14446 remaining
-= words
* UNITS_PER_WORD
;
14448 gcc_assert (remaining
< 4);
14450 /* Copy a halfword if necessary. */
14452 if (remaining
>= 2)
14454 halfword_tmp
= gen_reg_rtx (SImode
);
14456 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14457 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14458 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14460 /* Either write out immediately, or delay until we've loaded the last
14461 byte, depending on interleave factor. */
14462 if (interleave_factor
== 1)
14464 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14465 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14466 emit_insn (gen_unaligned_storehi (mem
,
14467 gen_lowpart (HImode
, halfword_tmp
)));
14468 halfword_tmp
= NULL
;
14476 gcc_assert (remaining
< 2);
14478 /* Copy last byte. */
14480 if ((remaining
& 1) != 0)
14482 byte_tmp
= gen_reg_rtx (SImode
);
14484 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14485 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14486 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14488 if (interleave_factor
== 1)
14490 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14491 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14492 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14501 /* Store last halfword if we haven't done so already. */
14505 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14506 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14507 emit_insn (gen_unaligned_storehi (mem
,
14508 gen_lowpart (HImode
, halfword_tmp
)));
14512 /* Likewise for last byte. */
14516 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14517 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14518 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14522 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14525 /* From mips_adjust_block_mem:
14527 Helper function for doing a loop-based block operation on memory
14528 reference MEM. Each iteration of the loop will operate on LENGTH
14531 Create a new base register for use within the loop and point it to
14532 the start of MEM. Create a new memory reference that uses this
14533 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14536 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14539 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14541 /* Although the new mem does not refer to a known location,
14542 it does keep up to LENGTH bytes of alignment. */
14543 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14544 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14547 /* From mips_block_move_loop:
14549 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14550 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14551 the memory regions do not overlap. */
14554 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14555 unsigned int interleave_factor
,
14556 HOST_WIDE_INT bytes_per_iter
)
14558 rtx src_reg
, dest_reg
, final_src
, test
;
14559 HOST_WIDE_INT leftover
;
14561 leftover
= length
% bytes_per_iter
;
14562 length
-= leftover
;
14564 /* Create registers and memory references for use within the loop. */
14565 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14566 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14568 /* Calculate the value that SRC_REG should have after the last iteration of
14570 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14571 0, 0, OPTAB_WIDEN
);
14573 /* Emit the start of the loop. */
14574 rtx_code_label
*label
= gen_label_rtx ();
14575 emit_label (label
);
14577 /* Emit the loop body. */
14578 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14579 interleave_factor
);
14581 /* Move on to the next block. */
14582 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14583 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14585 /* Emit the loop condition. */
14586 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14587 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14589 /* Mop up any left-over bytes. */
14591 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14594 /* Emit a block move when either the source or destination is unaligned (not
14595 aligned to a four-byte boundary). This may need further tuning depending on
14596 core type, optimize_size setting, etc. */
/* NOTE(review): this extract is missing interior lines (the embedded
   original numbering jumps, e.g. 14596 -> 14599), so the return type and
   the control flow selecting between the size- and speed-optimized
   strategies below are not fully visible -- confirm against the
   complete source.  */
14599 arm_movmemqi_unaligned (rtx
*operands
)
/* Total number of bytes to copy, taken from the length operand.  */
14601 HOST_WIDE_INT length
= INTVAL (operands
[2]);
/* A buffer counts as aligned here when its known alignment is at least
   one machine word.  */
14605 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14606 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14607 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14608 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14609 or dst_aligned though: allow more interleaving in those cases since the
14610 resulting code can be smaller. */
14611 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14612 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
/* Size-optimized path: use the modest interleave chosen above.  */
14615 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14616 interleave_factor
, bytes_per_iter
);
14618 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14619 interleave_factor
);
14623 /* Note that the loop created by arm_block_move_unaligned_loop may be
14624 subject to loop unrolling, which makes tuning this condition a little
/* Speed-optimized path: fixed interleave factor of 4, 16 bytes moved
   per loop iteration.  */
14627 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14629 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14636 arm_gen_movmemqi (rtx
*operands
)
14638 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14639 HOST_WIDE_INT srcoffset
, dstoffset
;
14641 rtx src
, dst
, srcbase
, dstbase
;
14642 rtx part_bytes_reg
= NULL
;
14645 if (!CONST_INT_P (operands
[2])
14646 || !CONST_INT_P (operands
[3])
14647 || INTVAL (operands
[2]) > 64)
14650 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14651 return arm_movmemqi_unaligned (operands
);
14653 if (INTVAL (operands
[3]) & 3)
14656 dstbase
= operands
[0];
14657 srcbase
= operands
[1];
14659 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14660 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14662 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14663 out_words_to_go
= INTVAL (operands
[2]) / 4;
14664 last_bytes
= INTVAL (operands
[2]) & 3;
14665 dstoffset
= srcoffset
= 0;
14667 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14668 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14670 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14672 if (in_words_to_go
> 4)
14673 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14674 TRUE
, srcbase
, &srcoffset
));
14676 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14677 src
, FALSE
, srcbase
,
14680 if (out_words_to_go
)
14682 if (out_words_to_go
> 4)
14683 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14684 TRUE
, dstbase
, &dstoffset
));
14685 else if (out_words_to_go
!= 1)
14686 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14687 out_words_to_go
, dst
,
14690 dstbase
, &dstoffset
));
14693 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14694 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14695 if (last_bytes
!= 0)
14697 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14703 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14704 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14707 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14708 if (out_words_to_go
)
14712 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14713 sreg
= copy_to_reg (mem
);
14715 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14716 emit_move_insn (mem
, sreg
);
14719 gcc_assert (!in_words_to_go
); /* Sanity check */
14722 if (in_words_to_go
)
14724 gcc_assert (in_words_to_go
> 0);
14726 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14727 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14730 gcc_assert (!last_bytes
|| part_bytes_reg
);
14732 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14734 rtx tmp
= gen_reg_rtx (SImode
);
14736 /* The bytes we want are in the top end of the word. */
14737 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14738 GEN_INT (8 * (4 - last_bytes
))));
14739 part_bytes_reg
= tmp
;
14743 mem
= adjust_automodify_address (dstbase
, QImode
,
14744 plus_constant (Pmode
, dst
,
14746 dstoffset
+ last_bytes
- 1);
14747 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14751 tmp
= gen_reg_rtx (SImode
);
14752 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14753 part_bytes_reg
= tmp
;
14760 if (last_bytes
> 1)
14762 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14763 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14767 rtx tmp
= gen_reg_rtx (SImode
);
14768 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14769 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14770 part_bytes_reg
= tmp
;
14777 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14778 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14785 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14788 next_consecutive_mem (rtx mem
)
14790 machine_mode mode
= GET_MODE (mem
);
14791 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14792 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14794 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14797 /* Copy using LDRD/STRD instructions whenever possible.
14798 Returns true upon success. */
14800 gen_movmem_ldrd_strd (rtx
*operands
)
14802 unsigned HOST_WIDE_INT len
;
14803 HOST_WIDE_INT align
;
14804 rtx src
, dst
, base
;
14806 bool src_aligned
, dst_aligned
;
14807 bool src_volatile
, dst_volatile
;
14809 gcc_assert (CONST_INT_P (operands
[2]));
14810 gcc_assert (CONST_INT_P (operands
[3]));
14812 len
= UINTVAL (operands
[2]);
14816 /* Maximum alignment we can assume for both src and dst buffers. */
14817 align
= INTVAL (operands
[3]);
14819 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14822 /* Place src and dst addresses in registers
14823 and update the corresponding mem rtx. */
14825 dst_volatile
= MEM_VOLATILE_P (dst
);
14826 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14827 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14828 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14831 src_volatile
= MEM_VOLATILE_P (src
);
14832 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14833 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14834 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14836 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14839 if (src_volatile
|| dst_volatile
)
14842 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14843 if (!(dst_aligned
|| src_aligned
))
14844 return arm_gen_movmemqi (operands
);
14846 src
= adjust_address (src
, DImode
, 0);
14847 dst
= adjust_address (dst
, DImode
, 0);
14851 reg0
= gen_reg_rtx (DImode
);
14853 emit_move_insn (reg0
, src
);
14855 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14858 emit_move_insn (dst
, reg0
);
14860 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14862 src
= next_consecutive_mem (src
);
14863 dst
= next_consecutive_mem (dst
);
14866 gcc_assert (len
< 8);
14869 /* More than a word but less than a double-word to copy. Copy a word. */
14870 reg0
= gen_reg_rtx (SImode
);
14871 src
= adjust_address (src
, SImode
, 0);
14872 dst
= adjust_address (dst
, SImode
, 0);
14874 emit_move_insn (reg0
, src
);
14876 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14879 emit_move_insn (dst
, reg0
);
14881 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14883 src
= next_consecutive_mem (src
);
14884 dst
= next_consecutive_mem (dst
);
14891 /* Copy the remaining bytes. */
14894 dst
= adjust_address (dst
, HImode
, 0);
14895 src
= adjust_address (src
, HImode
, 0);
14896 reg0
= gen_reg_rtx (SImode
);
14898 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14900 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14903 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14905 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14907 src
= next_consecutive_mem (src
);
14908 dst
= next_consecutive_mem (dst
);
14913 dst
= adjust_address (dst
, QImode
, 0);
14914 src
= adjust_address (src
, QImode
, 0);
14915 reg0
= gen_reg_rtx (QImode
);
14916 emit_move_insn (reg0
, src
);
14917 emit_move_insn (dst
, reg0
);
14921 /* Select a dominance comparison mode if possible for a test of the general
14922 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14923 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14924 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14925 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14926 In all cases OP will be either EQ or NE, but we don't need to know which
14927 here. If we are unable to support a dominance comparison we return
14928 CC mode. This will then fail to match for the RTL expressions that
14929 generate this call. */
14931 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14933 enum rtx_code cond1
, cond2
;
14936 /* Currently we will probably get the wrong result if the individual
14937 comparisons are not simple. This also ensures that it is safe to
14938 reverse a comparison if necessary. */
14939 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14941 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14945 /* The if_then_else variant of this tests the second condition if the
14946 first passes, but is true if the first fails. Reverse the first
14947 condition to get a true "inclusive-or" expression. */
14948 if (cond_or
== DOM_CC_NX_OR_Y
)
14949 cond1
= reverse_condition (cond1
);
14951 /* If the comparisons are not equal, and one doesn't dominate the other,
14952 then we can't do this. */
14954 && !comparison_dominates_p (cond1
, cond2
)
14955 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14959 std::swap (cond1
, cond2
);
14964 if (cond_or
== DOM_CC_X_AND_Y
)
14969 case EQ
: return CC_DEQmode
;
14970 case LE
: return CC_DLEmode
;
14971 case LEU
: return CC_DLEUmode
;
14972 case GE
: return CC_DGEmode
;
14973 case GEU
: return CC_DGEUmode
;
14974 default: gcc_unreachable ();
14978 if (cond_or
== DOM_CC_X_AND_Y
)
14990 gcc_unreachable ();
14994 if (cond_or
== DOM_CC_X_AND_Y
)
15006 gcc_unreachable ();
15010 if (cond_or
== DOM_CC_X_AND_Y
)
15011 return CC_DLTUmode
;
15016 return CC_DLTUmode
;
15018 return CC_DLEUmode
;
15022 gcc_unreachable ();
15026 if (cond_or
== DOM_CC_X_AND_Y
)
15027 return CC_DGTUmode
;
15032 return CC_DGTUmode
;
15034 return CC_DGEUmode
;
15038 gcc_unreachable ();
15041 /* The remaining cases only occur when both comparisons are the
15044 gcc_assert (cond1
== cond2
);
15048 gcc_assert (cond1
== cond2
);
15052 gcc_assert (cond1
== cond2
);
15056 gcc_assert (cond1
== cond2
);
15057 return CC_DLEUmode
;
15060 gcc_assert (cond1
== cond2
);
15061 return CC_DGEUmode
;
15064 gcc_unreachable ();
15069 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
15071 /* All floating point compares return CCFP if it is an equality
15072 comparison, and CCFPE otherwise. */
15073 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
15096 gcc_unreachable ();
15100 /* A compare with a shifted operand. Because of canonicalization, the
15101 comparison will have to be swapped when we emit the assembler. */
15102 if (GET_MODE (y
) == SImode
15103 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15104 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15105 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15106 || GET_CODE (x
) == ROTATERT
))
15109 /* This operation is performed swapped, but since we only rely on the Z
15110 flag we don't need an additional mode. */
15111 if (GET_MODE (y
) == SImode
15112 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15113 && GET_CODE (x
) == NEG
15114 && (op
== EQ
|| op
== NE
))
15117 /* This is a special case that is used by combine to allow a
15118 comparison of a shifted byte load to be split into a zero-extend
15119 followed by a comparison of the shifted integer (only valid for
15120 equalities and unsigned inequalities). */
15121 if (GET_MODE (x
) == SImode
15122 && GET_CODE (x
) == ASHIFT
15123 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15124 && GET_CODE (XEXP (x
, 0)) == SUBREG
15125 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15126 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15127 && (op
== EQ
|| op
== NE
15128 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15129 && CONST_INT_P (y
))
15132 /* A construct for a conditional compare, if the false arm contains
15133 0, then both conditions must be true, otherwise either condition
15134 must be true. Not all conditions are possible, so CCmode is
15135 returned if it can't be done. */
15136 if (GET_CODE (x
) == IF_THEN_ELSE
15137 && (XEXP (x
, 2) == const0_rtx
15138 || XEXP (x
, 2) == const1_rtx
)
15139 && COMPARISON_P (XEXP (x
, 0))
15140 && COMPARISON_P (XEXP (x
, 1)))
15141 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15142 INTVAL (XEXP (x
, 2)));
15144 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15145 if (GET_CODE (x
) == AND
15146 && (op
== EQ
|| op
== NE
)
15147 && COMPARISON_P (XEXP (x
, 0))
15148 && COMPARISON_P (XEXP (x
, 1)))
15149 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15152 if (GET_CODE (x
) == IOR
15153 && (op
== EQ
|| op
== NE
)
15154 && COMPARISON_P (XEXP (x
, 0))
15155 && COMPARISON_P (XEXP (x
, 1)))
15156 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15159 /* An operation (on Thumb) where we want to test for a single bit.
15160 This is done by shifting that bit up into the top bit of a
15161 scratch register; we can then branch on the sign bit. */
15163 && GET_MODE (x
) == SImode
15164 && (op
== EQ
|| op
== NE
)
15165 && GET_CODE (x
) == ZERO_EXTRACT
15166 && XEXP (x
, 1) == const1_rtx
)
15169 /* An operation that sets the condition codes as a side-effect, the
15170 V flag is not set correctly, so we can only use comparisons where
15171 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15173 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15174 if (GET_MODE (x
) == SImode
15176 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15177 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15178 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15179 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15180 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15181 || GET_CODE (x
) == LSHIFTRT
15182 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15183 || GET_CODE (x
) == ROTATERT
15184 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15185 return CC_NOOVmode
;
15187 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15190 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15191 && GET_CODE (x
) == PLUS
15192 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15195 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15201 /* A DImode comparison against zero can be implemented by
15202 or'ing the two halves together. */
15203 if (y
== const0_rtx
)
15206 /* We can do an equality test in three Thumb instructions. */
15216 /* DImode unsigned comparisons can be implemented by cmp +
15217 cmpeq without a scratch register. Not worth doing in
15228 /* DImode signed and unsigned comparisons can be implemented
15229 by cmp + sbcs with a scratch register, but that does not
15230 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15231 gcc_assert (op
!= EQ
&& op
!= NE
);
15235 gcc_unreachable ();
15239 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15240 return GET_MODE (x
);
15245 /* X and Y are two things to compare using CODE. Emit the compare insn and
15246 return the rtx for register 0 in the proper mode. FP means this is a
15247 floating point compare: I don't think that it is needed on the arm. */
15249 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15253 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
15255 /* We might have X as a constant, Y as a register because of the predicates
15256 used for cmpdi. If so, force X to a register here. */
15257 if (dimode_comparison
&& !REG_P (x
))
15258 x
= force_reg (DImode
, x
);
15260 mode
= SELECT_CC_MODE (code
, x
, y
);
15261 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15263 if (dimode_comparison
15264 && mode
!= CC_CZmode
)
15268 /* To compare two non-zero values for equality, XOR them and
15269 then compare against zero. Not used for ARM mode; there
15270 CC_CZmode is cheaper. */
15271 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
15273 gcc_assert (!reload_completed
);
15274 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
15278 /* A scratch register is required. */
15279 if (reload_completed
)
15280 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
15282 scratch
= gen_rtx_SCRATCH (SImode
);
15284 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15285 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15286 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
15289 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15294 /* Generate a sequence of insns that will generate the correct return
15295 address mask depending on the physical architecture that the program
/* NOTE(review): the function's return type and the final statement that
   hands the mask register back to the caller sit on lines elided from
   this extract -- presumably REG below is returned; confirm against the
   full source.  */
15298 arm_gen_return_addr_mask (void)
/* Fresh pseudo to hold the computed return-address mask.  */
15300 rtx reg
= gen_reg_rtx (Pmode
);
/* Emit the target pattern that materializes the mask into REG.  */
15302 emit_insn (gen_return_addr_mask (reg
));
15307 arm_reload_in_hi (rtx
*operands
)
15309 rtx ref
= operands
[1];
15311 HOST_WIDE_INT offset
= 0;
15313 if (GET_CODE (ref
) == SUBREG
)
15315 offset
= SUBREG_BYTE (ref
);
15316 ref
= SUBREG_REG (ref
);
15321 /* We have a pseudo which has been spilt onto the stack; there
15322 are two cases here: the first where there is a simple
15323 stack-slot replacement and a second where the stack-slot is
15324 out of range, or is used as a subreg. */
15325 if (reg_equiv_mem (REGNO (ref
)))
15327 ref
= reg_equiv_mem (REGNO (ref
));
15328 base
= find_replacement (&XEXP (ref
, 0));
15331 /* The slot is out of range, or was dressed up in a SUBREG. */
15332 base
= reg_equiv_address (REGNO (ref
));
15335 base
= find_replacement (&XEXP (ref
, 0));
15337 /* Handle the case where the address is too complex to be offset by 1. */
15338 if (GET_CODE (base
) == MINUS
15339 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15341 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15343 emit_set_insn (base_plus
, base
);
15346 else if (GET_CODE (base
) == PLUS
)
15348 /* The addend must be CONST_INT, or we would have dealt with it above. */
15349 HOST_WIDE_INT hi
, lo
;
15351 offset
+= INTVAL (XEXP (base
, 1));
15352 base
= XEXP (base
, 0);
15354 /* Rework the address into a legal sequence of insns. */
15355 /* Valid range for lo is -4095 -> 4095 */
15358 : -((-offset
) & 0xfff));
15360 /* Corner case, if lo is the max offset then we would be out of range
15361 once we have added the additional 1 below, so bump the msb into the
15362 pre-loading insn(s). */
15366 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15367 ^ (HOST_WIDE_INT
) 0x80000000)
15368 - (HOST_WIDE_INT
) 0x80000000);
15370 gcc_assert (hi
+ lo
== offset
);
15374 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15376 /* Get the base address; addsi3 knows how to handle constants
15377 that require more than one insn. */
15378 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15384 /* Operands[2] may overlap operands[0] (though it won't overlap
15385 operands[1]), that's why we asked for a DImode reg -- so we can
15386 use the bit that does not overlap. */
15387 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15388 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15390 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15392 emit_insn (gen_zero_extendqisi2 (scratch
,
15393 gen_rtx_MEM (QImode
,
15394 plus_constant (Pmode
, base
,
15396 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15397 gen_rtx_MEM (QImode
,
15398 plus_constant (Pmode
, base
,
15400 if (!BYTES_BIG_ENDIAN
)
15401 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15402 gen_rtx_IOR (SImode
,
15405 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15409 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15410 gen_rtx_IOR (SImode
,
15411 gen_rtx_ASHIFT (SImode
, scratch
,
15413 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15416 /* Handle storing a half-word to memory during reload by synthesizing as two
15417 byte stores. Take care not to clobber the input values until after we
15418 have moved them somewhere safe. This code assumes that if the DImode
15419 scratch in operands[2] overlaps either the input value or output address
15420 in some way, then that value must die in this insn (we absolutely need
15421 two scratch registers for some corner cases). */
15423 arm_reload_out_hi (rtx
*operands
)
15425 rtx ref
= operands
[0];
15426 rtx outval
= operands
[1];
15428 HOST_WIDE_INT offset
= 0;
15430 if (GET_CODE (ref
) == SUBREG
)
15432 offset
= SUBREG_BYTE (ref
);
15433 ref
= SUBREG_REG (ref
);
15438 /* We have a pseudo which has been spilt onto the stack; there
15439 are two cases here: the first where there is a simple
15440 stack-slot replacement and a second where the stack-slot is
15441 out of range, or is used as a subreg. */
15442 if (reg_equiv_mem (REGNO (ref
)))
15444 ref
= reg_equiv_mem (REGNO (ref
));
15445 base
= find_replacement (&XEXP (ref
, 0));
15448 /* The slot is out of range, or was dressed up in a SUBREG. */
15449 base
= reg_equiv_address (REGNO (ref
));
15452 base
= find_replacement (&XEXP (ref
, 0));
15454 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15456 /* Handle the case where the address is too complex to be offset by 1. */
15457 if (GET_CODE (base
) == MINUS
15458 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15460 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15462 /* Be careful not to destroy OUTVAL. */
15463 if (reg_overlap_mentioned_p (base_plus
, outval
))
15465 /* Updating base_plus might destroy outval, see if we can
15466 swap the scratch and base_plus. */
15467 if (!reg_overlap_mentioned_p (scratch
, outval
))
15468 std::swap (scratch
, base_plus
);
15471 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15473 /* Be conservative and copy OUTVAL into the scratch now,
15474 this should only be necessary if outval is a subreg
15475 of something larger than a word. */
15476 /* XXX Might this clobber base? I can't see how it can,
15477 since scratch is known to overlap with OUTVAL, and
15478 must be wider than a word. */
15479 emit_insn (gen_movhi (scratch_hi
, outval
));
15480 outval
= scratch_hi
;
15484 emit_set_insn (base_plus
, base
);
15487 else if (GET_CODE (base
) == PLUS
)
15489 /* The addend must be CONST_INT, or we would have dealt with it above. */
15490 HOST_WIDE_INT hi
, lo
;
15492 offset
+= INTVAL (XEXP (base
, 1));
15493 base
= XEXP (base
, 0);
15495 /* Rework the address into a legal sequence of insns. */
15496 /* Valid range for lo is -4095 -> 4095 */
15499 : -((-offset
) & 0xfff));
15501 /* Corner case, if lo is the max offset then we would be out of range
15502 once we have added the additional 1 below, so bump the msb into the
15503 pre-loading insn(s). */
15507 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15508 ^ (HOST_WIDE_INT
) 0x80000000)
15509 - (HOST_WIDE_INT
) 0x80000000);
15511 gcc_assert (hi
+ lo
== offset
);
15515 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15517 /* Be careful not to destroy OUTVAL. */
15518 if (reg_overlap_mentioned_p (base_plus
, outval
))
15520 /* Updating base_plus might destroy outval, see if we
15521 can swap the scratch and base_plus. */
15522 if (!reg_overlap_mentioned_p (scratch
, outval
))
15523 std::swap (scratch
, base_plus
);
15526 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15528 /* Be conservative and copy outval into scratch now,
15529 this should only be necessary if outval is a
15530 subreg of something larger than a word. */
15531 /* XXX Might this clobber base? I can't see how it
15532 can, since scratch is known to overlap with
15534 emit_insn (gen_movhi (scratch_hi
, outval
));
15535 outval
= scratch_hi
;
15539 /* Get the base address; addsi3 knows how to handle constants
15540 that require more than one insn. */
15541 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15547 if (BYTES_BIG_ENDIAN
)
15549 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15550 plus_constant (Pmode
, base
,
15552 gen_lowpart (QImode
, outval
)));
15553 emit_insn (gen_lshrsi3 (scratch
,
15554 gen_rtx_SUBREG (SImode
, outval
, 0),
15556 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15558 gen_lowpart (QImode
, scratch
)));
15562 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15564 gen_lowpart (QImode
, outval
)));
15565 emit_insn (gen_lshrsi3 (scratch
,
15566 gen_rtx_SUBREG (SImode
, outval
, 0),
15568 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15569 plus_constant (Pmode
, base
,
15571 gen_lowpart (QImode
, scratch
)));
15575 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15576 (padded to the size of a word) should be passed in a register. */
15579 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15581 if (TARGET_AAPCS_BASED
)
15582 return must_pass_in_stack_var_size (mode
, type
);
15584 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15588 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15589 Return true if an argument passed on the stack should be padded upwards,
15590 i.e. if the least-significant byte has useful data.
15591 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15592 aggregate types are placed in the lowest memory address. */
15595 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
/* Non-AAPCS ABIs: defer entirely to the generic padding rule.  */
15597 if (!TARGET_AAPCS_BASED
)
15598 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
/* NOTE(review): the branch body and the function's fall-through return
   sit on lines elided from this extract (numbering jumps past 15600);
   the visible condition only identifies big-endian integral types --
   confirm the returned values against the full source.  */
15600 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15607 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15608 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15609 register has useful data, and return the opposite if the most
15610 significant byte does. */
15613 arm_pad_reg_upward (machine_mode mode
,
15614 tree type
, int first ATTRIBUTE_UNUSED
)
/* Only big-endian AAPCS targets get the special small-value rule;
   everything else falls through to the default at the end.  */
15616 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15618 /* For AAPCS, small aggregates, small fixed-point types,
15619 and small complex types are always padded upwards. */
/* With a TYPE available, classify by tree code and byte size.  */
15622 if ((AGGREGATE_TYPE_P (type
)
15623 || TREE_CODE (type
) == COMPLEX_TYPE
15624 || FIXED_POINT_TYPE_P (type
))
15625 && int_size_in_bytes (type
) <= 4)
/* No TYPE (e.g. libcalls): classify by machine mode instead.
   NOTE(review): the result statements for both branches sit on lines
   elided from this extract -- confirm against the full source.  */
15630 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15631 && GET_MODE_SIZE (mode
) <= 4)
15636 /* Otherwise, use default padding. */
15637 return !BYTES_BIG_ENDIAN
;
15640 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15641 assuming that the address in the base register is word aligned. */
15643 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15645 HOST_WIDE_INT max_offset
;
15647 /* Offset must be a multiple of 4 in Thumb mode. */
15648 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
/* NOTE(review): the assignments that set MAX_OFFSET for the Thumb-2 and
   ARM cases, and the fall-through for other targets, sit on lines
   elided from this extract (numbering jumps 15648 -> 15653 -> 15658);
   presumably Thumb-2 permits a wider immediate range than ARM's 8-bit
   LDRD/STRD offset -- confirm against the full source and the
   architecture manual.  */
15653 else if (TARGET_ARM
)
/* Accept any offset whose magnitude fits within the limit chosen
   above.  */
15658 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15661 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15662 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15663 Assumes that the address in the base register RN is word aligned. Pattern
15664 guarantees that both memory accesses use the same base register,
15665 the offsets are constants within the range, and the gap between the offsets is 4.
15666 If preload complete then check that registers are legal. WBACK indicates whether
15667 address is updated. LOAD indicates whether memory access is load or store. */
15669 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15670 bool wback
, bool load
)
15672 unsigned int t
, t2
, n
;
15674 if (!reload_completed
)
15677 if (!offset_ok_for_ldrd_strd (offset
))
15684 if ((TARGET_THUMB2
)
15685 && ((wback
&& (n
== t
|| n
== t2
))
15686 || (t
== SP_REGNUM
)
15687 || (t
== PC_REGNUM
)
15688 || (t2
== SP_REGNUM
)
15689 || (t2
== PC_REGNUM
)
15690 || (!load
&& (n
== PC_REGNUM
))
15691 || (load
&& (t
== t2
))
15692 /* Triggers Cortex-M3 LDRD errata. */
15693 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15697 && ((wback
&& (n
== t
|| n
== t2
))
15698 || (t2
== PC_REGNUM
)
15699 || (t
% 2 != 0) /* First destination register is not even. */
15701 /* PC can be used as base register (for offset addressing only),
15702 but it is depricated. */
15703 || (n
== PC_REGNUM
)))
15709 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15710 operand MEM's address contains an immediate offset from the base
15711 register and has no side effects, in which case it sets BASE and
15712 OFFSET accordingly. */
15714 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
/* Both output parameters are mandatory.  */
15718 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15720 /* TODO: Handle more general memory operand patterns, such as
15721 PRE_DEC and PRE_INC. */
/* Reject auto-modifying or otherwise side-effecting addresses.  */
15723 if (side_effects_p (mem
))
15726 /* Can't deal with subregs. */
15727 if (GET_CODE (mem
) == SUBREG
)
15730 gcc_assert (MEM_P (mem
));
/* Default the offset to zero, covering the plain-register address
   case.  */
15732 *offset
= const0_rtx
;
15734 addr
= XEXP (mem
, 0);
15736 /* If addr isn't valid for DImode, then we can't handle it. */
15737 if (!arm_legitimate_address_p (DImode
, addr
,
15738 reload_in_progress
|| reload_completed
))
/* NOTE(review): the branch handling a bare REG address (original lines
   around 15740-15745) is elided from this extract; only the PLUS/MINUS
   decomposition is visible below.  */
15746 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15748 *base
= XEXP (addr
, 0);
15749 *offset
= XEXP (addr
, 1);
/* Valid only when the address decomposes as register plus constant
   offset.  */
15750 return (REG_P (*base
) && CONST_INT_P (*offset
));
15756 /* Called from a peephole2 to replace two word-size accesses with a
15757 single LDRD/STRD instruction. Returns true iff we can generate a
15758 new instruction sequence. That is, both accesses use the same base
15759 register and the gap between constant offsets is 4. This function
15760 may reorder its operands to match ldrd/strd RTL templates.
15761 OPERANDS are the operands found by the peephole matcher;
15762 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15763 corresponding memory operands. LOAD indicaates whether the access
15764 is load or store. CONST_STORE indicates a store of constant
15765 integer values held in OPERANDS[4,5] and assumes that the pattern
15766 is of length 4 insn, for the purpose of checking dead registers.
15767 COMMUTE indicates that register operands may be reordered. */
15769 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15770 bool const_store
, bool commute
)
15773 HOST_WIDE_INT offsets
[2], offset
;
15774 rtx base
= NULL_RTX
;
15775 rtx cur_base
, cur_offset
, tmp
;
15777 HARD_REG_SET regset
;
15779 gcc_assert (!const_store
|| !load
);
15780 /* Check that the memory references are immediate offsets from the
15781 same base register. Extract the base register, the destination
15782 registers, and the corresponding memory offsets. */
15783 for (i
= 0; i
< nops
; i
++)
15785 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15790 else if (REGNO (base
) != REGNO (cur_base
))
15793 offsets
[i
] = INTVAL (cur_offset
);
15794 if (GET_CODE (operands
[i
]) == SUBREG
)
15796 tmp
= SUBREG_REG (operands
[i
]);
15797 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15802 /* Make sure there is no dependency between the individual loads. */
15803 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15804 return false; /* RAW */
15806 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15807 return false; /* WAW */
15809 /* If the same input register is used in both stores
15810 when storing different constants, try to find a free register.
15811 For example, the code
15816 can be transformed into
15819 in Thumb mode assuming that r1 is free. */
15821 && REGNO (operands
[0]) == REGNO (operands
[1])
15822 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15826 CLEAR_HARD_REG_SET (regset
);
15827 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15828 if (tmp
== NULL_RTX
)
15831 /* Use the new register in the first load to ensure that
15832 if the original input register is not dead after peephole,
15833 then it will have the correct constant value. */
15836 else if (TARGET_ARM
)
15839 int regno
= REGNO (operands
[0]);
15840 if (!peep2_reg_dead_p (4, operands
[0]))
15842 /* When the input register is even and is not dead after the
15843 pattern, it has to hold the second constant but we cannot
15844 form a legal STRD in ARM mode with this register as the second
15846 if (regno
% 2 == 0)
15849 /* Is regno-1 free? */
15850 SET_HARD_REG_SET (regset
);
15851 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15852 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15853 if (tmp
== NULL_RTX
)
15860 /* Find a DImode register. */
15861 CLEAR_HARD_REG_SET (regset
);
15862 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15863 if (tmp
!= NULL_RTX
)
15865 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15866 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15870 /* Can we use the input register to form a DI register? */
15871 SET_HARD_REG_SET (regset
);
15872 CLEAR_HARD_REG_BIT(regset
,
15873 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15874 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15875 if (tmp
== NULL_RTX
)
15877 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15881 gcc_assert (operands
[0] != NULL_RTX
);
15882 gcc_assert (operands
[1] != NULL_RTX
);
15883 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15884 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15888 /* Make sure the instructions are ordered with lower memory access first. */
15889 if (offsets
[0] > offsets
[1])
15891 gap
= offsets
[0] - offsets
[1];
15892 offset
= offsets
[1];
15894 /* Swap the instructions such that lower memory is accessed first. */
15895 std::swap (operands
[0], operands
[1]);
15896 std::swap (operands
[2], operands
[3]);
15898 std::swap (operands
[4], operands
[5]);
15902 gap
= offsets
[1] - offsets
[0];
15903 offset
= offsets
[0];
15906 /* Make sure accesses are to consecutive memory locations. */
15910 /* Make sure we generate legal instructions. */
15911 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15915 /* In Thumb state, where registers are almost unconstrained, there
15916 is little hope to fix it. */
15920 if (load
&& commute
)
15922 /* Try reordering registers. */
15923 std::swap (operands
[0], operands
[1]);
15924 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15931 /* If input registers are dead after this pattern, they can be
15932 reordered or replaced by other registers that are free in the
15933 current pattern. */
15934 if (!peep2_reg_dead_p (4, operands
[0])
15935 || !peep2_reg_dead_p (4, operands
[1]))
15938 /* Try to reorder the input registers. */
15939 /* For example, the code
15944 can be transformed into
15949 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15952 std::swap (operands
[0], operands
[1]);
15956 /* Try to find a free DI register. */
15957 CLEAR_HARD_REG_SET (regset
);
15958 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15959 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15962 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15963 if (tmp
== NULL_RTX
)
15966 /* DREG must be an even-numbered register in DImode.
15967 Split it into SI registers. */
15968 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15969 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15970 gcc_assert (operands
[0] != NULL_RTX
);
15971 gcc_assert (operands
[1] != NULL_RTX
);
15972 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15973 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15975 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15987 /* Print a symbolic form of X to the debug file, F. */
15989 arm_print_value (FILE *f
, rtx x
)
15991 switch (GET_CODE (x
))
15994 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15998 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
16006 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
16008 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
16009 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
16017 fprintf (f
, "\"%s\"", XSTR (x
, 0));
16021 fprintf (f
, "`%s'", XSTR (x
, 0));
16025 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
16029 arm_print_value (f
, XEXP (x
, 0));
16033 arm_print_value (f
, XEXP (x
, 0));
16035 arm_print_value (f
, XEXP (x
, 1));
16043 fprintf (f
, "????");
16048 /* Routines for manipulation of the constant pool. */
16050 /* Arm instructions cannot load a large constant directly into a
16051 register; they have to come from a pc relative load. The constant
16052 must therefore be placed in the addressable range of the pc
16053 relative load. Depending on the precise pc relative load
16054 instruction the range is somewhere between 256 bytes and 4k. This
16055 means that we often have to dump a constant inside a function, and
16056 generate code to branch around it.
16058 It is important to minimize this, since the branches will slow
16059 things down and make the code larger.
16061 Normally we can hide the table after an existing unconditional
16062 branch so that there is no interruption of the flow, but in the
16063 worst case the code looks like this:
16081 We fix this by performing a scan after scheduling, which notices
16082 which instructions need to have their operands fetched from the
16083 constant table and builds the table.
16085 The algorithm starts by building a table of all the constants that
16086 need fixing up and all the natural barriers in the function (places
16087 where a constant table can be dropped without breaking the flow).
16088 For each fixup we note how far the pc-relative replacement will be
16089 able to reach and the offset of the instruction into the function.
16091 Having built the table we then group the fixes together to form
16092 tables that are as large as possible (subject to addressing
16093 constraints) and emit each table of constants after the last
16094 barrier that is within range of all the instructions in the group.
16095 If a group does not contain a barrier, then we forcibly create one
16096 by inserting a jump instruction into the flow. Once the table has
16097 been inserted, the insns are then modified to reference the
16098 relevant entry in the pool.
16100 Possible enhancements to the algorithm (not implemented) are:
16102 1) For some processors and object formats, there may be benefit in
16103 aligning the pools to the start of cache lines; this alignment
16104 would need to be taken into account when calculating addressability
16107 /* These typedefs are located at the start of this file, so that
16108 they can be used in the prototypes there. This comment is to
16109 remind readers of that fact so that the following structures
16110 can be understood more easily.
16112 typedef struct minipool_node Mnode;
16113 typedef struct minipool_fixup Mfix; */
16115 struct minipool_node
16117 /* Doubly linked chain of entries. */
16120 /* The maximum offset into the code that this entry can be placed. While
16121 pushing fixes for forward references, all entries are sorted in order
16122 of increasing max_address. */
16123 HOST_WIDE_INT max_address
;
16124 /* Similarly for an entry inserted for a backwards ref. */
16125 HOST_WIDE_INT min_address
;
16126 /* The number of fixes referencing this entry. This can become zero
16127 if we "unpush" an entry. In this case we ignore the entry when we
16128 come to emit the code. */
16130 /* The offset from the start of the minipool. */
16131 HOST_WIDE_INT offset
;
16132 /* The value in table. */
16134 /* The mode of value. */
16136 /* The size of the value. With iWMMXt enabled
16137 sizes > 4 also imply an alignment of 8-bytes. */
16141 struct minipool_fixup
16145 HOST_WIDE_INT address
;
16151 HOST_WIDE_INT forwards
;
16152 HOST_WIDE_INT backwards
;
16155 /* Fixes less than a word need padding out to a word boundary. */
16156 #define MINIPOOL_FIX_SIZE(mode) \
16157 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16159 static Mnode
* minipool_vector_head
;
16160 static Mnode
* minipool_vector_tail
;
16161 static rtx_code_label
*minipool_vector_label
;
16162 static int minipool_pad
;
16164 /* The linked list of all minipool fixes required for this function. */
16165 Mfix
* minipool_fix_head
;
16166 Mfix
* minipool_fix_tail
;
16167 /* The fix entry for the current minipool, once it has been placed. */
16168 Mfix
* minipool_barrier
;
16170 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16171 #define JUMP_TABLES_IN_TEXT_SECTION 0
16174 static HOST_WIDE_INT
16175 get_jump_table_size (rtx_jump_table_data
*insn
)
16177 /* ADDR_VECs only take room if read-only data does into the text
16179 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16181 rtx body
= PATTERN (insn
);
16182 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16183 HOST_WIDE_INT size
;
16184 HOST_WIDE_INT modesize
;
16186 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16187 size
= modesize
* XVECLEN (body
, elt
);
16191 /* Round up size of TBB table to a halfword boundary. */
16192 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
16195 /* No padding necessary for TBH. */
16198 /* Add two bytes for alignment on Thumb. */
16203 gcc_unreachable ();
16211 /* Return the maximum amount of padding that will be inserted before
16214 static HOST_WIDE_INT
16215 get_label_padding (rtx label
)
16217 HOST_WIDE_INT align
, min_insn_size
;
16219 align
= 1 << label_to_alignment (label
);
16220 min_insn_size
= TARGET_THUMB
? 2 : 4;
16221 return align
> min_insn_size
? align
- min_insn_size
: 0;
16224 /* Move a minipool fix MP from its current location to before MAX_MP.
16225 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16226 constraints may need updating. */
16228 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16229 HOST_WIDE_INT max_address
)
16231 /* The code below assumes these are different. */
16232 gcc_assert (mp
!= max_mp
);
16234 if (max_mp
== NULL
)
16236 if (max_address
< mp
->max_address
)
16237 mp
->max_address
= max_address
;
16241 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16242 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16244 mp
->max_address
= max_address
;
16246 /* Unlink MP from its current position. Since max_mp is non-null,
16247 mp->prev must be non-null. */
16248 mp
->prev
->next
= mp
->next
;
16249 if (mp
->next
!= NULL
)
16250 mp
->next
->prev
= mp
->prev
;
16252 minipool_vector_tail
= mp
->prev
;
16254 /* Re-insert it before MAX_MP. */
16256 mp
->prev
= max_mp
->prev
;
16259 if (mp
->prev
!= NULL
)
16260 mp
->prev
->next
= mp
;
16262 minipool_vector_head
= mp
;
16265 /* Save the new entry. */
16268 /* Scan over the preceding entries and adjust their addresses as
16270 while (mp
->prev
!= NULL
16271 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16273 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16280 /* Add a constant to the minipool for a forward reference. Returns the
16281 node added or NULL if the constant will not fit in this pool. */
16283 add_minipool_forward_ref (Mfix
*fix
)
16285 /* If set, max_mp is the first pool_entry that has a lower
16286 constraint than the one we are trying to add. */
16287 Mnode
* max_mp
= NULL
;
16288 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16291 /* If the minipool starts before the end of FIX->INSN then this FIX
16292 can not be placed into the current pool. Furthermore, adding the
16293 new constant pool entry may cause the pool to start FIX_SIZE bytes
16295 if (minipool_vector_head
&&
16296 (fix
->address
+ get_attr_length (fix
->insn
)
16297 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16300 /* Scan the pool to see if a constant with the same value has
16301 already been added. While we are doing this, also note the
16302 location where we must insert the constant if it doesn't already
16304 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16306 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16307 && fix
->mode
== mp
->mode
16308 && (!LABEL_P (fix
->value
)
16309 || (CODE_LABEL_NUMBER (fix
->value
)
16310 == CODE_LABEL_NUMBER (mp
->value
)))
16311 && rtx_equal_p (fix
->value
, mp
->value
))
16313 /* More than one fix references this entry. */
16315 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16318 /* Note the insertion point if necessary. */
16320 && mp
->max_address
> max_address
)
16323 /* If we are inserting an 8-bytes aligned quantity and
16324 we have not already found an insertion point, then
16325 make sure that all such 8-byte aligned quantities are
16326 placed at the start of the pool. */
16327 if (ARM_DOUBLEWORD_ALIGN
16329 && fix
->fix_size
>= 8
16330 && mp
->fix_size
< 8)
16333 max_address
= mp
->max_address
;
16337 /* The value is not currently in the minipool, so we need to create
16338 a new entry for it. If MAX_MP is NULL, the entry will be put on
16339 the end of the list since the placement is less constrained than
16340 any existing entry. Otherwise, we insert the new fix before
16341 MAX_MP and, if necessary, adjust the constraints on the other
16344 mp
->fix_size
= fix
->fix_size
;
16345 mp
->mode
= fix
->mode
;
16346 mp
->value
= fix
->value
;
16348 /* Not yet required for a backwards ref. */
16349 mp
->min_address
= -65536;
16351 if (max_mp
== NULL
)
16353 mp
->max_address
= max_address
;
16355 mp
->prev
= minipool_vector_tail
;
16357 if (mp
->prev
== NULL
)
16359 minipool_vector_head
= mp
;
16360 minipool_vector_label
= gen_label_rtx ();
16363 mp
->prev
->next
= mp
;
16365 minipool_vector_tail
= mp
;
16369 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16370 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16372 mp
->max_address
= max_address
;
16375 mp
->prev
= max_mp
->prev
;
16377 if (mp
->prev
!= NULL
)
16378 mp
->prev
->next
= mp
;
16380 minipool_vector_head
= mp
;
16383 /* Save the new entry. */
16386 /* Scan over the preceding entries and adjust their addresses as
16388 while (mp
->prev
!= NULL
16389 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16391 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16399 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16400 HOST_WIDE_INT min_address
)
16402 HOST_WIDE_INT offset
;
16404 /* The code below assumes these are different. */
16405 gcc_assert (mp
!= min_mp
);
16407 if (min_mp
== NULL
)
16409 if (min_address
> mp
->min_address
)
16410 mp
->min_address
= min_address
;
16414 /* We will adjust this below if it is too loose. */
16415 mp
->min_address
= min_address
;
16417 /* Unlink MP from its current position. Since min_mp is non-null,
16418 mp->next must be non-null. */
16419 mp
->next
->prev
= mp
->prev
;
16420 if (mp
->prev
!= NULL
)
16421 mp
->prev
->next
= mp
->next
;
16423 minipool_vector_head
= mp
->next
;
16425 /* Reinsert it after MIN_MP. */
16427 mp
->next
= min_mp
->next
;
16429 if (mp
->next
!= NULL
)
16430 mp
->next
->prev
= mp
;
16432 minipool_vector_tail
= mp
;
16438 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16440 mp
->offset
= offset
;
16441 if (mp
->refcount
> 0)
16442 offset
+= mp
->fix_size
;
16444 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16445 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16451 /* Add a constant to the minipool for a backward reference. Returns the
16452 node added or NULL if the constant will not fit in this pool.
16454 Note that the code for insertion for a backwards reference can be
16455 somewhat confusing because the calculated offsets for each fix do
16456 not take into account the size of the pool (which is still under
16459 add_minipool_backward_ref (Mfix
*fix
)
16461 /* If set, min_mp is the last pool_entry that has a lower constraint
16462 than the one we are trying to add. */
16463 Mnode
*min_mp
= NULL
;
16464 /* This can be negative, since it is only a constraint. */
16465 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16468 /* If we can't reach the current pool from this insn, or if we can't
16469 insert this entry at the end of the pool without pushing other
16470 fixes out of range, then we don't try. This ensures that we
16471 can't fail later on. */
16472 if (min_address
>= minipool_barrier
->address
16473 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16474 >= minipool_barrier
->address
))
16477 /* Scan the pool to see if a constant with the same value has
16478 already been added. While we are doing this, also note the
16479 location where we must insert the constant if it doesn't already
16481 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16483 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16484 && fix
->mode
== mp
->mode
16485 && (!LABEL_P (fix
->value
)
16486 || (CODE_LABEL_NUMBER (fix
->value
)
16487 == CODE_LABEL_NUMBER (mp
->value
)))
16488 && rtx_equal_p (fix
->value
, mp
->value
)
16489 /* Check that there is enough slack to move this entry to the
16490 end of the table (this is conservative). */
16491 && (mp
->max_address
16492 > (minipool_barrier
->address
16493 + minipool_vector_tail
->offset
16494 + minipool_vector_tail
->fix_size
)))
16497 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16500 if (min_mp
!= NULL
)
16501 mp
->min_address
+= fix
->fix_size
;
16504 /* Note the insertion point if necessary. */
16505 if (mp
->min_address
< min_address
)
16507 /* For now, we do not allow the insertion of 8-byte alignment
16508 requiring nodes anywhere but at the start of the pool. */
16509 if (ARM_DOUBLEWORD_ALIGN
16510 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16515 else if (mp
->max_address
16516 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16518 /* Inserting before this entry would push the fix beyond
16519 its maximum address (which can happen if we have
16520 re-located a forwards fix); force the new fix to come
16522 if (ARM_DOUBLEWORD_ALIGN
16523 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16528 min_address
= mp
->min_address
+ fix
->fix_size
;
16531 /* Do not insert a non-8-byte aligned quantity before 8-byte
16532 aligned quantities. */
16533 else if (ARM_DOUBLEWORD_ALIGN
16534 && fix
->fix_size
< 8
16535 && mp
->fix_size
>= 8)
16538 min_address
= mp
->min_address
+ fix
->fix_size
;
16543 /* We need to create a new entry. */
16545 mp
->fix_size
= fix
->fix_size
;
16546 mp
->mode
= fix
->mode
;
16547 mp
->value
= fix
->value
;
16549 mp
->max_address
= minipool_barrier
->address
+ 65536;
16551 mp
->min_address
= min_address
;
16553 if (min_mp
== NULL
)
16556 mp
->next
= minipool_vector_head
;
16558 if (mp
->next
== NULL
)
16560 minipool_vector_tail
= mp
;
16561 minipool_vector_label
= gen_label_rtx ();
16564 mp
->next
->prev
= mp
;
16566 minipool_vector_head
= mp
;
16570 mp
->next
= min_mp
->next
;
16574 if (mp
->next
!= NULL
)
16575 mp
->next
->prev
= mp
;
16577 minipool_vector_tail
= mp
;
16580 /* Save the new entry. */
16588 /* Scan over the following entries and adjust their offsets. */
16589 while (mp
->next
!= NULL
)
16591 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16592 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16595 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16597 mp
->next
->offset
= mp
->offset
;
16606 assign_minipool_offsets (Mfix
*barrier
)
16608 HOST_WIDE_INT offset
= 0;
16611 minipool_barrier
= barrier
;
16613 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16615 mp
->offset
= offset
;
16617 if (mp
->refcount
> 0)
16618 offset
+= mp
->fix_size
;
16622 /* Output the literal table */
16624 dump_minipool (rtx_insn
*scan
)
16630 if (ARM_DOUBLEWORD_ALIGN
)
16631 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16632 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16639 fprintf (dump_file
,
16640 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16641 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16643 scan
= emit_label_after (gen_label_rtx (), scan
);
16644 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16645 scan
= emit_label_after (minipool_vector_label
, scan
);
16647 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16649 if (mp
->refcount
> 0)
16653 fprintf (dump_file
,
16654 ";; Offset %u, min %ld, max %ld ",
16655 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16656 (unsigned long) mp
->max_address
);
16657 arm_print_value (dump_file
, mp
->value
);
16658 fputc ('\n', dump_file
);
16661 switch (GET_MODE_SIZE (mp
->mode
))
16663 #ifdef HAVE_consttable_1
16665 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16669 #ifdef HAVE_consttable_2
16671 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16675 #ifdef HAVE_consttable_4
16677 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16681 #ifdef HAVE_consttable_8
16683 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16687 #ifdef HAVE_consttable_16
16689 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16694 gcc_unreachable ();
16702 minipool_vector_head
= minipool_vector_tail
= NULL
;
16703 scan
= emit_insn_after (gen_consttable_end (), scan
);
16704 scan
= emit_barrier_after (scan
);
16707 /* Return the cost of forcibly inserting a barrier after INSN. */
16709 arm_barrier_cost (rtx_insn
*insn
)
16711 /* Basing the location of the pool on the loop depth is preferable,
16712 but at the moment, the basic block information seems to be
16713 corrupt by this stage of the compilation. */
16714 int base_cost
= 50;
16715 rtx_insn
*next
= next_nonnote_insn (insn
);
16717 if (next
!= NULL
&& LABEL_P (next
))
16720 switch (GET_CODE (insn
))
16723 /* It will always be better to place the table before the label, rather
16732 return base_cost
- 10;
16735 return base_cost
+ 10;
16739 /* Find the best place in the insn stream in the range
16740 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16741 Create the barrier by inserting a jump and add a new fix entry for
16744 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16746 HOST_WIDE_INT count
= 0;
16747 rtx_barrier
*barrier
;
16748 rtx_insn
*from
= fix
->insn
;
16749 /* The instruction after which we will insert the jump. */
16750 rtx_insn
*selected
= NULL
;
16752 /* The address at which the jump instruction will be placed. */
16753 HOST_WIDE_INT selected_address
;
16755 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16756 rtx_code_label
*label
= gen_label_rtx ();
16758 selected_cost
= arm_barrier_cost (from
);
16759 selected_address
= fix
->address
;
16761 while (from
&& count
< max_count
)
16763 rtx_jump_table_data
*tmp
;
16766 /* This code shouldn't have been called if there was a natural barrier
16768 gcc_assert (!BARRIER_P (from
));
16770 /* Count the length of this insn. This must stay in sync with the
16771 code that pushes minipool fixes. */
16772 if (LABEL_P (from
))
16773 count
+= get_label_padding (from
);
16775 count
+= get_attr_length (from
);
16777 /* If there is a jump table, add its length. */
16778 if (tablejump_p (from
, NULL
, &tmp
))
16780 count
+= get_jump_table_size (tmp
);
16782 /* Jump tables aren't in a basic block, so base the cost on
16783 the dispatch insn. If we select this location, we will
16784 still put the pool after the table. */
16785 new_cost
= arm_barrier_cost (from
);
16787 if (count
< max_count
16788 && (!selected
|| new_cost
<= selected_cost
))
16791 selected_cost
= new_cost
;
16792 selected_address
= fix
->address
+ count
;
16795 /* Continue after the dispatch table. */
16796 from
= NEXT_INSN (tmp
);
16800 new_cost
= arm_barrier_cost (from
);
16802 if (count
< max_count
16803 && (!selected
|| new_cost
<= selected_cost
))
16806 selected_cost
= new_cost
;
16807 selected_address
= fix
->address
+ count
;
16810 from
= NEXT_INSN (from
);
16813 /* Make sure that we found a place to insert the jump. */
16814 gcc_assert (selected
);
16816 /* Make sure we do not split a call and its corresponding
16817 CALL_ARG_LOCATION note. */
16818 if (CALL_P (selected
))
16820 rtx_insn
*next
= NEXT_INSN (selected
);
16821 if (next
&& NOTE_P (next
)
16822 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16826 /* Create a new JUMP_INSN that branches around a barrier. */
16827 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16828 JUMP_LABEL (from
) = label
;
16829 barrier
= emit_barrier_after (from
);
16830 emit_label_after (label
, barrier
);
16832 /* Create a minipool barrier entry for the new barrier. */
16833 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16834 new_fix
->insn
= barrier
;
16835 new_fix
->address
= selected_address
;
16836 new_fix
->next
= fix
->next
;
16837 fix
->next
= new_fix
;
16842 /* Record that there is a natural barrier in the insn stream at
16845 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16847 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16850 fix
->address
= address
;
16853 if (minipool_fix_head
!= NULL
)
16854 minipool_fix_tail
->next
= fix
;
16856 minipool_fix_head
= fix
;
16858 minipool_fix_tail
= fix
;
16861 /* Record INSN, which will need fixing up to load a value from the
16862 minipool. ADDRESS is the offset of the insn since the start of the
16863 function; LOC is a pointer to the part of the insn which requires
16864 fixing; VALUE is the constant that must be loaded, which is of type
16867 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16868 machine_mode mode
, rtx value
)
16870 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16873 fix
->address
= address
;
16876 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16877 fix
->value
= value
;
16878 fix
->forwards
= get_attr_pool_range (insn
);
16879 fix
->backwards
= get_attr_neg_pool_range (insn
);
16880 fix
->minipool
= NULL
;
16882 /* If an insn doesn't have a range defined for it, then it isn't
16883 expecting to be reworked by this code. Better to stop now than
16884 to generate duff assembly code. */
16885 gcc_assert (fix
->forwards
|| fix
->backwards
);
16887 /* If an entry requires 8-byte alignment then assume all constant pools
16888 require 4 bytes of padding. Trying to do this later on a per-pool
16889 basis is awkward because existing pool entries have to be modified. */
16890 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16895 fprintf (dump_file
,
16896 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16897 GET_MODE_NAME (mode
),
16898 INSN_UID (insn
), (unsigned long) address
,
16899 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16900 arm_print_value (dump_file
, fix
->value
);
16901 fprintf (dump_file
, "\n");
16904 /* Add it to the chain of fixes. */
16907 if (minipool_fix_head
!= NULL
)
16908 minipool_fix_tail
->next
= fix
;
16910 minipool_fix_head
= fix
;
16912 minipool_fix_tail
= fix
;
16915 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16916 Returns the number of insns needed, or 99 if we always want to synthesize
16919 arm_max_const_double_inline_cost ()
16921 /* Let the value get synthesized to avoid the use of literal pools. */
16922 if (arm_disable_literal_pool
)
16925 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16928 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16929 Returns the number of insns needed, or 99 if we don't know how to
16932 arm_const_double_inline_cost (rtx val
)
16934 rtx lowpart
, highpart
;
16937 mode
= GET_MODE (val
);
16939 if (mode
== VOIDmode
)
16942 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16944 lowpart
= gen_lowpart (SImode
, val
);
16945 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16947 gcc_assert (CONST_INT_P (lowpart
));
16948 gcc_assert (CONST_INT_P (highpart
));
16950 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16951 NULL_RTX
, NULL_RTX
, 0, 0)
16952 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16953 NULL_RTX
, NULL_RTX
, 0, 0));
16956 /* Cost of loading a SImode constant. */
16958 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16960 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16961 NULL_RTX
, NULL_RTX
, 1, 0);
16964 /* Return true if it is worthwhile to split a 64-bit constant into two
16965 32-bit operations. This is the case if optimizing for size, or
16966 if we have load delay slots, or if one 32-bit part can be done with
16967 a single data operation. */
16969 arm_const_double_by_parts (rtx val
)
16971 machine_mode mode
= GET_MODE (val
);
16974 if (optimize_size
|| arm_ld_sched
)
16977 if (mode
== VOIDmode
)
16980 part
= gen_highpart_mode (SImode
, mode
, val
);
16982 gcc_assert (CONST_INT_P (part
));
16984 if (const_ok_for_arm (INTVAL (part
))
16985 || const_ok_for_arm (~INTVAL (part
)))
16988 part
= gen_lowpart (SImode
, val
);
16990 gcc_assert (CONST_INT_P (part
));
16992 if (const_ok_for_arm (INTVAL (part
))
16993 || const_ok_for_arm (~INTVAL (part
)))
16999 /* Return true if it is possible to inline both the high and low parts
17000 of a 64-bit constant into 32-bit data processing instructions. */
17002 arm_const_double_by_immediates (rtx val
)
17004 machine_mode mode
= GET_MODE (val
);
17007 if (mode
== VOIDmode
)
17010 part
= gen_highpart_mode (SImode
, mode
, val
);
17012 gcc_assert (CONST_INT_P (part
));
17014 if (!const_ok_for_arm (INTVAL (part
)))
17017 part
= gen_lowpart (SImode
, val
);
17019 gcc_assert (CONST_INT_P (part
));
17021 if (!const_ok_for_arm (INTVAL (part
)))
17027 /* Scan INSN and note any of its operands that need fixing.
17028 If DO_PUSHES is false we do not actually push any of the fixups
17031 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
17035 extract_constrain_insn (insn
);
17037 if (recog_data
.n_alternatives
== 0)
17040 /* Fill in recog_op_alt with information about the constraints of
17042 preprocess_constraints (insn
);
17044 const operand_alternative
*op_alt
= which_op_alt ();
17045 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
17047 /* Things we need to fix can only occur in inputs. */
17048 if (recog_data
.operand_type
[opno
] != OP_IN
)
17051 /* If this alternative is a memory reference, then any mention
17052 of constants in this alternative is really to fool reload
17053 into allowing us to accept one there. We need to fix them up
17054 now so that we output the right code. */
17055 if (op_alt
[opno
].memory_ok
)
17057 rtx op
= recog_data
.operand
[opno
];
17059 if (CONSTANT_P (op
))
17062 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
17063 recog_data
.operand_mode
[opno
], op
);
17065 else if (MEM_P (op
)
17066 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
17067 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
17071 rtx cop
= avoid_constant_pool_reference (op
);
17073 /* Casting the address of something to a mode narrower
17074 than a word can cause avoid_constant_pool_reference()
17075 to return the pool reference itself. That's no good to
17076 us here. Lets just hope that we can use the
17077 constant pool value directly. */
17079 cop
= get_pool_constant (XEXP (op
, 0));
17081 push_minipool_fix (insn
, address
,
17082 recog_data
.operand_loc
[opno
],
17083 recog_data
.operand_mode
[opno
], cop
);
17093 /* Rewrite move insn into subtract of 0 if the condition codes will
17094 be useful in next conditional jump insn. */
17097 thumb1_reorg (void)
17101 FOR_EACH_BB_FN (bb
, cfun
)
17104 rtx pat
, op0
, set
= NULL
;
17105 rtx_insn
*prev
, *insn
= BB_END (bb
);
17106 bool insn_clobbered
= false;
17108 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17109 insn
= PREV_INSN (insn
);
17111 /* Find the last cbranchsi4_insn in basic block BB. */
17112 if (insn
== BB_HEAD (bb
)
17113 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17116 /* Get the register with which we are comparing. */
17117 pat
= PATTERN (insn
);
17118 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
17120 /* Find the first flag setting insn before INSN in basic block BB. */
17121 gcc_assert (insn
!= BB_HEAD (bb
));
17122 for (prev
= PREV_INSN (insn
);
17124 && prev
!= BB_HEAD (bb
)
17126 || DEBUG_INSN_P (prev
)
17127 || ((set
= single_set (prev
)) != NULL
17128 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17129 prev
= PREV_INSN (prev
))
17131 if (reg_set_p (op0
, prev
))
17132 insn_clobbered
= true;
17135 /* Skip if op0 is clobbered by insn other than prev. */
17136 if (insn_clobbered
)
17142 dest
= SET_DEST (set
);
17143 src
= SET_SRC (set
);
17144 if (!low_register_operand (dest
, SImode
)
17145 || !low_register_operand (src
, SImode
))
17148 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17149 in INSN. Both src and dest of the move insn are checked. */
17150 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17152 dest
= copy_rtx (dest
);
17153 src
= copy_rtx (src
);
17154 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17155 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17156 INSN_CODE (prev
) = -1;
17157 /* Set test register in INSN to dest. */
17158 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
17159 INSN_CODE (insn
) = -1;
17164 /* Convert instructions to their cc-clobbering variant if possible, since
17165 that allows us to use smaller encodings. */
17168 thumb2_reorg (void)
17173 INIT_REG_SET (&live
);
17175 /* We are freeing block_for_insn in the toplev to keep compatibility
17176 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17177 compute_bb_for_insn ();
17180 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17182 FOR_EACH_BB_FN (bb
, cfun
)
17184 if ((current_tune
->disparage_flag_setting_t16_encodings
17185 == tune_params::DISPARAGE_FLAGS_ALL
)
17186 && optimize_bb_for_speed_p (bb
))
17190 Convert_Action action
= SKIP
;
17191 Convert_Action action_for_partial_flag_setting
17192 = ((current_tune
->disparage_flag_setting_t16_encodings
17193 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17194 && optimize_bb_for_speed_p (bb
))
17197 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17198 df_simulate_initialize_backwards (bb
, &live
);
17199 FOR_BB_INSNS_REVERSE (bb
, insn
)
17201 if (NONJUMP_INSN_P (insn
)
17202 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17203 && GET_CODE (PATTERN (insn
)) == SET
)
17206 rtx pat
= PATTERN (insn
);
17207 rtx dst
= XEXP (pat
, 0);
17208 rtx src
= XEXP (pat
, 1);
17209 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17211 if (UNARY_P (src
) || BINARY_P (src
))
17212 op0
= XEXP (src
, 0);
17214 if (BINARY_P (src
))
17215 op1
= XEXP (src
, 1);
17217 if (low_register_operand (dst
, SImode
))
17219 switch (GET_CODE (src
))
17222 /* Adding two registers and storing the result
17223 in the first source is already a 16-bit
17225 if (rtx_equal_p (dst
, op0
)
17226 && register_operand (op1
, SImode
))
17229 if (low_register_operand (op0
, SImode
))
17231 /* ADDS <Rd>,<Rn>,<Rm> */
17232 if (low_register_operand (op1
, SImode
))
17234 /* ADDS <Rdn>,#<imm8> */
17235 /* SUBS <Rdn>,#<imm8> */
17236 else if (rtx_equal_p (dst
, op0
)
17237 && CONST_INT_P (op1
)
17238 && IN_RANGE (INTVAL (op1
), -255, 255))
17240 /* ADDS <Rd>,<Rn>,#<imm3> */
17241 /* SUBS <Rd>,<Rn>,#<imm3> */
17242 else if (CONST_INT_P (op1
)
17243 && IN_RANGE (INTVAL (op1
), -7, 7))
17246 /* ADCS <Rd>, <Rn> */
17247 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17248 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17249 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17251 && COMPARISON_P (op1
)
17252 && cc_register (XEXP (op1
, 0), VOIDmode
)
17253 && maybe_get_arm_condition_code (op1
) == ARM_CS
17254 && XEXP (op1
, 1) == const0_rtx
)
17259 /* RSBS <Rd>,<Rn>,#0
17260 Not handled here: see NEG below. */
17261 /* SUBS <Rd>,<Rn>,#<imm3>
17263 Not handled here: see PLUS above. */
17264 /* SUBS <Rd>,<Rn>,<Rm> */
17265 if (low_register_operand (op0
, SImode
)
17266 && low_register_operand (op1
, SImode
))
17271 /* MULS <Rdm>,<Rn>,<Rdm>
17272 As an exception to the rule, this is only used
17273 when optimizing for size since MULS is slow on all
17274 known implementations. We do not even want to use
17275 MULS in cold code, if optimizing for speed, so we
17276 test the global flag here. */
17277 if (!optimize_size
)
17279 /* else fall through. */
17283 /* ANDS <Rdn>,<Rm> */
17284 if (rtx_equal_p (dst
, op0
)
17285 && low_register_operand (op1
, SImode
))
17286 action
= action_for_partial_flag_setting
;
17287 else if (rtx_equal_p (dst
, op1
)
17288 && low_register_operand (op0
, SImode
))
17289 action
= action_for_partial_flag_setting
== SKIP
17290 ? SKIP
: SWAP_CONV
;
17296 /* ASRS <Rdn>,<Rm> */
17297 /* LSRS <Rdn>,<Rm> */
17298 /* LSLS <Rdn>,<Rm> */
17299 if (rtx_equal_p (dst
, op0
)
17300 && low_register_operand (op1
, SImode
))
17301 action
= action_for_partial_flag_setting
;
17302 /* ASRS <Rd>,<Rm>,#<imm5> */
17303 /* LSRS <Rd>,<Rm>,#<imm5> */
17304 /* LSLS <Rd>,<Rm>,#<imm5> */
17305 else if (low_register_operand (op0
, SImode
)
17306 && CONST_INT_P (op1
)
17307 && IN_RANGE (INTVAL (op1
), 0, 31))
17308 action
= action_for_partial_flag_setting
;
17312 /* RORS <Rdn>,<Rm> */
17313 if (rtx_equal_p (dst
, op0
)
17314 && low_register_operand (op1
, SImode
))
17315 action
= action_for_partial_flag_setting
;
17319 /* MVNS <Rd>,<Rm> */
17320 if (low_register_operand (op0
, SImode
))
17321 action
= action_for_partial_flag_setting
;
17325 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17326 if (low_register_operand (op0
, SImode
))
17331 /* MOVS <Rd>,#<imm8> */
17332 if (CONST_INT_P (src
)
17333 && IN_RANGE (INTVAL (src
), 0, 255))
17334 action
= action_for_partial_flag_setting
;
17338 /* MOVS and MOV<c> with registers have different
17339 encodings, so are not relevant here. */
17347 if (action
!= SKIP
)
17349 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17350 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17353 if (action
== SWAP_CONV
)
17355 src
= copy_rtx (src
);
17356 XEXP (src
, 0) = op1
;
17357 XEXP (src
, 1) = op0
;
17358 pat
= gen_rtx_SET (dst
, src
);
17359 vec
= gen_rtvec (2, pat
, clobber
);
17361 else /* action == CONV */
17362 vec
= gen_rtvec (2, pat
, clobber
);
17364 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17365 INSN_CODE (insn
) = -1;
17369 if (NONDEBUG_INSN_P (insn
))
17370 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17374 CLEAR_REG_SET (&live
);
17377 /* Gcc puts the pool in the wrong place for ARM, since we can only
17378 load addresses a limited distance around the pc. We do some
17379 special munging to move the constant pool values to the correct
17380 point in the code. */
17385 HOST_WIDE_INT address
= 0;
17390 else if (TARGET_THUMB2
)
17393 /* Ensure all insns that must be split have been split at this point.
17394 Otherwise, the pool placement code below may compute incorrect
17395 insn lengths. Note that when optimizing, all insns have already
17396 been split at this point. */
17398 split_all_insns_noflow ();
17400 minipool_fix_head
= minipool_fix_tail
= NULL
;
17402 /* The first insn must always be a note, or the code below won't
17403 scan it properly. */
17404 insn
= get_insns ();
17405 gcc_assert (NOTE_P (insn
));
17408 /* Scan all the insns and record the operands that will need fixing. */
17409 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17411 if (BARRIER_P (insn
))
17412 push_minipool_barrier (insn
, address
);
17413 else if (INSN_P (insn
))
17415 rtx_jump_table_data
*table
;
17417 note_invalid_constants (insn
, address
, true);
17418 address
+= get_attr_length (insn
);
17420 /* If the insn is a vector jump, add the size of the table
17421 and skip the table. */
17422 if (tablejump_p (insn
, NULL
, &table
))
17424 address
+= get_jump_table_size (table
);
17428 else if (LABEL_P (insn
))
17429 /* Add the worst-case padding due to alignment. We don't add
17430 the _current_ padding because the minipool insertions
17431 themselves might change it. */
17432 address
+= get_label_padding (insn
);
17435 fix
= minipool_fix_head
;
17437 /* Now scan the fixups and perform the required changes. */
17442 Mfix
* last_added_fix
;
17443 Mfix
* last_barrier
= NULL
;
17446 /* Skip any further barriers before the next fix. */
17447 while (fix
&& BARRIER_P (fix
->insn
))
17450 /* No more fixes. */
17454 last_added_fix
= NULL
;
17456 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17458 if (BARRIER_P (ftmp
->insn
))
17460 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17463 last_barrier
= ftmp
;
17465 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17468 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17471 /* If we found a barrier, drop back to that; any fixes that we
17472 could have reached but come after the barrier will now go in
17473 the next mini-pool. */
17474 if (last_barrier
!= NULL
)
17476 /* Reduce the refcount for those fixes that won't go into this
17478 for (fdel
= last_barrier
->next
;
17479 fdel
&& fdel
!= ftmp
;
17482 fdel
->minipool
->refcount
--;
17483 fdel
->minipool
= NULL
;
17486 ftmp
= last_barrier
;
17490 /* ftmp is first fix that we can't fit into this pool and
17491 there no natural barriers that we could use. Insert a
17492 new barrier in the code somewhere between the previous
17493 fix and this one, and arrange to jump around it. */
17494 HOST_WIDE_INT max_address
;
17496 /* The last item on the list of fixes must be a barrier, so
17497 we can never run off the end of the list of fixes without
17498 last_barrier being set. */
17501 max_address
= minipool_vector_head
->max_address
;
17502 /* Check that there isn't another fix that is in range that
17503 we couldn't fit into this pool because the pool was
17504 already too large: we need to put the pool before such an
17505 instruction. The pool itself may come just after the
17506 fix because create_fix_barrier also allows space for a
17507 jump instruction. */
17508 if (ftmp
->address
< max_address
)
17509 max_address
= ftmp
->address
+ 1;
17511 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17514 assign_minipool_offsets (last_barrier
);
17518 if (!BARRIER_P (ftmp
->insn
)
17519 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17526 /* Scan over the fixes we have identified for this pool, fixing them
17527 up and adding the constants to the pool itself. */
17528 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17529 this_fix
= this_fix
->next
)
17530 if (!BARRIER_P (this_fix
->insn
))
17533 = plus_constant (Pmode
,
17534 gen_rtx_LABEL_REF (VOIDmode
,
17535 minipool_vector_label
),
17536 this_fix
->minipool
->offset
);
17537 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17540 dump_minipool (last_barrier
->insn
);
17544 /* From now on we must synthesize any constants that we can't handle
17545 directly. This can happen if the RTL gets split during final
17546 instruction generation. */
17547 cfun
->machine
->after_arm_reorg
= 1;
17549 /* Free the minipool memory. */
17550 obstack_free (&minipool_obstack
, minipool_startobj
);
17553 /* Routines to output assembly language. */
17555 /* Return string representation of passed in real value. */
17556 static const char *
17557 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17559 if (!fp_consts_inited
)
17562 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
17566 /* OPERANDS[0] is the entire list of insns that constitute pop,
17567 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17568 is in the list, UPDATE is true iff the list contains explicit
17569 update of base register. */
17571 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17577 const char *conditional
;
17578 int num_saves
= XVECLEN (operands
[0], 0);
17579 unsigned int regno
;
17580 unsigned int regno_base
= REGNO (operands
[1]);
17583 offset
+= update
? 1 : 0;
17584 offset
+= return_pc
? 1 : 0;
17586 /* Is the base register in the list? */
17587 for (i
= offset
; i
< num_saves
; i
++)
17589 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17590 /* If SP is in the list, then the base register must be SP. */
17591 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17592 /* If base register is in the list, there must be no explicit update. */
17593 if (regno
== regno_base
)
17594 gcc_assert (!update
);
17597 conditional
= reverse
? "%?%D0" : "%?%d0";
17598 if ((regno_base
== SP_REGNUM
) && TARGET_THUMB
)
17600 /* Output pop (not stmfd) because it has a shorter encoding. */
17601 gcc_assert (update
);
17602 sprintf (pattern
, "pop%s\t{", conditional
);
17606 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17607 It's just a convention, their semantics are identical. */
17608 if (regno_base
== SP_REGNUM
)
17609 sprintf (pattern
, "ldm%sfd\t", conditional
);
17610 else if (TARGET_UNIFIED_ASM
)
17611 sprintf (pattern
, "ldmia%s\t", conditional
);
17613 sprintf (pattern
, "ldm%sia\t", conditional
);
17615 strcat (pattern
, reg_names
[regno_base
]);
17617 strcat (pattern
, "!, {");
17619 strcat (pattern
, ", {");
17622 /* Output the first destination register. */
17624 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17626 /* Output the rest of the destination registers. */
17627 for (i
= offset
+ 1; i
< num_saves
; i
++)
17629 strcat (pattern
, ", ");
17631 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17634 strcat (pattern
, "}");
17636 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
17637 strcat (pattern
, "^");
17639 output_asm_insn (pattern
, &cond
);
17643 /* Output the assembly for a store multiple. */
17646 vfp_output_vstmd (rtx
* operands
)
17652 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17653 ? XEXP (operands
[0], 0)
17654 : XEXP (XEXP (operands
[0], 0), 0);
17655 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17658 strcpy (pattern
, "vpush%?.64\t{%P1");
17660 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17662 p
= strlen (pattern
);
17664 gcc_assert (REG_P (operands
[1]));
17666 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17667 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17669 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17671 strcpy (&pattern
[p
], "}");
17673 output_asm_insn (pattern
, operands
);
17678 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17679 number of bytes pushed. */
17682 vfp_emit_fstmd (int base_reg
, int count
)
17689 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17690 register pairs are stored by a store multiple insn. We avoid this
17691 by pushing an extra pair. */
17692 if (count
== 2 && !arm_arch6
)
17694 if (base_reg
== LAST_VFP_REGNUM
- 3)
17699 /* FSTMD may not store more than 16 doubleword registers at once. Split
17700 larger stores into multiple parts (up to a maximum of two, in
17705 /* NOTE: base_reg is an internal register number, so each D register
17707 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17708 saved
+= vfp_emit_fstmd (base_reg
, 16);
17712 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17713 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17715 reg
= gen_rtx_REG (DFmode
, base_reg
);
17718 XVECEXP (par
, 0, 0)
17719 = gen_rtx_SET (gen_frame_mem
17721 gen_rtx_PRE_MODIFY (Pmode
,
17724 (Pmode
, stack_pointer_rtx
,
17727 gen_rtx_UNSPEC (BLKmode
,
17728 gen_rtvec (1, reg
),
17729 UNSPEC_PUSH_MULT
));
17731 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17732 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17733 RTX_FRAME_RELATED_P (tmp
) = 1;
17734 XVECEXP (dwarf
, 0, 0) = tmp
;
17736 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17737 RTX_FRAME_RELATED_P (tmp
) = 1;
17738 XVECEXP (dwarf
, 0, 1) = tmp
;
17740 for (i
= 1; i
< count
; i
++)
17742 reg
= gen_rtx_REG (DFmode
, base_reg
);
17744 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17746 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17747 plus_constant (Pmode
,
17751 RTX_FRAME_RELATED_P (tmp
) = 1;
17752 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17755 par
= emit_insn (par
);
17756 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17757 RTX_FRAME_RELATED_P (par
) = 1;
17762 /* Emit a call instruction with pattern PAT. ADDR is the address of
17763 the call target. */
17766 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17770 insn
= emit_call_insn (pat
);
17772 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17773 If the call might use such an entry, add a use of the PIC register
17774 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17775 if (TARGET_VXWORKS_RTP
17778 && GET_CODE (addr
) == SYMBOL_REF
17779 && (SYMBOL_REF_DECL (addr
)
17780 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17781 : !SYMBOL_REF_LOCAL_P (addr
)))
17783 require_pic_register ();
17784 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17787 if (TARGET_AAPCS_BASED
)
17789 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17790 linker. We need to add an IP clobber to allow setting
17791 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17792 is not needed since it's a fixed register. */
17793 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17794 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17798 /* Output a 'call' insn. */
17800 output_call (rtx
*operands
)
17802 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17804 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17805 if (REGNO (operands
[0]) == LR_REGNUM
)
17807 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17808 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17811 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17813 if (TARGET_INTERWORK
|| arm_arch4t
)
17814 output_asm_insn ("bx%?\t%0", operands
);
17816 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17821 /* Output a 'call' insn that is a reference in memory. This is
17822 disabled for ARMv5 and we prefer a blx instead because otherwise
17823 there's a significant performance overhead. */
17825 output_call_mem (rtx
*operands
)
17827 gcc_assert (!arm_arch5
);
17828 if (TARGET_INTERWORK
)
17830 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17831 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17832 output_asm_insn ("bx%?\t%|ip", operands
);
17834 else if (regno_use_in (LR_REGNUM
, operands
[0]))
17836 /* LR is used in the memory address. We load the address in the
17837 first instruction. It's safe to use IP as the target of the
17838 load since the call will kill it anyway. */
17839 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17840 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17842 output_asm_insn ("bx%?\t%|ip", operands
);
17844 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
17848 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17849 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
17856 /* Output a move from arm registers to arm registers of a long double
17857 OPERANDS[0] is the destination.
17858 OPERANDS[1] is the source. */
17860 output_mov_long_double_arm_from_arm (rtx
*operands
)
17862 /* We have to be careful here because the two might overlap. */
17863 int dest_start
= REGNO (operands
[0]);
17864 int src_start
= REGNO (operands
[1]);
17868 if (dest_start
< src_start
)
17870 for (i
= 0; i
< 3; i
++)
17872 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17873 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17874 output_asm_insn ("mov%?\t%0, %1", ops
);
17879 for (i
= 2; i
>= 0; i
--)
17881 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17882 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17883 output_asm_insn ("mov%?\t%0, %1", ops
);
17891 arm_emit_movpair (rtx dest
, rtx src
)
17893 /* If the src is an immediate, simplify it. */
17894 if (CONST_INT_P (src
))
17896 HOST_WIDE_INT val
= INTVAL (src
);
17897 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17898 if ((val
>> 16) & 0x0000ffff)
17899 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17901 GEN_INT ((val
>> 16) & 0x0000ffff));
17904 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17905 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17908 /* Output a move between double words. It must be REG<-MEM
17911 output_move_double (rtx
*operands
, bool emit
, int *count
)
17913 enum rtx_code code0
= GET_CODE (operands
[0]);
17914 enum rtx_code code1
= GET_CODE (operands
[1]);
17919 /* The only case when this might happen is when
17920 you are looking at the length of a DImode instruction
17921 that has an invalid constant in it. */
17922 if (code0
== REG
&& code1
!= MEM
)
17924 gcc_assert (!emit
);
17931 unsigned int reg0
= REGNO (operands
[0]);
17933 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17935 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17937 switch (GET_CODE (XEXP (operands
[1], 0)))
17944 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17945 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17947 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17952 gcc_assert (TARGET_LDRD
);
17954 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
17961 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
17963 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
17971 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
17973 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
17978 gcc_assert (TARGET_LDRD
);
17980 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
17985 /* Autoicrement addressing modes should never have overlapping
17986 base and destination registers, and overlapping index registers
17987 are already prohibited, so this doesn't need to worry about
17989 otherops
[0] = operands
[0];
17990 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17991 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17993 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17995 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17997 /* Registers overlap so split out the increment. */
18000 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18001 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
18008 /* Use a single insn if we can.
18009 FIXME: IWMMXT allows offsets larger than ldrd can
18010 handle, fix these up with a pair of ldr. */
18012 || !CONST_INT_P (otherops
[2])
18013 || (INTVAL (otherops
[2]) > -256
18014 && INTVAL (otherops
[2]) < 256))
18017 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
18023 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18024 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18034 /* Use a single insn if we can.
18035 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18036 fix these up with a pair of ldr. */
18038 || !CONST_INT_P (otherops
[2])
18039 || (INTVAL (otherops
[2]) > -256
18040 && INTVAL (otherops
[2]) < 256))
18043 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
18049 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18050 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18060 /* We might be able to use ldrd %0, %1 here. However the range is
18061 different to ldr/adr, and it is broken on some ARMv7-M
18062 implementations. */
18063 /* Use the second register of the pair to avoid problematic
18065 otherops
[1] = operands
[1];
18067 output_asm_insn ("adr%?\t%0, %1", otherops
);
18068 operands
[1] = otherops
[0];
18072 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18074 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
18081 /* ??? This needs checking for thumb2. */
18083 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18084 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18086 otherops
[0] = operands
[0];
18087 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18088 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18090 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18092 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18094 switch ((int) INTVAL (otherops
[2]))
18098 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
18104 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
18110 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
18114 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18115 operands
[1] = otherops
[0];
18117 && (REG_P (otherops
[2])
18119 || (CONST_INT_P (otherops
[2])
18120 && INTVAL (otherops
[2]) > -256
18121 && INTVAL (otherops
[2]) < 256)))
18123 if (reg_overlap_mentioned_p (operands
[0],
18126 /* Swap base and index registers over to
18127 avoid a conflict. */
18128 std::swap (otherops
[1], otherops
[2]);
18130 /* If both registers conflict, it will usually
18131 have been fixed by a splitter. */
18132 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18133 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18137 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18138 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18145 otherops
[0] = operands
[0];
18147 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
18152 if (CONST_INT_P (otherops
[2]))
18156 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18157 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18159 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18165 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18171 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18178 return "ldr%(d%)\t%0, [%1]";
18180 return "ldm%(ia%)\t%1, %M0";
18184 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18185 /* Take care of overlapping base/data reg. */
18186 if (reg_mentioned_p (operands
[0], operands
[1]))
18190 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18191 output_asm_insn ("ldr%?\t%0, %1", operands
);
18201 output_asm_insn ("ldr%?\t%0, %1", operands
);
18202 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18212 /* Constraints should ensure this. */
18213 gcc_assert (code0
== MEM
&& code1
== REG
);
18214 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18215 || (TARGET_ARM
&& TARGET_LDRD
));
18217 switch (GET_CODE (XEXP (operands
[0], 0)))
18223 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
18225 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18230 gcc_assert (TARGET_LDRD
);
18232 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
18239 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
18241 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
18249 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
18251 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
18256 gcc_assert (TARGET_LDRD
);
18258 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
18263 otherops
[0] = operands
[1];
18264 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18265 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18267 /* IWMMXT allows offsets larger than ldrd can handle,
18268 fix these up with a pair of ldr. */
18270 && CONST_INT_P (otherops
[2])
18271 && (INTVAL(otherops
[2]) <= -256
18272 || INTVAL(otherops
[2]) >= 256))
18274 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18278 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18279 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18288 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18289 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18295 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18298 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
18303 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
18308 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18309 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18311 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18315 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
18322 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
18329 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
18334 && (REG_P (otherops
[2])
18336 || (CONST_INT_P (otherops
[2])
18337 && INTVAL (otherops
[2]) > -256
18338 && INTVAL (otherops
[2]) < 256)))
18340 otherops
[0] = operands
[1];
18341 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18343 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
18349 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18350 otherops
[1] = operands
[1];
18353 output_asm_insn ("str%?\t%1, %0", operands
);
18354 output_asm_insn ("str%?\t%H1, %0", otherops
);
18364 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18365 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18368 output_move_quad (rtx
*operands
)
18370 if (REG_P (operands
[0]))
18372 /* Load, or reg->reg move. */
18374 if (MEM_P (operands
[1]))
18376 switch (GET_CODE (XEXP (operands
[1], 0)))
18379 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
18384 output_asm_insn ("adr%?\t%0, %1", operands
);
18385 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
18389 gcc_unreachable ();
18397 gcc_assert (REG_P (operands
[1]));
18399 dest
= REGNO (operands
[0]);
18400 src
= REGNO (operands
[1]);
18402 /* This seems pretty dumb, but hopefully GCC won't try to do it
18405 for (i
= 0; i
< 4; i
++)
18407 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18408 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18409 output_asm_insn ("mov%?\t%0, %1", ops
);
18412 for (i
= 3; i
>= 0; i
--)
18414 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18415 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18416 output_asm_insn ("mov%?\t%0, %1", ops
);
18422 gcc_assert (MEM_P (operands
[0]));
18423 gcc_assert (REG_P (operands
[1]));
18424 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18426 switch (GET_CODE (XEXP (operands
[0], 0)))
18429 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18433 gcc_unreachable ();
18440 /* Output a VFP load or store instruction. */
18443 output_move_vfp (rtx
*operands
)
18445 rtx reg
, mem
, addr
, ops
[2];
18446 int load
= REG_P (operands
[0]);
18447 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18448 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18453 reg
= operands
[!load
];
18454 mem
= operands
[load
];
18456 mode
= GET_MODE (reg
);
18458 gcc_assert (REG_P (reg
));
18459 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18460 gcc_assert (mode
== SFmode
18464 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18465 gcc_assert (MEM_P (mem
));
18467 addr
= XEXP (mem
, 0);
18469 switch (GET_CODE (addr
))
18472 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18473 ops
[0] = XEXP (addr
, 0);
18478 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18479 ops
[0] = XEXP (addr
, 0);
18484 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18490 sprintf (buff
, templ
,
18491 load
? "ld" : "st",
18494 integer_p
? "\t%@ int" : "");
18495 output_asm_insn (buff
, ops
);
18500 /* Output a Neon double-word or quad-word load or store, or a load
18501 or store for larger structure modes.
18503 WARNING: The ordering of elements is weird in big-endian mode,
18504 because the EABI requires that vectors stored in memory appear
18505 as though they were stored by a VSTM, as required by the EABI.
18506 GCC RTL defines element ordering based on in-memory order.
18507 This can be different from the architectural ordering of elements
18508 within a NEON register. The intrinsics defined in arm_neon.h use the
18509 NEON register element ordering, not the GCC RTL element ordering.
18511 For example, the in-memory ordering of a big-endian a quadword
18512 vector with 16-bit elements when stored from register pair {d0,d1}
18513 will be (lowest address first, d0[N] is NEON register element N):
18515 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18517 When necessary, quadword registers (dN, dN+1) are moved to ARM
18518 registers from rN in the order:
18520 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18522 So that STM/LDM can be used on vectors in ARM registers, and the
18523 same memory layout will result as if VSTM/VLDM were used.
18525 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18526 possible, which allows use of appropriate alignment tags.
18527 Note that the choice of "64" is independent of the actual vector
18528 element size; this size simply ensures that the behavior is
18529 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18531 Due to limitations of those instructions, use of VST1.64/VLD1.64
18532 is not possible if:
18533 - the address contains PRE_DEC, or
18534 - the mode refers to more than 4 double-word registers
18536 In those cases, it would be possible to replace VSTM/VLDM by a
18537 sequence of instructions; this is not currently implemented since
18538 this is not certain to actually improve performance. */
18541 output_move_neon (rtx
*operands
)
18543 rtx reg
, mem
, addr
, ops
[2];
18544 int regno
, nregs
, load
= REG_P (operands
[0]);
18549 reg
= operands
[!load
];
18550 mem
= operands
[load
];
18552 mode
= GET_MODE (reg
);
18554 gcc_assert (REG_P (reg
));
18555 regno
= REGNO (reg
);
18556 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18557 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18558 || NEON_REGNO_OK_FOR_QUAD (regno
));
18559 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18560 || VALID_NEON_QREG_MODE (mode
)
18561 || VALID_NEON_STRUCT_MODE (mode
));
18562 gcc_assert (MEM_P (mem
));
18564 addr
= XEXP (mem
, 0);
18566 /* Strip off const from addresses like (const (plus (...))). */
18567 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18568 addr
= XEXP (addr
, 0);
18570 switch (GET_CODE (addr
))
18573 /* We have to use vldm / vstm for too-large modes. */
18576 templ
= "v%smia%%?\t%%0!, %%h1";
18577 ops
[0] = XEXP (addr
, 0);
18581 templ
= "v%s1.64\t%%h1, %%A0";
18588 /* We have to use vldm / vstm in this case, since there is no
18589 pre-decrement form of the vld1 / vst1 instructions. */
18590 templ
= "v%smdb%%?\t%%0!, %%h1";
18591 ops
[0] = XEXP (addr
, 0);
18596 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18597 gcc_unreachable ();
18600 /* We have to use vldm / vstm for too-large modes. */
18604 templ
= "v%smia%%?\t%%m0, %%h1";
18606 templ
= "v%s1.64\t%%h1, %%A0";
18612 /* Fall through. */
18618 for (i
= 0; i
< nregs
; i
++)
18620 /* We're only using DImode here because it's a convenient size. */
18621 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18622 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18623 if (reg_overlap_mentioned_p (ops
[0], mem
))
18625 gcc_assert (overlap
== -1);
18630 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18631 output_asm_insn (buff
, ops
);
18636 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18637 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18638 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18639 output_asm_insn (buff
, ops
);
18646 gcc_unreachable ();
18649 sprintf (buff
, templ
, load
? "ld" : "st");
18650 output_asm_insn (buff
, ops
);
18655 /* Compute and return the length of neon_mov<mode>, where <mode> is
18656 one of VSTRUCT modes: EI, OI, CI or XI. */
18658 arm_attr_length_move_neon (rtx_insn
*insn
)
18660 rtx reg
, mem
, addr
;
18664 extract_insn_cached (insn
);
18666 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18668 mode
= GET_MODE (recog_data
.operand
[0]);
18679 gcc_unreachable ();
18683 load
= REG_P (recog_data
.operand
[0]);
18684 reg
= recog_data
.operand
[!load
];
18685 mem
= recog_data
.operand
[load
];
18687 gcc_assert (MEM_P (mem
));
18689 mode
= GET_MODE (reg
);
18690 addr
= XEXP (mem
, 0);
18692 /* Strip off const from addresses like (const (plus (...))). */
18693 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18694 addr
= XEXP (addr
, 0);
18696 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18698 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18705 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18709 arm_address_offset_is_imm (rtx_insn
*insn
)
18713 extract_insn_cached (insn
);
18715 if (REG_P (recog_data
.operand
[0]))
18718 mem
= recog_data
.operand
[0];
18720 gcc_assert (MEM_P (mem
));
18722 addr
= XEXP (mem
, 0);
18725 || (GET_CODE (addr
) == PLUS
18726 && REG_P (XEXP (addr
, 0))
18727 && CONST_INT_P (XEXP (addr
, 1))))
18733 /* Output an ADD r, s, #n where n may be too big for one instruction.
18734 If adding zero to one register, output nothing. */
18736 output_add_immediate (rtx
*operands
)
18738 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18740 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18743 output_multi_immediate (operands
,
18744 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18747 output_multi_immediate (operands
,
18748 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18755 /* Output a multiple immediate operation.
18756 OPERANDS is the vector of operands referred to in the output patterns.
18757 INSTR1 is the output pattern to use for the first constant.
18758 INSTR2 is the output pattern to use for subsequent constants.
18759 IMMED_OP is the index of the constant slot in OPERANDS.
18760 N is the constant value. */
18761 static const char *
18762 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18763 int immed_op
, HOST_WIDE_INT n
)
18765 #if HOST_BITS_PER_WIDE_INT > 32
18771 /* Quick and easy output. */
18772 operands
[immed_op
] = const0_rtx
;
18773 output_asm_insn (instr1
, operands
);
18778 const char * instr
= instr1
;
18780 /* Note that n is never zero here (which would give no output). */
18781 for (i
= 0; i
< 32; i
+= 2)
18785 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18786 output_asm_insn (instr
, operands
);
18796 /* Return the name of a shifter operation. */
18797 static const char *
18798 arm_shift_nmem(enum rtx_code code
)
18803 return ARM_LSL_NAME
;
18819 /* Return the appropriate ARM instruction for the operation code.
18820 The returned result should not be overwritten. OP is the rtx of the
18821 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18824 arithmetic_instr (rtx op
, int shift_first_arg
)
18826 switch (GET_CODE (op
))
18832 return shift_first_arg
? "rsb" : "sub";
18847 return arm_shift_nmem(GET_CODE(op
));
18850 gcc_unreachable ();
18854 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18855 for the operation code. The returned result should not be overwritten.
18856 OP is the rtx code of the shift.
18857 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18859 static const char *
18860 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18863 enum rtx_code code
= GET_CODE (op
);
18868 if (!CONST_INT_P (XEXP (op
, 1)))
18870 output_operand_lossage ("invalid shift operand");
18875 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18883 mnem
= arm_shift_nmem(code
);
18884 if (CONST_INT_P (XEXP (op
, 1)))
18886 *amountp
= INTVAL (XEXP (op
, 1));
18888 else if (REG_P (XEXP (op
, 1)))
18895 output_operand_lossage ("invalid shift operand");
18901 /* We never have to worry about the amount being other than a
18902 power of 2, since this case can never be reloaded from a reg. */
18903 if (!CONST_INT_P (XEXP (op
, 1)))
18905 output_operand_lossage ("invalid shift operand");
18909 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18911 /* Amount must be a power of two. */
18912 if (*amountp
& (*amountp
- 1))
18914 output_operand_lossage ("invalid shift operand");
18918 *amountp
= int_log2 (*amountp
);
18919 return ARM_LSL_NAME
;
18922 output_operand_lossage ("invalid shift operand");
18926 /* This is not 100% correct, but follows from the desire to merge
18927 multiplication by a power of 2 with the recognizer for a
18928 shift. >=32 is not a valid shift for "lsl", so we must try and
18929 output a shift that produces the correct arithmetical result.
18930 Using lsr #32 is identical except for the fact that the carry bit
18931 is not set correctly if we set the flags; but we never use the
18932 carry bit from such an operation, so we can ignore that. */
18933 if (code
== ROTATERT
)
18934 /* Rotate is just modulo 32. */
18936 else if (*amountp
!= (*amountp
& 31))
18938 if (code
== ASHIFT
)
18943 /* Shifts of 0 are no-ops. */
18950 /* Obtain the shift from the POWER of two. */
18952 static HOST_WIDE_INT
18953 int_log2 (HOST_WIDE_INT power
)
18955 HOST_WIDE_INT shift
= 0;
18957 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
18959 gcc_assert (shift
<= 31);
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      /* Start a fresh .ascii directive once the current line is full.  */
      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  /* Backslash and double-quote must be escaped inside the
	     string literal.  */
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  /* Non-printable characters are emitted as 4-column octal
	     escapes.  */
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19022 /* Compute the register save mask for registers 0 through 12
19023 inclusive. This code is used by arm_compute_save_reg_mask. */
19025 static unsigned long
19026 arm_compute_save_reg0_reg12_mask (void)
19028 unsigned long func_type
= arm_current_func_type ();
19029 unsigned long save_reg_mask
= 0;
19032 if (IS_INTERRUPT (func_type
))
19034 unsigned int max_reg
;
19035 /* Interrupt functions must not corrupt any registers,
19036 even call clobbered ones. If this is a leaf function
19037 we can just examine the registers used by the RTL, but
19038 otherwise we have to assume that whatever function is
19039 called might clobber anything, and so we have to save
19040 all the call-clobbered registers as well. */
19041 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19042 /* FIQ handlers have registers r8 - r12 banked, so
19043 we only need to check r0 - r7, Normal ISRs only
19044 bank r14 and r15, so we must check up to r12.
19045 r13 is the stack pointer which is always preserved,
19046 so we do not need to consider it here. */
19051 for (reg
= 0; reg
<= max_reg
; reg
++)
19052 if (df_regs_ever_live_p (reg
)
19053 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19054 save_reg_mask
|= (1 << reg
);
19056 /* Also save the pic base register if necessary. */
19058 && !TARGET_SINGLE_PIC_BASE
19059 && arm_pic_register
!= INVALID_REGNUM
19060 && crtl
->uses_pic_offset_table
)
19061 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19063 else if (IS_VOLATILE(func_type
))
19065 /* For noreturn functions we historically omitted register saves
19066 altogether. However this really messes up debugging. As a
19067 compromise save just the frame pointers. Combined with the link
19068 register saved elsewhere this should be sufficient to get
19070 if (frame_pointer_needed
)
19071 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19072 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19073 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19074 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19075 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19079 /* In the normal case we only need to save those registers
19080 which are call saved and which are used by this function. */
19081 for (reg
= 0; reg
<= 11; reg
++)
19082 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19083 save_reg_mask
|= (1 << reg
);
19085 /* Handle the frame pointer as a special case. */
19086 if (frame_pointer_needed
)
19087 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19089 /* If we aren't loading the PIC register,
19090 don't stack it even though it may be live. */
19092 && !TARGET_SINGLE_PIC_BASE
19093 && arm_pic_register
!= INVALID_REGNUM
19094 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19095 || crtl
->uses_pic_offset_table
))
19096 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19098 /* The prologue will copy SP into R0, so save it. */
19099 if (IS_STACKALIGN (func_type
))
19100 save_reg_mask
|= 1;
19103 /* Save registers so the exception handler can modify them. */
19104 if (crtl
->calls_eh_return
)
19110 reg
= EH_RETURN_DATA_REGNO (i
);
19111 if (reg
== INVALID_REGNUM
)
19113 save_reg_mask
|= 1 << reg
;
19117 return save_reg_mask
;
19120 /* Return true if r3 is live at the start of the function. */
19123 arm_r3_live_at_start_p (void)
19125 /* Just look at cfg info, which is still close enough to correct at this
19126 point. This gives false positives for broken functions that might use
19127 uninitialized data that happens to be allocated in r3, but who cares? */
19128 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19131 /* Compute the number of bytes used to store the static chain register on the
19132 stack, above the stack frame. We need to know this accurately to get the
19133 alignment of the rest of the stack frame correct. */
19136 arm_compute_static_chain_stack_bytes (void)
19138 /* See the defining assertion in arm_expand_prologue. */
19139 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
19140 && IS_NESTED (arm_current_func_type ())
19141 && arm_r3_live_at_start_p ()
19142 && crtl
->args
.pretend_args_size
== 0)
19148 /* Compute a bit mask of which registers need to be
19149 saved on the stack for the current function.
19150 This is used by arm_get_frame_offsets, which may add extra registers. */
19152 static unsigned long
19153 arm_compute_save_reg_mask (void)
19155 unsigned int save_reg_mask
= 0;
19156 unsigned long func_type
= arm_current_func_type ();
19159 if (IS_NAKED (func_type
))
19160 /* This should never really happen. */
19163 /* If we are creating a stack frame, then we must save the frame pointer,
19164 IP (which will hold the old stack pointer), LR and the PC. */
19165 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19167 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19170 | (1 << PC_REGNUM
);
19172 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19174 /* Decide if we need to save the link register.
19175 Interrupt routines have their own banked link register,
19176 so they never need to save it.
19177 Otherwise if we do not use the link register we do not need to save
19178 it. If we are pushing other registers onto the stack however, we
19179 can save an instruction in the epilogue by pushing the link register
19180 now and then popping it back into the PC. This incurs extra memory
19181 accesses though, so we only do it when optimizing for size, and only
19182 if we know that we will not need a fancy return sequence. */
19183 if (df_regs_ever_live_p (LR_REGNUM
)
19186 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19187 && !crtl
->tail_call_emit
19188 && !crtl
->calls_eh_return
))
19189 save_reg_mask
|= 1 << LR_REGNUM
;
19191 if (cfun
->machine
->lr_save_eliminated
)
19192 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19194 if (TARGET_REALLY_IWMMXT
19195 && ((bit_count (save_reg_mask
)
19196 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19197 arm_compute_static_chain_stack_bytes())
19200 /* The total number of registers that are going to be pushed
19201 onto the stack is odd. We need to ensure that the stack
19202 is 64-bit aligned before we start to save iWMMXt registers,
19203 and also before we start to create locals. (A local variable
19204 might be a double or long long which we will load/store using
19205 an iWMMXt instruction). Therefore we need to push another
19206 ARM register, so that the stack will be 64-bit aligned. We
19207 try to avoid using the arg registers (r0 -r3) as they might be
19208 used to pass values in a tail call. */
19209 for (reg
= 4; reg
<= 12; reg
++)
19210 if ((save_reg_mask
& (1 << reg
)) == 0)
19214 save_reg_mask
|= (1 << reg
);
19217 cfun
->machine
->sibcall_blocked
= 1;
19218 save_reg_mask
|= (1 << 3);
19222 /* We may need to push an additional register for use initializing the
19223 PIC base register. */
19224 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19225 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19227 reg
= thumb_find_work_register (1 << 4);
19228 if (!call_used_regs
[reg
])
19229 save_reg_mask
|= (1 << reg
);
19232 return save_reg_mask
;
19236 /* Compute a bit mask of which registers need to be
19237 saved on the stack for the current function. */
19238 static unsigned long
19239 thumb1_compute_save_reg_mask (void)
19241 unsigned long mask
;
19245 for (reg
= 0; reg
< 12; reg
++)
19246 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19250 && !TARGET_SINGLE_PIC_BASE
19251 && arm_pic_register
!= INVALID_REGNUM
19252 && crtl
->uses_pic_offset_table
)
19253 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19255 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19256 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19257 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19259 /* LR will also be pushed if any lo regs are pushed. */
19260 if (mask
& 0xff || thumb_force_lr_save ())
19261 mask
|= (1 << LR_REGNUM
);
19263 /* Make sure we have a low work register if we need one.
19264 We will need one if we are going to push a high register,
19265 but we are not currently intending to push a low register. */
19266 if ((mask
& 0xff) == 0
19267 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19269 /* Use thumb_find_work_register to choose which register
19270 we will use. If the register is live then we will
19271 have to push it. Use LAST_LO_REGNUM as our fallback
19272 choice for the register to select. */
19273 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19274 /* Make sure the register returned by thumb_find_work_register is
19275 not part of the return value. */
19276 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19277 reg
= LAST_LO_REGNUM
;
19279 if (callee_saved_reg_p (reg
))
19283 /* The 504 below is 8 bytes less than 512 because there are two possible
19284 alignment words. We can't tell here if they will be present or not so we
19285 have to play it safe and assume that they are. */
19286 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19287 ROUND_UP_WORD (get_frame_size ()) +
19288 crtl
->outgoing_args_size
) >= 504)
19290 /* This is the same as the code in thumb1_expand_prologue() which
19291 determines which register to use for stack decrement. */
19292 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19293 if (mask
& (1 << reg
))
19296 if (reg
> LAST_LO_REGNUM
)
19298 /* Make sure we have a register available for stack decrement. */
19299 mask
|= 1 << LAST_LO_REGNUM
;
19307 /* Return the number of bytes required to save VFP registers. */
19309 arm_get_vfp_saved_size (void)
19311 unsigned int regno
;
19316 /* Space for saved VFP registers. */
19317 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19320 for (regno
= FIRST_VFP_REGNUM
;
19321 regno
< LAST_VFP_REGNUM
;
19324 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19325 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19329 /* Workaround ARM10 VFPr1 bug. */
19330 if (count
== 2 && !arm_arch6
)
19332 saved
+= count
* 8;
19341 if (count
== 2 && !arm_arch6
)
19343 saved
+= count
* 8;
19350 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19351 everything bar the final return instruction. If simple_return is true,
19352 then do not output epilogue, because it has already been emitted in RTL. */
19354 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19355 bool simple_return
)
19357 char conditional
[10];
19360 unsigned long live_regs_mask
;
19361 unsigned long func_type
;
19362 arm_stack_offsets
*offsets
;
19364 func_type
= arm_current_func_type ();
19366 if (IS_NAKED (func_type
))
19369 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19371 /* If this function was declared non-returning, and we have
19372 found a tail call, then we have to trust that the called
19373 function won't return. */
19378 /* Otherwise, trap an attempted return by aborting. */
19380 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19382 assemble_external_libcall (ops
[1]);
19383 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19389 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19391 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19393 cfun
->machine
->return_used_this_function
= 1;
19395 offsets
= arm_get_frame_offsets ();
19396 live_regs_mask
= offsets
->saved_regs_mask
;
19398 if (!simple_return
&& live_regs_mask
)
19400 const char * return_reg
;
19402 /* If we do not have any special requirements for function exit
19403 (e.g. interworking) then we can load the return address
19404 directly into the PC. Otherwise we must load it into LR. */
19406 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19407 return_reg
= reg_names
[PC_REGNUM
];
19409 return_reg
= reg_names
[LR_REGNUM
];
19411 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19413 /* There are three possible reasons for the IP register
19414 being saved. 1) a stack frame was created, in which case
19415 IP contains the old stack pointer, or 2) an ISR routine
19416 corrupted it, or 3) it was saved to align the stack on
19417 iWMMXt. In case 1, restore IP into SP, otherwise just
19419 if (frame_pointer_needed
)
19421 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19422 live_regs_mask
|= (1 << SP_REGNUM
);
19425 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19428 /* On some ARM architectures it is faster to use LDR rather than
19429 LDM to load a single register. On other architectures, the
19430 cost is the same. In 26 bit mode, or for exception handlers,
19431 we have to use LDM to load the PC so that the CPSR is also
19433 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19434 if (live_regs_mask
== (1U << reg
))
19437 if (reg
<= LAST_ARM_REGNUM
19438 && (reg
!= LR_REGNUM
19440 || ! IS_INTERRUPT (func_type
)))
19442 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19443 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19450 /* Generate the load multiple instruction to restore the
19451 registers. Note we can get here, even if
19452 frame_pointer_needed is true, but only if sp already
19453 points to the base of the saved core registers. */
19454 if (live_regs_mask
& (1 << SP_REGNUM
))
19456 unsigned HOST_WIDE_INT stack_adjust
;
19458 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19459 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19461 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19462 if (TARGET_UNIFIED_ASM
)
19463 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19465 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19468 /* If we can't use ldmib (SA110 bug),
19469 then try to pop r3 instead. */
19471 live_regs_mask
|= 1 << 3;
19473 if (TARGET_UNIFIED_ASM
)
19474 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19476 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19480 if (TARGET_UNIFIED_ASM
)
19481 sprintf (instr
, "pop%s\t{", conditional
);
19483 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
19485 p
= instr
+ strlen (instr
);
19487 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19488 if (live_regs_mask
& (1 << reg
))
19490 int l
= strlen (reg_names
[reg
]);
19496 memcpy (p
, ", ", 2);
19500 memcpy (p
, "%|", 2);
19501 memcpy (p
+ 2, reg_names
[reg
], l
);
19505 if (live_regs_mask
& (1 << LR_REGNUM
))
19507 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19508 /* If returning from an interrupt, restore the CPSR. */
19509 if (IS_INTERRUPT (func_type
))
19516 output_asm_insn (instr
, & operand
);
19518 /* See if we need to generate an extra instruction to
19519 perform the actual function return. */
19521 && func_type
!= ARM_FT_INTERWORKED
19522 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19524 /* The return has already been handled
19525 by loading the LR into the PC. */
19532 switch ((int) ARM_FUNC_TYPE (func_type
))
19536 /* ??? This is wrong for unified assembly syntax. */
19537 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19540 case ARM_FT_INTERWORKED
:
19541 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19544 case ARM_FT_EXCEPTION
:
19545 /* ??? This is wrong for unified assembly syntax. */
19546 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19550 /* Use bx if it's available. */
19551 if (arm_arch5
|| arm_arch4t
)
19552 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19554 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19558 output_asm_insn (instr
, & operand
);
19564 /* Write the function name into the code section, directly preceding
19565 the function prologue.
19567 Code will be output similar to this:
19569 .ascii "arm_poke_function_name", 0
19572 .word 0xff000000 + (t1 - t0)
19573 arm_poke_function_name
19575 stmfd sp!, {fp, ip, lr, pc}
19578 When performing a stack backtrace, code can inspect the value
19579 of 'pc' stored at 'fp' + 0. If the trace function then looks
19580 at location pc - 12 and the top 8 bits are set, then we know
19581 that there is a function name embedded immediately preceding this
19582 location and has length ((pc[-3]) & 0xff000000).
19584 We assume that pc is declared as a pointer to an unsigned long.
19586 It is of no benefit to output the function name if we are assembling
19587 a leaf function. These function types will not contain a stack
19588 backtrace structure, therefore it is not possible to determine the
19591 arm_poke_function_name (FILE *stream
, const char *name
)
19593 unsigned long alignlength
;
19594 unsigned long length
;
19597 length
= strlen (name
) + 1;
19598 alignlength
= ROUND_UP_WORD (length
);
19600 ASM_OUTPUT_ASCII (stream
, name
, length
);
19601 ASM_OUTPUT_ALIGN (stream
, 2);
19602 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19603 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19606 /* Place some comments into the assembler stream
19607 describing the current function. */
19609 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19611 unsigned long func_type
;
19613 /* ??? Do we want to print some of the below anyway? */
19617 /* Sanity check. */
19618 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19620 func_type
= arm_current_func_type ();
19622 switch ((int) ARM_FUNC_TYPE (func_type
))
19625 case ARM_FT_NORMAL
:
19627 case ARM_FT_INTERWORKED
:
19628 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19631 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19634 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19636 case ARM_FT_EXCEPTION
:
19637 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19641 if (IS_NAKED (func_type
))
19642 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19644 if (IS_VOLATILE (func_type
))
19645 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19647 if (IS_NESTED (func_type
))
19648 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19649 if (IS_STACKALIGN (func_type
))
19650 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19652 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19654 crtl
->args
.pretend_args_size
, frame_size
);
19656 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19657 frame_pointer_needed
,
19658 cfun
->machine
->uses_anonymous_args
);
19660 if (cfun
->machine
->lr_save_eliminated
)
19661 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19663 if (crtl
->calls_eh_return
)
19664 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19669 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19670 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19672 arm_stack_offsets
*offsets
;
19678 /* Emit any call-via-reg trampolines that are needed for v4t support
19679 of call_reg and call_value_reg type insns. */
19680 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19682 rtx label
= cfun
->machine
->call_via
[regno
];
19686 switch_to_section (function_section (current_function_decl
));
19687 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19688 CODE_LABEL_NUMBER (label
));
19689 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19693 /* ??? Probably not safe to set this here, since it assumes that a
19694 function will be emitted as assembly immediately after we generate
19695 RTL for it. This does not happen for inline functions. */
19696 cfun
->machine
->return_used_this_function
= 0;
19698 else /* TARGET_32BIT */
19700 /* We need to take into account any stack-frame rounding. */
19701 offsets
= arm_get_frame_offsets ();
19703 gcc_assert (!use_return_insn (FALSE
, NULL
)
19704 || (cfun
->machine
->return_used_this_function
!= 0)
19705 || offsets
->saved_regs
== offsets
->outgoing_args
19706 || frame_pointer_needed
);
19710 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19711 STR and STRD. If an even number of registers are being pushed, one
19712 or more STRD patterns are created for each register pair. If an
19713 odd number of registers are pushed, emit an initial STR followed by
19714 as many STRD instructions as are needed. This works best when the
19715 stack is initially 64-bit aligned (the normal case), since it
19716 ensures that each STRD is also 64-bit aligned. */
19718 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19723 rtx par
= NULL_RTX
;
19724 rtx dwarf
= NULL_RTX
;
19728 num_regs
= bit_count (saved_regs_mask
);
19730 /* Must be at least one register to save, and can't save SP or PC. */
19731 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19732 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19733 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19735 /* Create sequence for DWARF info. All the frame-related data for
19736 debugging is held in this wrapper. */
19737 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19739 /* Describe the stack adjustment. */
19740 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19741 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19742 RTX_FRAME_RELATED_P (tmp
) = 1;
19743 XVECEXP (dwarf
, 0, 0) = tmp
;
19745 /* Find the first register. */
19746 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19751 /* If there's an odd number of registers to push. Start off by
19752 pushing a single register. This ensures that subsequent strd
19753 operations are dword aligned (assuming that SP was originally
19754 64-bit aligned). */
19755 if ((num_regs
& 1) != 0)
19757 rtx reg
, mem
, insn
;
19759 reg
= gen_rtx_REG (SImode
, regno
);
19761 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19762 stack_pointer_rtx
));
19764 mem
= gen_frame_mem (Pmode
,
19766 (Pmode
, stack_pointer_rtx
,
19767 plus_constant (Pmode
, stack_pointer_rtx
,
19770 tmp
= gen_rtx_SET (mem
, reg
);
19771 RTX_FRAME_RELATED_P (tmp
) = 1;
19772 insn
= emit_insn (tmp
);
19773 RTX_FRAME_RELATED_P (insn
) = 1;
19774 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19775 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19776 RTX_FRAME_RELATED_P (tmp
) = 1;
19779 XVECEXP (dwarf
, 0, i
) = tmp
;
19783 while (i
< num_regs
)
19784 if (saved_regs_mask
& (1 << regno
))
19786 rtx reg1
, reg2
, mem1
, mem2
;
19787 rtx tmp0
, tmp1
, tmp2
;
19790 /* Find the register to pair with this one. */
19791 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19795 reg1
= gen_rtx_REG (SImode
, regno
);
19796 reg2
= gen_rtx_REG (SImode
, regno2
);
19803 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19806 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19808 -4 * (num_regs
- 1)));
19809 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19810 plus_constant (Pmode
, stack_pointer_rtx
,
19812 tmp1
= gen_rtx_SET (mem1
, reg1
);
19813 tmp2
= gen_rtx_SET (mem2
, reg2
);
19814 RTX_FRAME_RELATED_P (tmp0
) = 1;
19815 RTX_FRAME_RELATED_P (tmp1
) = 1;
19816 RTX_FRAME_RELATED_P (tmp2
) = 1;
19817 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19818 XVECEXP (par
, 0, 0) = tmp0
;
19819 XVECEXP (par
, 0, 1) = tmp1
;
19820 XVECEXP (par
, 0, 2) = tmp2
;
19821 insn
= emit_insn (par
);
19822 RTX_FRAME_RELATED_P (insn
) = 1;
19823 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19827 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19830 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19833 tmp1
= gen_rtx_SET (mem1
, reg1
);
19834 tmp2
= gen_rtx_SET (mem2
, reg2
);
19835 RTX_FRAME_RELATED_P (tmp1
) = 1;
19836 RTX_FRAME_RELATED_P (tmp2
) = 1;
19837 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19838 XVECEXP (par
, 0, 0) = tmp1
;
19839 XVECEXP (par
, 0, 1) = tmp2
;
19843 /* Create unwind information. This is an approximation. */
19844 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
19845 plus_constant (Pmode
,
19849 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
19850 plus_constant (Pmode
,
19855 RTX_FRAME_RELATED_P (tmp1
) = 1;
19856 RTX_FRAME_RELATED_P (tmp2
) = 1;
19857 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19858 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19860 regno
= regno2
+ 1;
19868 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19869 whenever possible, otherwise it emits single-word stores. The first store
19870 also allocates stack space for all saved registers, using writeback with
19871 post-addressing mode. All other stores use offset addressing. If no STRD
19872 can be emitted, this function emits a sequence of single-word stores,
19873 and not an STM as before, because single-word stores provide more freedom
19874 scheduling and can be turned into an STM by peephole optimizations. */
19876 arm_emit_strd_push (unsigned long saved_regs_mask
)
19879 int i
, j
, dwarf_index
= 0;
19881 rtx dwarf
= NULL_RTX
;
19882 rtx insn
= NULL_RTX
;
19885 /* TODO: A more efficient code can be emitted by changing the
19886 layout, e.g., first push all pairs that can use STRD to keep the
19887 stack aligned, and then push all other registers. */
19888 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19889 if (saved_regs_mask
& (1 << i
))
19892 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19893 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19894 gcc_assert (num_regs
> 0);
19896 /* Create sequence for DWARF info. */
19897 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19899 /* For dwarf info, we generate explicit stack update. */
19900 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19901 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19902 RTX_FRAME_RELATED_P (tmp
) = 1;
19903 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19905 /* Save registers. */
19906 offset
= - 4 * num_regs
;
19908 while (j
<= LAST_ARM_REGNUM
)
19909 if (saved_regs_mask
& (1 << j
))
19912 && (saved_regs_mask
& (1 << (j
+ 1))))
19914 /* Current register and previous register form register pair for
19915 which STRD can be generated. */
19918 /* Allocate stack space for all saved registers. */
19919 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19920 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19921 mem
= gen_frame_mem (DImode
, tmp
);
19924 else if (offset
> 0)
19925 mem
= gen_frame_mem (DImode
,
19926 plus_constant (Pmode
,
19930 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19932 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
19933 RTX_FRAME_RELATED_P (tmp
) = 1;
19934 tmp
= emit_insn (tmp
);
19936 /* Record the first store insn. */
19937 if (dwarf_index
== 1)
19940 /* Generate dwarf info. */
19941 mem
= gen_frame_mem (SImode
,
19942 plus_constant (Pmode
,
19945 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19946 RTX_FRAME_RELATED_P (tmp
) = 1;
19947 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19949 mem
= gen_frame_mem (SImode
,
19950 plus_constant (Pmode
,
19953 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
19954 RTX_FRAME_RELATED_P (tmp
) = 1;
19955 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19962 /* Emit a single word store. */
19965 /* Allocate stack space for all saved registers. */
19966 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19967 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19968 mem
= gen_frame_mem (SImode
, tmp
);
19971 else if (offset
> 0)
19972 mem
= gen_frame_mem (SImode
,
19973 plus_constant (Pmode
,
19977 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19979 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19980 RTX_FRAME_RELATED_P (tmp
) = 1;
19981 tmp
= emit_insn (tmp
);
19983 /* Record the first store insn. */
19984 if (dwarf_index
== 1)
19987 /* Generate dwarf info. */
19988 mem
= gen_frame_mem (SImode
,
19989 plus_constant(Pmode
,
19992 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19993 RTX_FRAME_RELATED_P (tmp
) = 1;
19994 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20003 /* Attach dwarf info to the first insn we generate. */
20004 gcc_assert (insn
!= NULL_RTX
);
20005 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20006 RTX_FRAME_RELATED_P (insn
) = 1;
20009 /* Generate and emit an insn that we will recognize as a push_multi.
20010 Unfortunately, since this insn does not reflect very well the actual
20011 semantics of the operation, we need to annotate the insn for the benefit
20012 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20013 MASK for registers that should be annotated for DWARF2 frame unwind
20016 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20019 int num_dwarf_regs
= 0;
20023 int dwarf_par_index
;
20026 /* We don't record the PC in the dwarf frame information. */
20027 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20029 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20031 if (mask
& (1 << i
))
20033 if (dwarf_regs_mask
& (1 << i
))
20037 gcc_assert (num_regs
&& num_regs
<= 16);
20038 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20040 /* For the body of the insn we are going to generate an UNSPEC in
20041 parallel with several USEs. This allows the insn to be recognized
20042 by the push_multi pattern in the arm.md file.
20044 The body of the insn looks something like this:
20047 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20048 (const_int:SI <num>)))
20049 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20055 For the frame note however, we try to be more explicit and actually
20056 show each register being stored into the stack frame, plus a (single)
20057 decrement of the stack pointer. We do it this way in order to be
20058 friendly to the stack unwinding code, which only wants to see a single
20059 stack decrement per instruction. The RTL we generate for the note looks
20060 something like this:
20063 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20064 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20065 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20066 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20070 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20071 instead we'd have a parallel expression detailing all
20072 the stores to the various memory addresses so that debug
20073 information is more up-to-date. Remember however while writing
20074 this to take care of the constraints with the push instruction.
20076 Note also that this has to be taken care of for the VFP registers.
20078 For more see PR43399. */
20080 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20081 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20082 dwarf_par_index
= 1;
20084 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20086 if (mask
& (1 << i
))
20088 reg
= gen_rtx_REG (SImode
, i
);
20090 XVECEXP (par
, 0, 0)
20091 = gen_rtx_SET (gen_frame_mem
20093 gen_rtx_PRE_MODIFY (Pmode
,
20096 (Pmode
, stack_pointer_rtx
,
20099 gen_rtx_UNSPEC (BLKmode
,
20100 gen_rtvec (1, reg
),
20101 UNSPEC_PUSH_MULT
));
20103 if (dwarf_regs_mask
& (1 << i
))
20105 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20107 RTX_FRAME_RELATED_P (tmp
) = 1;
20108 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20115 for (j
= 1, i
++; j
< num_regs
; i
++)
20117 if (mask
& (1 << i
))
20119 reg
= gen_rtx_REG (SImode
, i
);
20121 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20123 if (dwarf_regs_mask
& (1 << i
))
20126 = gen_rtx_SET (gen_frame_mem
20128 plus_constant (Pmode
, stack_pointer_rtx
,
20131 RTX_FRAME_RELATED_P (tmp
) = 1;
20132 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20139 par
= emit_insn (par
);
20141 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20142 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20143 RTX_FRAME_RELATED_P (tmp
) = 1;
20144 XVECEXP (dwarf
, 0, 0) = tmp
;
20146 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20151 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20152 SIZE is the offset to be adjusted.
20153 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20155 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20159 RTX_FRAME_RELATED_P (insn
) = 1;
20160 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20161 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20164 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20165 SAVED_REGS_MASK shows which registers need to be restored.
20167 Unfortunately, since this insn does not reflect very well the actual
20168 semantics of the operation, we need to annotate the insn for the benefit
20169 of DWARF2 frame unwind information. */
20171 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20176 rtx dwarf
= NULL_RTX
;
20178 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20182 offset_adj
= return_in_pc
? 1 : 0;
20183 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20184 if (saved_regs_mask
& (1 << i
))
20187 gcc_assert (num_regs
&& num_regs
<= 16);
20189 /* If SP is in reglist, then we don't emit SP update insn. */
20190 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20192 /* The parallel needs to hold num_regs SETs
20193 and one SET for the stack update. */
20194 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20197 XVECEXP (par
, 0, 0) = ret_rtx
;
20201 /* Increment the stack pointer, based on there being
20202 num_regs 4-byte registers to restore. */
20203 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20204 plus_constant (Pmode
,
20207 RTX_FRAME_RELATED_P (tmp
) = 1;
20208 XVECEXP (par
, 0, offset_adj
) = tmp
;
20211 /* Now restore every reg, which may include PC. */
20212 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20213 if (saved_regs_mask
& (1 << i
))
20215 reg
= gen_rtx_REG (SImode
, i
);
20216 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20218 /* Emit single load with writeback. */
20219 tmp
= gen_frame_mem (SImode
,
20220 gen_rtx_POST_INC (Pmode
,
20221 stack_pointer_rtx
));
20222 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20223 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20227 tmp
= gen_rtx_SET (reg
,
20230 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20231 RTX_FRAME_RELATED_P (tmp
) = 1;
20232 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20234 /* We need to maintain a sequence for DWARF info too. As dwarf info
20235 should not have PC, skip PC. */
20236 if (i
!= PC_REGNUM
)
20237 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20243 par
= emit_jump_insn (par
);
20245 par
= emit_insn (par
);
20247 REG_NOTES (par
) = dwarf
;
20249 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20250 stack_pointer_rtx
, stack_pointer_rtx
);
20253 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20254 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20256 Unfortunately, since this insn does not reflect very well the actual
20257 semantics of the operation, we need to annotate the insn for the benefit
20258 of DWARF2 frame unwind information. */
20260 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20264 rtx dwarf
= NULL_RTX
;
20267 gcc_assert (num_regs
&& num_regs
<= 32);
20269 /* Workaround ARM10 VFPr1 bug. */
20270 if (num_regs
== 2 && !arm_arch6
)
20272 if (first_reg
== 15)
20278 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20279 there could be up to 32 D-registers to restore.
20280 If there are more than 16 D-registers, make two recursive calls,
20281 each of which emits one pop_multi instruction. */
20284 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20285 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20289 /* The parallel needs to hold num_regs SETs
20290 and one SET for the stack update. */
20291 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20293 /* Increment the stack pointer, based on there being
20294 num_regs 8-byte registers to restore. */
20295 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20296 RTX_FRAME_RELATED_P (tmp
) = 1;
20297 XVECEXP (par
, 0, 0) = tmp
;
20299 /* Now show every reg that will be restored, using a SET for each. */
20300 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20302 reg
= gen_rtx_REG (DFmode
, i
);
20304 tmp
= gen_rtx_SET (reg
,
20307 plus_constant (Pmode
, base_reg
, 8 * j
)));
20308 RTX_FRAME_RELATED_P (tmp
) = 1;
20309 XVECEXP (par
, 0, j
+ 1) = tmp
;
20311 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20316 par
= emit_insn (par
);
20317 REG_NOTES (par
) = dwarf
;
20319 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20320 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
20322 RTX_FRAME_RELATED_P (par
) = 1;
20323 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20326 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20327 base_reg
, base_reg
);
20330 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20331 number of registers are being popped, multiple LDRD patterns are created for
20332 all register pairs. If odd number of registers are popped, last register is
20333 loaded by using LDR pattern. */
20335 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20339 rtx par
= NULL_RTX
;
20340 rtx dwarf
= NULL_RTX
;
20341 rtx tmp
, reg
, tmp1
;
20342 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20344 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20345 if (saved_regs_mask
& (1 << i
))
20348 gcc_assert (num_regs
&& num_regs
<= 16);
20350 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20351 to be popped. So, if num_regs is even, now it will become odd,
20352 and we can generate pop with PC. If num_regs is odd, it will be
20353 even now, and ldr with return can be generated for PC. */
20357 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20359 /* Var j iterates over all the registers to gather all the registers in
20360 saved_regs_mask. Var i gives index of saved registers in stack frame.
20361 A PARALLEL RTX of register-pair is created here, so that pattern for
20362 LDRD can be matched. As PC is always last register to be popped, and
20363 we have already decremented num_regs if PC, we don't have to worry
20364 about PC in this loop. */
20365 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20366 if (saved_regs_mask
& (1 << j
))
20368 /* Create RTX for memory load. */
20369 reg
= gen_rtx_REG (SImode
, j
);
20370 tmp
= gen_rtx_SET (reg
,
20371 gen_frame_mem (SImode
,
20372 plus_constant (Pmode
,
20373 stack_pointer_rtx
, 4 * i
)));
20374 RTX_FRAME_RELATED_P (tmp
) = 1;
20378 /* When saved-register index (i) is even, the RTX to be emitted is
20379 yet to be created. Hence create it first. The LDRD pattern we
20380 are generating is :
20381 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20382 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20383 where target registers need not be consecutive. */
20384 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20388 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20389 added as 0th element and if i is odd, reg_i is added as 1st element
20390 of LDRD pattern shown above. */
20391 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20392 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20396 /* When saved-register index (i) is odd, RTXs for both the registers
20397 to be loaded are generated in above given LDRD pattern, and the
20398 pattern can be emitted now. */
20399 par
= emit_insn (par
);
20400 REG_NOTES (par
) = dwarf
;
20401 RTX_FRAME_RELATED_P (par
) = 1;
20407 /* If the number of registers pushed is odd AND return_in_pc is false OR
20408 number of registers are even AND return_in_pc is true, last register is
20409 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20410 then LDR with post increment. */
20412 /* Increment the stack pointer, based on there being
20413 num_regs 4-byte registers to restore. */
20414 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20415 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20416 RTX_FRAME_RELATED_P (tmp
) = 1;
20417 tmp
= emit_insn (tmp
);
20420 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20421 stack_pointer_rtx
, stack_pointer_rtx
);
20426 if (((num_regs
% 2) == 1 && !return_in_pc
)
20427 || ((num_regs
% 2) == 0 && return_in_pc
))
20429 /* Scan for the single register to be popped. Skip until the saved
20430 register is found. */
20431 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20433 /* Gen LDR with post increment here. */
20434 tmp1
= gen_rtx_MEM (SImode
,
20435 gen_rtx_POST_INC (SImode
,
20436 stack_pointer_rtx
));
20437 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20439 reg
= gen_rtx_REG (SImode
, j
);
20440 tmp
= gen_rtx_SET (reg
, tmp1
);
20441 RTX_FRAME_RELATED_P (tmp
) = 1;
20442 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20446 /* If return_in_pc, j must be PC_REGNUM. */
20447 gcc_assert (j
== PC_REGNUM
);
20448 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20449 XVECEXP (par
, 0, 0) = ret_rtx
;
20450 XVECEXP (par
, 0, 1) = tmp
;
20451 par
= emit_jump_insn (par
);
20455 par
= emit_insn (tmp
);
20456 REG_NOTES (par
) = dwarf
;
20457 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20458 stack_pointer_rtx
, stack_pointer_rtx
);
20462 else if ((num_regs
% 2) == 1 && return_in_pc
)
20464 /* There are 2 registers to be popped. So, generate the pattern
20465 pop_multiple_with_stack_update_and_return to pop in PC. */
20466 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20472 /* LDRD in ARM mode needs consecutive registers as operands. This function
20473 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20474 offset addressing and then generates one separate stack udpate. This provides
20475 more scheduling freedom, compared to writeback on every load. However,
20476 if the function returns using load into PC directly
20477 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20478 before the last load. TODO: Add a peephole optimization to recognize
20479 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20480 peephole optimization to merge the load at stack-offset zero
20481 with the stack update instruction using load with writeback
20482 in post-index addressing mode. */
20484 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20488 rtx par
= NULL_RTX
;
20489 rtx dwarf
= NULL_RTX
;
20492 /* Restore saved registers. */
20493 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20495 while (j
<= LAST_ARM_REGNUM
)
20496 if (saved_regs_mask
& (1 << j
))
20499 && (saved_regs_mask
& (1 << (j
+ 1)))
20500 && (j
+ 1) != PC_REGNUM
)
20502 /* Current register and next register form register pair for which
20503 LDRD can be generated. PC is always the last register popped, and
20504 we handle it separately. */
20506 mem
= gen_frame_mem (DImode
,
20507 plus_constant (Pmode
,
20511 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20513 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20514 tmp
= emit_insn (tmp
);
20515 RTX_FRAME_RELATED_P (tmp
) = 1;
20517 /* Generate dwarf info. */
20519 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20520 gen_rtx_REG (SImode
, j
),
20522 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20523 gen_rtx_REG (SImode
, j
+ 1),
20526 REG_NOTES (tmp
) = dwarf
;
20531 else if (j
!= PC_REGNUM
)
20533 /* Emit a single word load. */
20535 mem
= gen_frame_mem (SImode
,
20536 plus_constant (Pmode
,
20540 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20542 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20543 tmp
= emit_insn (tmp
);
20544 RTX_FRAME_RELATED_P (tmp
) = 1;
20546 /* Generate dwarf info. */
20547 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20548 gen_rtx_REG (SImode
, j
),
20554 else /* j == PC_REGNUM */
20560 /* Update the stack. */
20563 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20564 plus_constant (Pmode
,
20567 tmp
= emit_insn (tmp
);
20568 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20569 stack_pointer_rtx
, stack_pointer_rtx
);
20573 if (saved_regs_mask
& (1 << PC_REGNUM
))
20575 /* Only PC is to be popped. */
20576 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20577 XVECEXP (par
, 0, 0) = ret_rtx
;
20578 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20579 gen_frame_mem (SImode
,
20580 gen_rtx_POST_INC (SImode
,
20581 stack_pointer_rtx
)));
20582 RTX_FRAME_RELATED_P (tmp
) = 1;
20583 XVECEXP (par
, 0, 1) = tmp
;
20584 par
= emit_jump_insn (par
);
20586 /* Generate dwarf info. */
20587 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20588 gen_rtx_REG (SImode
, PC_REGNUM
),
20590 REG_NOTES (par
) = dwarf
;
20591 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20592 stack_pointer_rtx
, stack_pointer_rtx
);
20596 /* Calculate the size of the return value that is passed in registers. */
20598 arm_size_return_regs (void)
20602 if (crtl
->return_rtx
!= 0)
20603 mode
= GET_MODE (crtl
->return_rtx
);
20605 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20607 return GET_MODE_SIZE (mode
);
20610 /* Return true if the current function needs to save/restore LR. */
20612 thumb_force_lr_save (void)
20614 return !cfun
->machine
->lr_save_eliminated
20615 && (!leaf_function_p ()
20616 || thumb_far_jump_used_p ()
20617 || df_regs_ever_live_p (LR_REGNUM
));
20620 /* We do not know if r3 will be available because
20621 we do have an indirect tailcall happening in this
20622 particular case. */
20624 is_indirect_tailcall_p (rtx call
)
20626 rtx pat
= PATTERN (call
);
20628 /* Indirect tail call. */
20629 pat
= XVECEXP (pat
, 0, 0);
20630 if (GET_CODE (pat
) == SET
)
20631 pat
= SET_SRC (pat
);
20633 pat
= XEXP (XEXP (pat
, 0), 0);
20634 return REG_P (pat
);
20637 /* Return true if r3 is used by any of the tail call insns in the
20638 current function. */
20640 any_sibcall_could_use_r3 (void)
20645 if (!crtl
->tail_call_emit
)
20647 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20648 if (e
->flags
& EDGE_SIBCALL
)
20650 rtx call
= BB_END (e
->src
);
20651 if (!CALL_P (call
))
20652 call
= prev_nonnote_nondebug_insn (call
);
20653 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20654 if (find_regno_fusage (call
, USE
, 3)
20655 || is_indirect_tailcall_p (call
))
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   call saved registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
20711 /* Calculate stack offsets. These are used to calculate register elimination
20712 offsets and in prologue/epilogue code. Also calculates which registers
20713 should be saved. */
20715 static arm_stack_offsets
*
20716 arm_get_frame_offsets (void)
20718 struct arm_stack_offsets
*offsets
;
20719 unsigned long func_type
;
20723 HOST_WIDE_INT frame_size
;
20726 offsets
= &cfun
->machine
->stack_offsets
;
20728 /* We need to know if we are a leaf function. Unfortunately, it
20729 is possible to be called after start_sequence has been called,
20730 which causes get_insns to return the insns for the sequence,
20731 not the function, which will cause leaf_function_p to return
20732 the incorrect result.
20734 to know about leaf functions once reload has completed, and the
20735 frame size cannot be changed after that time, so we can safely
20736 use the cached value. */
20738 if (reload_completed
)
20741 /* Initially this is the size of the local variables. It will translated
20742 into an offset once we have determined the size of preceding data. */
20743 frame_size
= ROUND_UP_WORD (get_frame_size ());
20745 leaf
= leaf_function_p ();
20747 /* Space for variadic functions. */
20748 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20750 /* In Thumb mode this is incorrect, but never used. */
20752 = (offsets
->saved_args
20753 + arm_compute_static_chain_stack_bytes ()
20754 + (frame_pointer_needed
? 4 : 0));
20758 unsigned int regno
;
20760 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20761 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20762 saved
= core_saved
;
20764 /* We know that SP will be doubleword aligned on entry, and we must
20765 preserve that condition at any subroutine call. We also require the
20766 soft frame pointer to be doubleword aligned. */
20768 if (TARGET_REALLY_IWMMXT
)
20770 /* Check for the call-saved iWMMXt registers. */
20771 for (regno
= FIRST_IWMMXT_REGNUM
;
20772 regno
<= LAST_IWMMXT_REGNUM
;
20774 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20778 func_type
= arm_current_func_type ();
20779 /* Space for saved VFP registers. */
20780 if (! IS_VOLATILE (func_type
)
20781 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20782 saved
+= arm_get_vfp_saved_size ();
20784 else /* TARGET_THUMB1 */
20786 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20787 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20788 saved
= core_saved
;
20789 if (TARGET_BACKTRACE
)
20793 /* Saved registers include the stack frame. */
20794 offsets
->saved_regs
20795 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20796 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20798 /* A leaf function does not need any stack alignment if it has nothing
20800 if (leaf
&& frame_size
== 0
20801 /* However if it calls alloca(), we have a dynamically allocated
20802 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20803 && ! cfun
->calls_alloca
)
20805 offsets
->outgoing_args
= offsets
->soft_frame
;
20806 offsets
->locals_base
= offsets
->soft_frame
;
20810 /* Ensure SFP has the correct alignment. */
20811 if (ARM_DOUBLEWORD_ALIGN
20812 && (offsets
->soft_frame
& 7))
20814 offsets
->soft_frame
+= 4;
20815 /* Try to align stack by pushing an extra reg. Don't bother doing this
20816 when there is a stack frame as the alignment will be rolled into
20817 the normal stack adjustment. */
20818 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20822 /* Register r3 is caller-saved. Normally it does not need to be
20823 saved on entry by the prologue. However if we choose to save
20824 it for padding then we may confuse the compiler into thinking
20825 a prologue sequence is required when in fact it is not. This
20826 will occur when shrink-wrapping if r3 is used as a scratch
20827 register and there are no other callee-saved writes.
20829 This situation can be avoided when other callee-saved registers
20830 are available and r3 is not mandatory if we choose a callee-saved
20831 register for padding. */
20832 bool prefer_callee_reg_p
= false;
20834 /* If it is safe to use r3, then do so. This sometimes
20835 generates better code on Thumb-2 by avoiding the need to
20836 use 32-bit push/pop instructions. */
20837 if (! any_sibcall_could_use_r3 ()
20838 && arm_size_return_regs () <= 12
20839 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20841 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20844 if (!TARGET_THUMB2
)
20845 prefer_callee_reg_p
= true;
20848 || prefer_callee_reg_p
)
20850 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20852 /* Avoid fixed registers; they may be changed at
20853 arbitrary times so it's unsafe to restore them
20854 during the epilogue. */
20856 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20866 offsets
->saved_regs
+= 4;
20867 offsets
->saved_regs_mask
|= (1 << reg
);
20872 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20873 offsets
->outgoing_args
= (offsets
->locals_base
20874 + crtl
->outgoing_args_size
);
20876 if (ARM_DOUBLEWORD_ALIGN
)
20878 /* Ensure SP remains doubleword aligned. */
20879 if (offsets
->outgoing_args
& 7)
20880 offsets
->outgoing_args
+= 4;
20881 gcc_assert (!(offsets
->outgoing_args
& 7));
20888 /* Calculate the relative offsets for the different stack pointers. Positive
20889 offsets are in the direction of stack growth. */
20892 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20894 arm_stack_offsets
*offsets
;
20896 offsets
= arm_get_frame_offsets ();
20898 /* OK, now we have enough information to compute the distances.
20899 There must be an entry in these switch tables for each pair
20900 of registers in ELIMINABLE_REGS, even if some of the entries
20901 seem to be redundant or useless. */
20904 case ARG_POINTER_REGNUM
:
20907 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20910 case FRAME_POINTER_REGNUM
:
20911 /* This is the reverse of the soft frame pointer
20912 to hard frame pointer elimination below. */
20913 return offsets
->soft_frame
- offsets
->saved_args
;
20915 case ARM_HARD_FRAME_POINTER_REGNUM
:
20916 /* This is only non-zero in the case where the static chain register
20917 is stored above the frame. */
20918 return offsets
->frame
- offsets
->saved_args
- 4;
20920 case STACK_POINTER_REGNUM
:
20921 /* If nothing has been pushed on the stack at all
20922 then this will return -4. This *is* correct! */
20923 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20926 gcc_unreachable ();
20928 gcc_unreachable ();
20930 case FRAME_POINTER_REGNUM
:
20933 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20936 case ARM_HARD_FRAME_POINTER_REGNUM
:
20937 /* The hard frame pointer points to the top entry in the
20938 stack frame. The soft frame pointer to the bottom entry
20939 in the stack frame. If there is no stack frame at all,
20940 then they are identical. */
20942 return offsets
->frame
- offsets
->soft_frame
;
20944 case STACK_POINTER_REGNUM
:
20945 return offsets
->outgoing_args
- offsets
->soft_frame
;
20948 gcc_unreachable ();
20950 gcc_unreachable ();
20953 /* You cannot eliminate from the stack pointer.
20954 In theory you could eliminate from the hard frame
20955 pointer to the stack pointer, but this will never
20956 happen, since if a stack frame is not needed the
20957 hard frame pointer will never be used. */
20958 gcc_unreachable ();
20962 /* Given FROM and TO register numbers, say whether this elimination is
20963 allowed. Frame pointer elimination is automatically handled.
20965 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20966 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20967 pointer, we must eliminate FRAME_POINTER_REGNUM into
20968 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20969 ARG_POINTER_REGNUM. */
20972 arm_can_eliminate (const int from
, const int to
)
20974 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20975 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20976 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20977 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20981 /* Emit RTL to save coprocessor registers on function entry. Returns the
20982 number of bytes pushed. */
20985 arm_save_coproc_regs(void)
20987 int saved_size
= 0;
20989 unsigned start_reg
;
20992 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20993 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20995 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20996 insn
= gen_rtx_MEM (V2SImode
, insn
);
20997 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20998 RTX_FRAME_RELATED_P (insn
) = 1;
21002 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
21004 start_reg
= FIRST_VFP_REGNUM
;
21006 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21008 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21009 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21011 if (start_reg
!= reg
)
21012 saved_size
+= vfp_emit_fstmd (start_reg
,
21013 (reg
- start_reg
) / 2);
21014 start_reg
= reg
+ 2;
21017 if (start_reg
!= reg
)
21018 saved_size
+= vfp_emit_fstmd (start_reg
,
21019 (reg
- start_reg
) / 2);
21025 /* Set the Thumb frame pointer from the stack pointer. */
21028 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21030 HOST_WIDE_INT amount
;
21033 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21035 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21036 stack_pointer_rtx
, GEN_INT (amount
)));
21039 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21040 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21041 expects the first two operands to be the same. */
21044 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21046 hard_frame_pointer_rtx
));
21050 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21051 hard_frame_pointer_rtx
,
21052 stack_pointer_rtx
));
21054 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21055 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21056 RTX_FRAME_RELATED_P (dwarf
) = 1;
21057 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21060 RTX_FRAME_RELATED_P (insn
) = 1;
21063 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21066 arm_expand_prologue (void)
21071 unsigned long live_regs_mask
;
21072 unsigned long func_type
;
21074 int saved_pretend_args
= 0;
21075 int saved_regs
= 0;
21076 unsigned HOST_WIDE_INT args_to_push
;
21077 arm_stack_offsets
*offsets
;
21079 func_type
= arm_current_func_type ();
21081 /* Naked functions don't have prologues. */
21082 if (IS_NAKED (func_type
))
21085 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21086 args_to_push
= crtl
->args
.pretend_args_size
;
21088 /* Compute which register we will have to save onto the stack. */
21089 offsets
= arm_get_frame_offsets ();
21090 live_regs_mask
= offsets
->saved_regs_mask
;
21092 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21094 if (IS_STACKALIGN (func_type
))
21098 /* Handle a word-aligned stack pointer. We generate the following:
21103 <save and restore r0 in normal prologue/epilogue>
21107 The unwinder doesn't need to know about the stack realignment.
21108 Just tell it we saved SP in r0. */
21109 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21111 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21112 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21114 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21115 RTX_FRAME_RELATED_P (insn
) = 1;
21116 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21118 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21120 /* ??? The CFA changes here, which may cause GDB to conclude that it
21121 has entered a different function. That said, the unwind info is
21122 correct, individually, before and after this instruction because
21123 we've described the save of SP, which will override the default
21124 handling of SP as restoring from the CFA. */
21125 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21128 /* For APCS frames, if IP register is clobbered
21129 when creating frame, save that register in a special
21131 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21133 if (IS_INTERRUPT (func_type
))
21135 /* Interrupt functions must not corrupt any registers.
21136 Creating a frame pointer however, corrupts the IP
21137 register, so we must push it first. */
21138 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21140 /* Do not set RTX_FRAME_RELATED_P on this insn.
21141 The dwarf stack unwinding code only wants to see one
21142 stack decrement per function, and this is not it. If
21143 this instruction is labeled as being part of the frame
21144 creation sequence then dwarf2out_frame_debug_expr will
21145 die when it encounters the assignment of IP to FP
21146 later on, since the use of SP here establishes SP as
21147 the CFA register and not IP.
21149 Anyway this instruction is not really part of the stack
21150 frame creation although it is part of the prologue. */
21152 else if (IS_NESTED (func_type
))
21154 /* The static chain register is the same as the IP register
21155 used as a scratch register during stack frame creation.
21156 To get around this need to find somewhere to store IP
21157 whilst the frame is being created. We try the following
21160 1. The last argument register r3 if it is available.
21161 2. A slot on the stack above the frame if there are no
21162 arguments to push onto the stack.
21163 3. Register r3 again, after pushing the argument registers
21164 onto the stack, if this is a varargs function.
21165 4. The last slot on the stack created for the arguments to
21166 push, if this isn't a varargs function.
21168 Note - we only need to tell the dwarf2 backend about the SP
21169 adjustment in the second variant; the static chain register
21170 doesn't need to be unwound, as it doesn't contain a value
21171 inherited from the caller. */
21173 if (!arm_r3_live_at_start_p ())
21174 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21175 else if (args_to_push
== 0)
21179 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21182 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21183 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21186 /* Just tell the dwarf backend that we adjusted SP. */
21187 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21188 plus_constant (Pmode
, stack_pointer_rtx
,
21190 RTX_FRAME_RELATED_P (insn
) = 1;
21191 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21195 /* Store the args on the stack. */
21196 if (cfun
->machine
->uses_anonymous_args
)
21199 = emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21200 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21201 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21202 saved_pretend_args
= 1;
21208 if (args_to_push
== 4)
21209 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21212 = gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21213 plus_constant (Pmode
,
21217 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21219 /* Just tell the dwarf backend that we adjusted SP. */
21221 = gen_rtx_SET (stack_pointer_rtx
,
21222 plus_constant (Pmode
, stack_pointer_rtx
,
21224 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21227 RTX_FRAME_RELATED_P (insn
) = 1;
21228 fp_offset
= args_to_push
;
21233 insn
= emit_set_insn (ip_rtx
,
21234 plus_constant (Pmode
, stack_pointer_rtx
,
21236 RTX_FRAME_RELATED_P (insn
) = 1;
21241 /* Push the argument registers, or reserve space for them. */
21242 if (cfun
->machine
->uses_anonymous_args
)
21243 insn
= emit_multi_reg_push
21244 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21245 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21248 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21249 GEN_INT (- args_to_push
)));
21250 RTX_FRAME_RELATED_P (insn
) = 1;
21253 /* If this is an interrupt service routine, and the link register
21254 is going to be pushed, and we're not generating extra
21255 push of IP (needed when frame is needed and frame layout if apcs),
21256 subtracting four from LR now will mean that the function return
21257 can be done with a single instruction. */
21258 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21259 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21260 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21263 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21265 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21268 if (live_regs_mask
)
21270 unsigned long dwarf_regs_mask
= live_regs_mask
;
21272 saved_regs
+= bit_count (live_regs_mask
) * 4;
21273 if (optimize_size
&& !frame_pointer_needed
21274 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21276 /* If no coprocessor registers are being pushed and we don't have
21277 to worry about a frame pointer then push extra registers to
21278 create the stack frame. This is done is a way that does not
21279 alter the frame layout, so is independent of the epilogue. */
21283 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21285 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21286 if (frame
&& n
* 4 >= frame
)
21289 live_regs_mask
|= (1 << n
) - 1;
21290 saved_regs
+= frame
;
21295 && current_tune
->prefer_ldrd_strd
21296 && !optimize_function_for_size_p (cfun
))
21298 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21300 thumb2_emit_strd_push (live_regs_mask
);
21301 else if (TARGET_ARM
21302 && !TARGET_APCS_FRAME
21303 && !IS_INTERRUPT (func_type
))
21304 arm_emit_strd_push (live_regs_mask
);
21307 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21308 RTX_FRAME_RELATED_P (insn
) = 1;
21313 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21314 RTX_FRAME_RELATED_P (insn
) = 1;
21318 if (! IS_VOLATILE (func_type
))
21319 saved_regs
+= arm_save_coproc_regs ();
21321 if (frame_pointer_needed
&& TARGET_ARM
)
21323 /* Create the new frame pointer. */
21324 if (TARGET_APCS_FRAME
)
21326 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21327 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21328 RTX_FRAME_RELATED_P (insn
) = 1;
21330 if (IS_NESTED (func_type
))
21332 /* Recover the static chain register. */
21333 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21334 insn
= gen_rtx_REG (SImode
, 3);
21337 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21338 insn
= gen_frame_mem (SImode
, insn
);
21340 emit_set_insn (ip_rtx
, insn
);
21341 /* Add a USE to stop propagate_one_insn() from barfing. */
21342 emit_insn (gen_force_register_use (ip_rtx
));
21347 insn
= GEN_INT (saved_regs
- 4);
21348 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21349 stack_pointer_rtx
, insn
));
21350 RTX_FRAME_RELATED_P (insn
) = 1;
21354 if (flag_stack_usage_info
)
21355 current_function_static_stack_size
21356 = offsets
->outgoing_args
- offsets
->saved_args
;
21358 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21360 /* This add can produce multiple insns for a large constant, so we
21361 need to get tricky. */
21362 rtx_insn
*last
= get_last_insn ();
21364 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21365 - offsets
->outgoing_args
);
21367 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21371 last
= last
? NEXT_INSN (last
) : get_insns ();
21372 RTX_FRAME_RELATED_P (last
) = 1;
21374 while (last
!= insn
);
21376 /* If the frame pointer is needed, emit a special barrier that
21377 will prevent the scheduler from moving stores to the frame
21378 before the stack adjustment. */
21379 if (frame_pointer_needed
)
21380 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21381 hard_frame_pointer_rtx
));
21385 if (frame_pointer_needed
&& TARGET_THUMB2
)
21386 thumb_set_frame_pointer (offsets
);
21388 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21390 unsigned long mask
;
21392 mask
= live_regs_mask
;
21393 mask
&= THUMB2_WORK_REGS
;
21394 if (!IS_NESTED (func_type
))
21395 mask
|= (1 << IP_REGNUM
);
21396 arm_load_pic_register (mask
);
21399 /* If we are profiling, make sure no instructions are scheduled before
21400 the call to mcount. Similarly if the user has requested no
21401 scheduling in the prolog. Similarly if we want non-call exceptions
21402 using the EABI unwinder, to prevent faulting instructions from being
21403 swapped with a stack adjustment. */
21404 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21405 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21406 && cfun
->can_throw_non_call_exceptions
))
21407 emit_insn (gen_blockage ());
21409 /* If the link register is being kept alive, with the return address in it,
21410 then make sure that it does not get reused by the ce2 pass. */
21411 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21412 cfun
->machine
->lr_save_eliminated
= 1;
21415 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21417 arm_print_condition (FILE *stream
)
21419 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21421 /* Branch conversion is not implemented for Thumb-2. */
21424 output_operand_lossage ("predicated Thumb instruction");
21427 if (current_insn_predicate
!= NULL
)
21429 output_operand_lossage
21430 ("predicated instruction in conditional sequence");
21434 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21436 else if (current_insn_predicate
)
21438 enum arm_cond_code code
;
21442 output_operand_lossage ("predicated Thumb instruction");
21446 code
= get_arm_condition_code (current_insn_predicate
);
21447 fputs (arm_condition_codes
[code
], stream
);
21452 /* Globally reserved letters: acln
21453 Puncutation letters currently used: @_|?().!#
21454 Lower case letters currently used: bcdefhimpqtvwxyz
21455 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21456 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21458 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21460 If CODE is 'd', then the X is a condition operand and the instruction
21461 should only be executed if the condition is true.
21462 if CODE is 'D', then the X is a condition operand and the instruction
21463 should only be executed if the condition is false: however, if the mode
21464 of the comparison is CCFPEmode, then always execute the instruction -- we
21465 do this because in these circumstances !GE does not necessarily imply LT;
21466 in these cases the instruction pattern will take care to make sure that
21467 an instruction containing %d will follow, thereby undoing the effects of
21468 doing this instruction unconditionally.
21469 If CODE is 'N' then X is a floating point operand that must be negated
21471 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21472 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21474 arm_print_operand (FILE *stream
, rtx x
, int code
)
21479 fputs (ASM_COMMENT_START
, stream
);
21483 fputs (user_label_prefix
, stream
);
21487 fputs (REGISTER_PREFIX
, stream
);
21491 arm_print_condition (stream
);
21495 /* Nothing in unified syntax, otherwise the current condition code. */
21496 if (!TARGET_UNIFIED_ASM
)
21497 arm_print_condition (stream
);
21501 /* The current condition code in unified syntax, otherwise nothing. */
21502 if (TARGET_UNIFIED_ASM
)
21503 arm_print_condition (stream
);
21507 /* The current condition code for a condition code setting instruction.
21508 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21509 if (TARGET_UNIFIED_ASM
)
21511 fputc('s', stream
);
21512 arm_print_condition (stream
);
21516 arm_print_condition (stream
);
21517 fputc('s', stream
);
21522 /* If the instruction is conditionally executed then print
21523 the current condition code, otherwise print 's'. */
21524 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21525 if (current_insn_predicate
)
21526 arm_print_condition (stream
);
21528 fputc('s', stream
);
21531 /* %# is a "break" sequence. It doesn't output anything, but is used to
21532 separate e.g. operand numbers from following text, if that text consists
21533 of further digits which we don't want to be part of the operand
21541 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21542 r
= real_value_negate (&r
);
21543 fprintf (stream
, "%s", fp_const_from_val (&r
));
21547 /* An integer or symbol address without a preceding # sign. */
21549 switch (GET_CODE (x
))
21552 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21556 output_addr_const (stream
, x
);
21560 if (GET_CODE (XEXP (x
, 0)) == PLUS
21561 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21563 output_addr_const (stream
, x
);
21566 /* Fall through. */
21569 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21573 /* An integer that we want to print in HEX. */
21575 switch (GET_CODE (x
))
21578 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21582 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21587 if (CONST_INT_P (x
))
21590 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21591 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21595 putc ('~', stream
);
21596 output_addr_const (stream
, x
);
21601 /* Print the log2 of a CONST_INT. */
21605 if (!CONST_INT_P (x
)
21606 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21607 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21609 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21614 /* The low 16 bits of an immediate constant. */
21615 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21619 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21623 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21631 shift
= shift_op (x
, &val
);
21635 fprintf (stream
, ", %s ", shift
);
21637 arm_print_operand (stream
, XEXP (x
, 1), 0);
21639 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21644 /* An explanation of the 'Q', 'R' and 'H' register operands:
21646 In a pair of registers containing a DI or DF value the 'Q'
21647 operand returns the register number of the register containing
21648 the least significant part of the value. The 'R' operand returns
21649 the register number of the register containing the most
21650 significant part of the value.
21652 The 'H' operand returns the higher of the two register numbers.
21653 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21654 same as the 'Q' operand, since the most significant part of the
21655 value is held in the lower number register. The reverse is true
21656 on systems where WORDS_BIG_ENDIAN is false.
21658 The purpose of these operands is to distinguish between cases
21659 where the endian-ness of the values is important (for example
21660 when they are added together), and cases where the endian-ness
21661 is irrelevant, but the order of register operations is important.
21662 For example when loading a value from memory into a register
21663 pair, the endian-ness does not matter. Provided that the value
21664 from the lower memory address is put into the lower numbered
21665 register, and the value from the higher address is put into the
21666 higher numbered register, the load will work regardless of whether
21667 the value being loaded is big-wordian or little-wordian. The
21668 order of the two register loads can matter however, if the address
21669 of the memory location is actually held in one of the registers
21670 being overwritten by the load.
21672 The 'Q' and 'R' constraints are also available for 64-bit
21675 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21677 rtx part
= gen_lowpart (SImode
, x
);
21678 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21682 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21684 output_operand_lossage ("invalid operand for code '%c'", code
);
21688 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21692 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21694 machine_mode mode
= GET_MODE (x
);
21697 if (mode
== VOIDmode
)
21699 part
= gen_highpart_mode (SImode
, mode
, x
);
21700 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21704 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21706 output_operand_lossage ("invalid operand for code '%c'", code
);
21710 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21714 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21716 output_operand_lossage ("invalid operand for code '%c'", code
);
21720 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21724 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21726 output_operand_lossage ("invalid operand for code '%c'", code
);
21730 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21734 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21736 output_operand_lossage ("invalid operand for code '%c'", code
);
21740 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21744 asm_fprintf (stream
, "%r",
21745 REG_P (XEXP (x
, 0))
21746 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21750 asm_fprintf (stream
, "{%r-%r}",
21752 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21755 /* Like 'M', but writing doubleword vector registers, for use by Neon
21759 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21760 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21762 asm_fprintf (stream
, "{d%d}", regno
);
21764 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21769 /* CONST_TRUE_RTX means always -- that's the default. */
21770 if (x
== const_true_rtx
)
21773 if (!COMPARISON_P (x
))
21775 output_operand_lossage ("invalid operand for code '%c'", code
);
21779 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21784 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21785 want to do that. */
21786 if (x
== const_true_rtx
)
21788 output_operand_lossage ("instruction never executed");
21791 if (!COMPARISON_P (x
))
21793 output_operand_lossage ("invalid operand for code '%c'", code
);
21797 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21798 (get_arm_condition_code (x
))],
21808 /* Former Maverick support, removed after GCC-4.7. */
21809 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21814 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21815 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21816 /* Bad value for wCG register number. */
21818 output_operand_lossage ("invalid operand for code '%c'", code
);
21823 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21826 /* Print an iWMMXt control register name. */
21828 if (!CONST_INT_P (x
)
21830 || INTVAL (x
) >= 16)
21831 /* Bad value for wC register number. */
21833 output_operand_lossage ("invalid operand for code '%c'", code
);
21839 static const char * wc_reg_names
[16] =
21841 "wCID", "wCon", "wCSSF", "wCASF",
21842 "wC4", "wC5", "wC6", "wC7",
21843 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21844 "wC12", "wC13", "wC14", "wC15"
21847 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21851 /* Print the high single-precision register of a VFP double-precision
21855 machine_mode mode
= GET_MODE (x
);
21858 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21860 output_operand_lossage ("invalid operand for code '%c'", code
);
21865 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21867 output_operand_lossage ("invalid operand for code '%c'", code
);
21871 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21875 /* Print a VFP/Neon double precision or quad precision register name. */
21879 machine_mode mode
= GET_MODE (x
);
21880 int is_quad
= (code
== 'q');
21883 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21885 output_operand_lossage ("invalid operand for code '%c'", code
);
21890 || !IS_VFP_REGNUM (REGNO (x
)))
21892 output_operand_lossage ("invalid operand for code '%c'", code
);
21897 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21898 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21900 output_operand_lossage ("invalid operand for code '%c'", code
);
21904 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21905 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21909 /* These two codes print the low/high doubleword register of a Neon quad
21910 register, respectively. For pair-structure types, can also print
21911 low/high quadword registers. */
21915 machine_mode mode
= GET_MODE (x
);
21918 if ((GET_MODE_SIZE (mode
) != 16
21919 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21921 output_operand_lossage ("invalid operand for code '%c'", code
);
21926 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21928 output_operand_lossage ("invalid operand for code '%c'", code
);
21932 if (GET_MODE_SIZE (mode
) == 16)
21933 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21934 + (code
== 'f' ? 1 : 0));
21936 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21937 + (code
== 'f' ? 1 : 0));
21941 /* Print a VFPv3 floating-point constant, represented as an integer
21945 int index
= vfp3_const_double_index (x
);
21946 gcc_assert (index
!= -1);
21947 fprintf (stream
, "%d", index
);
21951 /* Print bits representing opcode features for Neon.
21953 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21954 and polynomials as unsigned.
21956 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21958 Bit 2 is 1 for rounding functions, 0 otherwise. */
21960 /* Identify the type as 's', 'u', 'p' or 'f'. */
21963 HOST_WIDE_INT bits
= INTVAL (x
);
21964 fputc ("uspf"[bits
& 3], stream
);
21968 /* Likewise, but signed and unsigned integers are both 'i'. */
21971 HOST_WIDE_INT bits
= INTVAL (x
);
21972 fputc ("iipf"[bits
& 3], stream
);
21976 /* As for 'T', but emit 'u' instead of 'p'. */
21979 HOST_WIDE_INT bits
= INTVAL (x
);
21980 fputc ("usuf"[bits
& 3], stream
);
21984 /* Bit 2: rounding (vs none). */
21987 HOST_WIDE_INT bits
= INTVAL (x
);
21988 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21992 /* Memory operand for vld1/vst1 instruction. */
21996 bool postinc
= FALSE
;
21997 rtx postinc_reg
= NULL
;
21998 unsigned align
, memsize
, align_bits
;
22000 gcc_assert (MEM_P (x
));
22001 addr
= XEXP (x
, 0);
22002 if (GET_CODE (addr
) == POST_INC
)
22005 addr
= XEXP (addr
, 0);
22007 if (GET_CODE (addr
) == POST_MODIFY
)
22009 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22010 addr
= XEXP (addr
, 0);
22012 asm_fprintf (stream
, "[%r", REGNO (addr
));
22014 /* We know the alignment of this access, so we can emit a hint in the
22015 instruction (for some alignments) as an aid to the memory subsystem
22017 align
= MEM_ALIGN (x
) >> 3;
22018 memsize
= MEM_SIZE (x
);
22020 /* Only certain alignment specifiers are supported by the hardware. */
22021 if (memsize
== 32 && (align
% 32) == 0)
22023 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22025 else if (memsize
>= 8 && (align
% 8) == 0)
22030 if (align_bits
!= 0)
22031 asm_fprintf (stream
, ":%d", align_bits
);
22033 asm_fprintf (stream
, "]");
22036 fputs("!", stream
);
22038 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22046 gcc_assert (MEM_P (x
));
22047 addr
= XEXP (x
, 0);
22048 gcc_assert (REG_P (addr
));
22049 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22053 /* Translate an S register number into a D register number and element index. */
22056 machine_mode mode
= GET_MODE (x
);
22059 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22061 output_operand_lossage ("invalid operand for code '%c'", code
);
22066 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22068 output_operand_lossage ("invalid operand for code '%c'", code
);
22072 regno
= regno
- FIRST_VFP_REGNUM
;
22073 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22078 gcc_assert (CONST_DOUBLE_P (x
));
22080 result
= vfp3_const_double_for_fract_bits (x
);
22082 result
= vfp3_const_double_for_bits (x
);
22083 fprintf (stream
, "#%d", result
);
22086 /* Register specifier for vld1.16/vst1.16. Translate the S register
22087 number into a D register number and element index. */
22090 machine_mode mode
= GET_MODE (x
);
22093 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22095 output_operand_lossage ("invalid operand for code '%c'", code
);
22100 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22102 output_operand_lossage ("invalid operand for code '%c'", code
);
22106 regno
= regno
- FIRST_VFP_REGNUM
;
22107 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22114 output_operand_lossage ("missing operand");
22118 switch (GET_CODE (x
))
22121 asm_fprintf (stream
, "%r", REGNO (x
));
22125 output_memory_reference_mode
= GET_MODE (x
);
22126 output_address (XEXP (x
, 0));
22132 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22133 sizeof (fpstr
), 0, 1);
22134 fprintf (stream
, "#%s", fpstr
);
22139 gcc_assert (GET_CODE (x
) != NEG
);
22140 fputc ('#', stream
);
22141 if (GET_CODE (x
) == HIGH
)
22143 fputs (":lower16:", stream
);
22147 output_addr_const (stream
, x
);
22153 /* Target hook for printing a memory address. */
22155 arm_print_operand_address (FILE *stream
, rtx x
)
22159 int is_minus
= GET_CODE (x
) == MINUS
;
22162 asm_fprintf (stream
, "[%r]", REGNO (x
));
22163 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22165 rtx base
= XEXP (x
, 0);
22166 rtx index
= XEXP (x
, 1);
22167 HOST_WIDE_INT offset
= 0;
22169 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22171 /* Ensure that BASE is a register. */
22172 /* (one of them must be). */
22173 /* Also ensure the SP is not used as in index register. */
22174 std::swap (base
, index
);
22176 switch (GET_CODE (index
))
22179 offset
= INTVAL (index
);
22182 asm_fprintf (stream
, "[%r, #%wd]",
22183 REGNO (base
), offset
);
22187 asm_fprintf (stream
, "[%r, %s%r]",
22188 REGNO (base
), is_minus
? "-" : "",
22198 asm_fprintf (stream
, "[%r, %s%r",
22199 REGNO (base
), is_minus
? "-" : "",
22200 REGNO (XEXP (index
, 0)));
22201 arm_print_operand (stream
, index
, 'S');
22202 fputs ("]", stream
);
22207 gcc_unreachable ();
22210 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22211 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22213 extern machine_mode output_memory_reference_mode
;
22215 gcc_assert (REG_P (XEXP (x
, 0)));
22217 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22218 asm_fprintf (stream
, "[%r, #%s%d]!",
22219 REGNO (XEXP (x
, 0)),
22220 GET_CODE (x
) == PRE_DEC
? "-" : "",
22221 GET_MODE_SIZE (output_memory_reference_mode
));
22223 asm_fprintf (stream
, "[%r], #%s%d",
22224 REGNO (XEXP (x
, 0)),
22225 GET_CODE (x
) == POST_DEC
? "-" : "",
22226 GET_MODE_SIZE (output_memory_reference_mode
));
22228 else if (GET_CODE (x
) == PRE_MODIFY
)
22230 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22231 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22232 asm_fprintf (stream
, "#%wd]!",
22233 INTVAL (XEXP (XEXP (x
, 1), 1)));
22235 asm_fprintf (stream
, "%r]!",
22236 REGNO (XEXP (XEXP (x
, 1), 1)));
22238 else if (GET_CODE (x
) == POST_MODIFY
)
22240 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22241 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22242 asm_fprintf (stream
, "#%wd",
22243 INTVAL (XEXP (XEXP (x
, 1), 1)));
22245 asm_fprintf (stream
, "%r",
22246 REGNO (XEXP (XEXP (x
, 1), 1)));
22248 else output_addr_const (stream
, x
);
22253 asm_fprintf (stream
, "[%r]", REGNO (x
));
22254 else if (GET_CODE (x
) == POST_INC
)
22255 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22256 else if (GET_CODE (x
) == PLUS
)
22258 gcc_assert (REG_P (XEXP (x
, 0)));
22259 if (CONST_INT_P (XEXP (x
, 1)))
22260 asm_fprintf (stream
, "[%r, #%wd]",
22261 REGNO (XEXP (x
, 0)),
22262 INTVAL (XEXP (x
, 1)));
22264 asm_fprintf (stream
, "[%r, %r]",
22265 REGNO (XEXP (x
, 0)),
22266 REGNO (XEXP (x
, 1)));
22269 output_addr_const (stream
, x
);
22273 /* Target hook for indicating whether a punctuation character for
22274 TARGET_PRINT_OPERAND is valid. */
22276 arm_print_operand_punct_valid_p (unsigned char code
)
22278 return (code
== '@' || code
== '|' || code
== '.'
22279 || code
== '(' || code
== ')' || code
== '#'
22280 || (TARGET_32BIT
&& (code
== '?'))
22281 || (TARGET_THUMB2
&& (code
== '!'))
22282 || (TARGET_THUMB
&& (code
== '_')));
22285 /* Target hook for assembling integer objects. The ARM version needs to
22286 handle word-sized values specially. */
22288 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22292 if (size
== UNITS_PER_WORD
&& aligned_p
)
22294 fputs ("\t.word\t", asm_out_file
);
22295 output_addr_const (asm_out_file
, x
);
22297 /* Mark symbols as position independent. We only do this in the
22298 .text segment, not in the .data segment. */
22299 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22300 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22302 /* See legitimize_pic_address for an explanation of the
22303 TARGET_VXWORKS_RTP check. */
22304 if (!arm_pic_data_is_text_relative
22305 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22306 fputs ("(GOT)", asm_out_file
);
22308 fputs ("(GOTOFF)", asm_out_file
);
22310 fputc ('\n', asm_out_file
);
22314 mode
= GET_MODE (x
);
22316 if (arm_vector_mode_supported_p (mode
))
22320 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22322 units
= CONST_VECTOR_NUNITS (x
);
22323 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
22325 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22326 for (i
= 0; i
< units
; i
++)
22328 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22330 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22333 for (i
= 0; i
< units
; i
++)
22335 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22336 REAL_VALUE_TYPE rval
;
22338 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
22341 (rval
, GET_MODE_INNER (mode
),
22342 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22348 return default_assemble_integer (x
, size
, aligned_p
);
22352 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22356 if (!TARGET_AAPCS_BASED
)
22359 default_named_section_asm_out_constructor
22360 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22364 /* Put these in the .init_array section, using a special relocation. */
22365 if (priority
!= DEFAULT_INIT_PRIORITY
)
22368 sprintf (buf
, "%s.%.5u",
22369 is_ctor
? ".init_array" : ".fini_array",
22371 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22378 switch_to_section (s
);
22379 assemble_align (POINTER_SIZE
);
22380 fputs ("\t.word\t", asm_out_file
);
22381 output_addr_const (asm_out_file
, symbol
);
22382 fputs ("(target1)\n", asm_out_file
);
22385 /* Add a function to the list of static constructors. */
22388 arm_elf_asm_constructor (rtx symbol
, int priority
)
22390 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22393 /* Add a function to the list of static destructors. */
22396 arm_elf_asm_destructor (rtx symbol
, int priority
)
22398 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22401 /* A finite state machine takes care of noticing whether or not instructions
22402 can be conditionally executed, and thus decrease execution time and code
22403 size by deleting branch instructions. The fsm is controlled by
22404 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22406 /* The state of the fsm controlling condition codes are:
22407 0: normal, do nothing special
22408 1: make ASM_OUTPUT_OPCODE not output this instruction
22409 2: make ASM_OUTPUT_OPCODE not output this instruction
22410 3: make instructions conditional
22411 4: make instructions conditional
22413 State transitions (state->state by whom under condition):
22414 0 -> 1 final_prescan_insn if the `target' is a label
22415 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22416 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22417 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22418 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22419 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22420 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22421 (the target insn is arm_target_insn).
22423 If the jump clobbers the conditions then we use states 2 and 4.
22425 A similar thing can be done with conditional return insns.
22427 XXX In case the `target' is an unconditional branch, this conditionalising
22428 of the instructions always reduces code size, but not always execution
22429 time. But then, I want to reduce the code size to somewhere near what
22430 /bin/cc produces. */
22432 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22433 instructions. When a COND_EXEC instruction is seen the subsequent
22434 instructions are scanned so that multiple conditional instructions can be
22435 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22436 specify the length and true/false mask for the IT block. These will be
22437 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22439 /* Returns the index of the ARM condition code string in
22440 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22441 COMPARISON should be an rtx like `(eq (...) (...))'. */
22444 maybe_get_arm_condition_code (rtx comparison
)
22446 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22447 enum arm_cond_code code
;
22448 enum rtx_code comp_code
= GET_CODE (comparison
);
22450 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22451 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22452 XEXP (comparison
, 1));
22456 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22457 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22458 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22459 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22460 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22461 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22462 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22463 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22464 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22465 case CC_DLTUmode
: code
= ARM_CC
;
22468 if (comp_code
== EQ
)
22469 return ARM_INVERSE_CONDITION_CODE (code
);
22470 if (comp_code
== NE
)
22477 case NE
: return ARM_NE
;
22478 case EQ
: return ARM_EQ
;
22479 case GE
: return ARM_PL
;
22480 case LT
: return ARM_MI
;
22481 default: return ARM_NV
;
22487 case NE
: return ARM_NE
;
22488 case EQ
: return ARM_EQ
;
22489 default: return ARM_NV
;
22495 case NE
: return ARM_MI
;
22496 case EQ
: return ARM_PL
;
22497 default: return ARM_NV
;
22502 /* We can handle all cases except UNEQ and LTGT. */
22505 case GE
: return ARM_GE
;
22506 case GT
: return ARM_GT
;
22507 case LE
: return ARM_LS
;
22508 case LT
: return ARM_MI
;
22509 case NE
: return ARM_NE
;
22510 case EQ
: return ARM_EQ
;
22511 case ORDERED
: return ARM_VC
;
22512 case UNORDERED
: return ARM_VS
;
22513 case UNLT
: return ARM_LT
;
22514 case UNLE
: return ARM_LE
;
22515 case UNGT
: return ARM_HI
;
22516 case UNGE
: return ARM_PL
;
22517 /* UNEQ and LTGT do not have a representation. */
22518 case UNEQ
: /* Fall through. */
22519 case LTGT
: /* Fall through. */
22520 default: return ARM_NV
;
22526 case NE
: return ARM_NE
;
22527 case EQ
: return ARM_EQ
;
22528 case GE
: return ARM_LE
;
22529 case GT
: return ARM_LT
;
22530 case LE
: return ARM_GE
;
22531 case LT
: return ARM_GT
;
22532 case GEU
: return ARM_LS
;
22533 case GTU
: return ARM_CC
;
22534 case LEU
: return ARM_CS
;
22535 case LTU
: return ARM_HI
;
22536 default: return ARM_NV
;
22542 case LTU
: return ARM_CS
;
22543 case GEU
: return ARM_CC
;
22544 default: return ARM_NV
;
22550 case NE
: return ARM_NE
;
22551 case EQ
: return ARM_EQ
;
22552 case GEU
: return ARM_CS
;
22553 case GTU
: return ARM_HI
;
22554 case LEU
: return ARM_LS
;
22555 case LTU
: return ARM_CC
;
22556 default: return ARM_NV
;
22562 case GE
: return ARM_GE
;
22563 case LT
: return ARM_LT
;
22564 case GEU
: return ARM_CS
;
22565 case LTU
: return ARM_CC
;
22566 default: return ARM_NV
;
22572 case NE
: return ARM_NE
;
22573 case EQ
: return ARM_EQ
;
22574 case GE
: return ARM_GE
;
22575 case GT
: return ARM_GT
;
22576 case LE
: return ARM_LE
;
22577 case LT
: return ARM_LT
;
22578 case GEU
: return ARM_CS
;
22579 case GTU
: return ARM_HI
;
22580 case LEU
: return ARM_LS
;
22581 case LTU
: return ARM_CC
;
22582 default: return ARM_NV
;
22585 default: gcc_unreachable ();
22589 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22590 static enum arm_cond_code
22591 get_arm_condition_code (rtx comparison
)
22593 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22594 gcc_assert (code
!= ARM_NV
);
22598 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22601 thumb2_final_prescan_insn (rtx_insn
*insn
)
22603 rtx_insn
*first_insn
= insn
;
22604 rtx body
= PATTERN (insn
);
22606 enum arm_cond_code code
;
22611 /* max_insns_skipped in the tune was already taken into account in the
22612 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22613 just emit the IT blocks as we can. It does not make sense to split
22615 max
= MAX_INSN_PER_IT_BLOCK
;
22617 /* Remove the previous insn from the count of insns to be output. */
22618 if (arm_condexec_count
)
22619 arm_condexec_count
--;
22621 /* Nothing to do if we are already inside a conditional block. */
22622 if (arm_condexec_count
)
22625 if (GET_CODE (body
) != COND_EXEC
)
22628 /* Conditional jumps are implemented directly. */
22632 predicate
= COND_EXEC_TEST (body
);
22633 arm_current_cc
= get_arm_condition_code (predicate
);
22635 n
= get_attr_ce_count (insn
);
22636 arm_condexec_count
= 1;
22637 arm_condexec_mask
= (1 << n
) - 1;
22638 arm_condexec_masklen
= n
;
22639 /* See if subsequent instructions can be combined into the same block. */
22642 insn
= next_nonnote_insn (insn
);
22644 /* Jumping into the middle of an IT block is illegal, so a label or
22645 barrier terminates the block. */
22646 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22649 body
= PATTERN (insn
);
22650 /* USE and CLOBBER aren't really insns, so just skip them. */
22651 if (GET_CODE (body
) == USE
22652 || GET_CODE (body
) == CLOBBER
)
22655 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22656 if (GET_CODE (body
) != COND_EXEC
)
22658 /* Maximum number of conditionally executed instructions in a block. */
22659 n
= get_attr_ce_count (insn
);
22660 if (arm_condexec_masklen
+ n
> max
)
22663 predicate
= COND_EXEC_TEST (body
);
22664 code
= get_arm_condition_code (predicate
);
22665 mask
= (1 << n
) - 1;
22666 if (arm_current_cc
== code
)
22667 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22668 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22671 arm_condexec_count
++;
22672 arm_condexec_masklen
+= n
;
22674 /* A jump must be the last instruction in a conditional block. */
22678 /* Restore recog_data (getting the attributes of other insns can
22679 destroy this array, but final.c assumes that it remains intact
22680 across this call). */
22681 extract_constrain_insn_cached (first_insn
);
22685 arm_final_prescan_insn (rtx_insn
*insn
)
22687 /* BODY will hold the body of INSN. */
22688 rtx body
= PATTERN (insn
);
22690 /* This will be 1 if trying to repeat the trick, and things need to be
22691 reversed if it appears to fail. */
22694 /* If we start with a return insn, we only succeed if we find another one. */
22695 int seeking_return
= 0;
22696 enum rtx_code return_code
= UNKNOWN
;
22698 /* START_INSN will hold the insn from where we start looking. This is the
22699 first insn after the following code_label if REVERSE is true. */
22700 rtx_insn
*start_insn
= insn
;
22702 /* If in state 4, check if the target branch is reached, in order to
22703 change back to state 0. */
22704 if (arm_ccfsm_state
== 4)
22706 if (insn
== arm_target_insn
)
22708 arm_target_insn
= NULL
;
22709 arm_ccfsm_state
= 0;
22714 /* If in state 3, it is possible to repeat the trick, if this insn is an
22715 unconditional branch to a label, and immediately following this branch
22716 is the previous target label which is only used once, and the label this
22717 branch jumps to is not too far off. */
22718 if (arm_ccfsm_state
== 3)
22720 if (simplejump_p (insn
))
22722 start_insn
= next_nonnote_insn (start_insn
);
22723 if (BARRIER_P (start_insn
))
22725 /* XXX Isn't this always a barrier? */
22726 start_insn
= next_nonnote_insn (start_insn
);
22728 if (LABEL_P (start_insn
)
22729 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22730 && LABEL_NUSES (start_insn
) == 1)
22735 else if (ANY_RETURN_P (body
))
22737 start_insn
= next_nonnote_insn (start_insn
);
22738 if (BARRIER_P (start_insn
))
22739 start_insn
= next_nonnote_insn (start_insn
);
22740 if (LABEL_P (start_insn
)
22741 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22742 && LABEL_NUSES (start_insn
) == 1)
22745 seeking_return
= 1;
22746 return_code
= GET_CODE (body
);
22755 gcc_assert (!arm_ccfsm_state
|| reverse
);
22756 if (!JUMP_P (insn
))
22759 /* This jump might be paralleled with a clobber of the condition codes
22760 the jump should always come first */
22761 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22762 body
= XVECEXP (body
, 0, 0);
22765 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22766 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22769 int fail
= FALSE
, succeed
= FALSE
;
22770 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22771 int then_not_else
= TRUE
;
22772 rtx_insn
*this_insn
= start_insn
;
22775 /* Register the insn jumped to. */
22778 if (!seeking_return
)
22779 label
= XEXP (SET_SRC (body
), 0);
22781 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22782 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22783 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22785 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22786 then_not_else
= FALSE
;
22788 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22790 seeking_return
= 1;
22791 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22793 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22795 seeking_return
= 1;
22796 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22797 then_not_else
= FALSE
;
22800 gcc_unreachable ();
22802 /* See how many insns this branch skips, and what kind of insns. If all
22803 insns are okay, and the label or unconditional branch to the same
22804 label is not too far away, succeed. */
22805 for (insns_skipped
= 0;
22806 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22810 this_insn
= next_nonnote_insn (this_insn
);
22814 switch (GET_CODE (this_insn
))
22817 /* Succeed if it is the target label, otherwise fail since
22818 control falls in from somewhere else. */
22819 if (this_insn
== label
)
22821 arm_ccfsm_state
= 1;
22829 /* Succeed if the following insn is the target label.
22831 If return insns are used then the last insn in a function
22832 will be a barrier. */
22833 this_insn
= next_nonnote_insn (this_insn
);
22834 if (this_insn
&& this_insn
== label
)
22836 arm_ccfsm_state
= 1;
22844 /* The AAPCS says that conditional calls should not be
22845 used since they make interworking inefficient (the
22846 linker can't transform BL<cond> into BLX). That's
22847 only a problem if the machine has BLX. */
22854 /* Succeed if the following insn is the target label, or
22855 if the following two insns are a barrier and the
22857 this_insn
= next_nonnote_insn (this_insn
);
22858 if (this_insn
&& BARRIER_P (this_insn
))
22859 this_insn
= next_nonnote_insn (this_insn
);
22861 if (this_insn
&& this_insn
== label
22862 && insns_skipped
< max_insns_skipped
)
22864 arm_ccfsm_state
= 1;
22872 /* If this is an unconditional branch to the same label, succeed.
22873 If it is to another label, do nothing. If it is conditional,
22875 /* XXX Probably, the tests for SET and the PC are
22878 scanbody
= PATTERN (this_insn
);
22879 if (GET_CODE (scanbody
) == SET
22880 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22882 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22883 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22885 arm_ccfsm_state
= 2;
22888 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22891 /* Fail if a conditional return is undesirable (e.g. on a
22892 StrongARM), but still allow this if optimizing for size. */
22893 else if (GET_CODE (scanbody
) == return_code
22894 && !use_return_insn (TRUE
, NULL
)
22897 else if (GET_CODE (scanbody
) == return_code
)
22899 arm_ccfsm_state
= 2;
22902 else if (GET_CODE (scanbody
) == PARALLEL
)
22904 switch (get_attr_conds (this_insn
))
22914 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22919 /* Instructions using or affecting the condition codes make it
22921 scanbody
= PATTERN (this_insn
);
22922 if (!(GET_CODE (scanbody
) == SET
22923 || GET_CODE (scanbody
) == PARALLEL
)
22924 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
22934 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22935 arm_target_label
= CODE_LABEL_NUMBER (label
);
22938 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22940 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22942 this_insn
= next_nonnote_insn (this_insn
);
22943 gcc_assert (!this_insn
22944 || (!BARRIER_P (this_insn
)
22945 && !LABEL_P (this_insn
)));
22949 /* Oh, dear! we ran off the end.. give up. */
22950 extract_constrain_insn_cached (insn
);
22951 arm_ccfsm_state
= 0;
22952 arm_target_insn
= NULL
;
22955 arm_target_insn
= this_insn
;
22958 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22961 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22963 if (reverse
|| then_not_else
)
22964 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
22967 /* Restore recog_data (getting the attributes of other insns can
22968 destroy this array, but final.c assumes that it remains intact
22969 across this call. */
22970 extract_constrain_insn_cached (insn
);
22974 /* Output IT instructions. */
22976 thumb2_asm_output_opcode (FILE * stream
)
22981 if (arm_condexec_mask
)
22983 for (n
= 0; n
< arm_condexec_masklen
; n
++)
22984 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
22986 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
22987 arm_condition_codes
[arm_current_cc
]);
22988 arm_condexec_mask
= 0;
22992 /* Returns true if REGNO is a valid register
22993 for holding a quantity of type MODE. */
22995 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
22997 if (GET_MODE_CLASS (mode
) == MODE_CC
)
22998 return (regno
== CC_REGNUM
22999 || (TARGET_HARD_FLOAT
&& TARGET_VFP
23000 && regno
== VFPCC_REGNUM
));
23002 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23006 /* For the Thumb we only allow values bigger than SImode in
23007 registers 0 - 6, so that there is always a second low
23008 register available to hold the upper part of the value.
23009 We probably we ought to ensure that the register is the
23010 start of an even numbered register pair. */
23011 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23013 if (TARGET_HARD_FLOAT
&& TARGET_VFP
23014 && IS_VFP_REGNUM (regno
))
23016 if (mode
== SFmode
|| mode
== SImode
)
23017 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23019 if (mode
== DFmode
)
23020 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23022 /* VFP registers can hold HFmode values, but there is no point in
23023 putting them there unless we have hardware conversion insns. */
23024 if (mode
== HFmode
)
23025 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
23028 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23029 || (VALID_NEON_QREG_MODE (mode
)
23030 && NEON_REGNO_OK_FOR_QUAD (regno
))
23031 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23032 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23033 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23034 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23035 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23040 if (TARGET_REALLY_IWMMXT
)
23042 if (IS_IWMMXT_GR_REGNUM (regno
))
23043 return mode
== SImode
;
23045 if (IS_IWMMXT_REGNUM (regno
))
23046 return VALID_IWMMXT_REG_MODE (mode
);
23049 /* We allow almost any value to be stored in the general registers.
23050 Restrict doubleword quantities to even register pairs in ARM state
23051 so that we can use ldrd. Do not allow very large Neon structure
23052 opaque modes in general registers; they would use too many. */
23053 if (regno
<= LAST_ARM_REGNUM
)
23055 if (ARM_NUM_REGS (mode
) > 4)
23061 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23064 if (regno
== FRAME_POINTER_REGNUM
23065 || regno
== ARG_POINTER_REGNUM
)
23066 /* We only allow integers in the fake hard registers. */
23067 return GET_MODE_CLASS (mode
) == MODE_INT
;
23072 /* Implement MODES_TIEABLE_P. */
23075 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23077 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23080 /* We specifically want to allow elements of "structure" modes to
23081 be tieable to the structure. This more general condition allows
23082 other rarer situations too. */
23084 && (VALID_NEON_DREG_MODE (mode1
)
23085 || VALID_NEON_QREG_MODE (mode1
)
23086 || VALID_NEON_STRUCT_MODE (mode1
))
23087 && (VALID_NEON_DREG_MODE (mode2
)
23088 || VALID_NEON_QREG_MODE (mode2
)
23089 || VALID_NEON_STRUCT_MODE (mode2
)))
23095 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23096 not used in arm mode. */
23099 arm_regno_class (int regno
)
23101 if (regno
== PC_REGNUM
)
23106 if (regno
== STACK_POINTER_REGNUM
)
23108 if (regno
== CC_REGNUM
)
23115 if (TARGET_THUMB2
&& regno
< 8)
23118 if ( regno
<= LAST_ARM_REGNUM
23119 || regno
== FRAME_POINTER_REGNUM
23120 || regno
== ARG_POINTER_REGNUM
)
23121 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23123 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23124 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23126 if (IS_VFP_REGNUM (regno
))
23128 if (regno
<= D7_VFP_REGNUM
)
23129 return VFP_D0_D7_REGS
;
23130 else if (regno
<= LAST_LO_VFP_REGNUM
)
23131 return VFP_LO_REGS
;
23133 return VFP_HI_REGS
;
23136 if (IS_IWMMXT_REGNUM (regno
))
23137 return IWMMXT_REGS
;
23139 if (IS_IWMMXT_GR_REGNUM (regno
))
23140 return IWMMXT_GR_REGS
;
23145 /* Handle a special case when computing the offset
23146 of an argument from the frame pointer. */
23148 arm_debugger_arg_offset (int value
, rtx addr
)
23152 /* We are only interested if dbxout_parms() failed to compute the offset. */
23156 /* We can only cope with the case where the address is held in a register. */
23160 /* If we are using the frame pointer to point at the argument, then
23161 an offset of 0 is correct. */
23162 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23165 /* If we are using the stack pointer to point at the
23166 argument, then an offset of 0 is correct. */
23167 /* ??? Check this is consistent with thumb2 frame layout. */
23168 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23169 && REGNO (addr
) == SP_REGNUM
)
23172 /* Oh dear. The argument is pointed to by a register rather
23173 than being held in a register, or being stored at a known
23174 offset from the frame pointer. Since GDB only understands
23175 those two kinds of argument we must translate the address
23176 held in the register into an offset from the frame pointer.
23177 We do this by searching through the insns for the function
23178 looking to see where this register gets its value. If the
23179 register is initialized from the frame pointer plus an offset
23180 then we are in luck and we can continue, otherwise we give up.
23182 This code is exercised by producing debugging information
23183 for a function with arguments like this:
23185 double func (double a, double b, int c, double d) {return d;}
23187 Without this code the stab for parameter 'd' will be set to
23188 an offset of 0 from the frame pointer, rather than 8. */
23190 /* The if() statement says:
23192 If the insn is a normal instruction
23193 and if the insn is setting the value in a register
23194 and if the register being set is the register holding the address of the argument
23195 and if the address is computing by an addition
23196 that involves adding to a register
23197 which is the frame pointer
23202 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23204 if ( NONJUMP_INSN_P (insn
)
23205 && GET_CODE (PATTERN (insn
)) == SET
23206 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23207 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23208 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23209 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23210 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23213 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23222 warning (0, "unable to compute real location of stacked parameter");
23223 value
= 8; /* XXX magic hack */
23229 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23231 static const char *
23232 arm_invalid_parameter_type (const_tree t
)
23234 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23235 return N_("function parameters cannot have __fp16 type");
23239 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23241 static const char *
23242 arm_invalid_return_type (const_tree t
)
23244 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23245 return N_("functions cannot return __fp16 type");
23249 /* Implement TARGET_PROMOTED_TYPE. */
23252 arm_promoted_type (const_tree t
)
23254 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23255 return float_type_node
;
23259 /* Implement TARGET_CONVERT_TO_TYPE.
23260 Specifically, this hook implements the peculiarity of the ARM
23261 half-precision floating-point C semantics that requires conversions between
23262 __fp16 to or from double to do an intermediate conversion to float. */
23265 arm_convert_to_type (tree type
, tree expr
)
23267 tree fromtype
= TREE_TYPE (expr
);
23268 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
23270 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
23271 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
23272 return convert (type
, convert (float_type_node
, expr
));
23276 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23277 This simply adds HFmode as a supported mode; even though we don't
23278 implement arithmetic on this type directly, it's supported by
23279 optabs conversions, much the way the double-word arithmetic is
23280 special-cased in the default hook. */
23283 arm_scalar_mode_supported_p (machine_mode mode
)
23285 if (mode
== HFmode
)
23286 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23287 else if (ALL_FIXED_POINT_MODE_P (mode
))
23290 return default_scalar_mode_supported_p (mode
);
23293 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23295 neon_reinterpret (rtx dest
, rtx src
)
23297 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
23300 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23301 not to early-clobber SRC registers in the process.
23303 We assume that the operands described by SRC and DEST represent a
23304 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23305 number of components into which the copy has been decomposed. */
23307 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23311 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23312 || REGNO (operands
[0]) < REGNO (operands
[1]))
23314 for (i
= 0; i
< count
; i
++)
23316 operands
[2 * i
] = dest
[i
];
23317 operands
[2 * i
+ 1] = src
[i
];
23322 for (i
= 0; i
< count
; i
++)
23324 operands
[2 * i
] = dest
[count
- i
- 1];
23325 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23330 /* Split operands into moves from op[1] + op[2] into op[0]. */
23333 neon_split_vcombine (rtx operands
[3])
23335 unsigned int dest
= REGNO (operands
[0]);
23336 unsigned int src1
= REGNO (operands
[1]);
23337 unsigned int src2
= REGNO (operands
[2]);
23338 machine_mode halfmode
= GET_MODE (operands
[1]);
23339 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
23340 rtx destlo
, desthi
;
23342 if (src1
== dest
&& src2
== dest
+ halfregs
)
23344 /* No-op move. Can't split to nothing; emit something. */
23345 emit_note (NOTE_INSN_DELETED
);
23349 /* Preserve register attributes for variable tracking. */
23350 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23351 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23352 GET_MODE_SIZE (halfmode
));
23354 /* Special case of reversed high/low parts. Use VSWP. */
23355 if (src2
== dest
&& src1
== dest
+ halfregs
)
23357 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23358 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23359 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23363 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23365 /* Try to avoid unnecessary moves if part of the result
23366 is in the right place already. */
23368 emit_move_insn (destlo
, operands
[1]);
23369 if (src2
!= dest
+ halfregs
)
23370 emit_move_insn (desthi
, operands
[2]);
23374 if (src2
!= dest
+ halfregs
)
23375 emit_move_insn (desthi
, operands
[2]);
23377 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
23390 /* Like emit_multi_reg_push, but allowing for a different set of
23391 registers to be described as saved. MASK is the set of registers
23392 to be saved; REAL_REGS is the set of registers to be described as
23393 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23396 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23398 unsigned long regno
;
23399 rtx par
[10], tmp
, reg
;
23403 /* Build the parallel of the registers actually being stored. */
23404 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23406 regno
= ctz_hwi (mask
);
23407 reg
= gen_rtx_REG (SImode
, regno
);
23410 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23412 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23417 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23418 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23419 tmp
= gen_frame_mem (BLKmode
, tmp
);
23420 tmp
= gen_rtx_SET (tmp
, par
[0]);
23423 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23424 insn
= emit_insn (tmp
);
23426 /* Always build the stack adjustment note for unwind info. */
23427 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23428 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23431 /* Build the parallel of the registers recorded as saved for unwind. */
23432 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23434 regno
= ctz_hwi (real_regs
);
23435 reg
= gen_rtx_REG (SImode
, regno
);
23437 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23438 tmp
= gen_frame_mem (SImode
, tmp
);
23439 tmp
= gen_rtx_SET (tmp
, reg
);
23440 RTX_FRAME_RELATED_P (tmp
) = 1;
23448 RTX_FRAME_RELATED_P (par
[0]) = 1;
23449 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23452 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23457 /* Emit code to push or pop registers to or from the stack. F is the
23458 assembly file. MASK is the registers to pop. */
23460 thumb_pop (FILE *f
, unsigned long mask
)
23463 int lo_mask
= mask
& 0xFF;
23464 int pushed_words
= 0;
23468 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23470 /* Special case. Do not generate a POP PC statement here, do it in
23472 thumb_exit (f
, -1);
23476 fprintf (f
, "\tpop\t{");
23478 /* Look at the low registers first. */
23479 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23483 asm_fprintf (f
, "%r", regno
);
23485 if ((lo_mask
& ~1) != 0)
23492 if (mask
& (1 << PC_REGNUM
))
23494 /* Catch popping the PC. */
23495 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
23496 || crtl
->calls_eh_return
)
23498 /* The PC is never poped directly, instead
23499 it is popped into r3 and then BX is used. */
23500 fprintf (f
, "}\n");
23502 thumb_exit (f
, -1);
23511 asm_fprintf (f
, "%r", PC_REGNUM
);
23515 fprintf (f
, "}\n");
23518 /* Generate code to return from a thumb function.
23519 If 'reg_containing_return_addr' is -1, then the return address is
23520 actually on the stack, at the stack pointer. */
23522 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23524 unsigned regs_available_for_popping
;
23525 unsigned regs_to_pop
;
23527 unsigned available
;
23531 int restore_a4
= FALSE
;
23533 /* Compute the registers we need to pop. */
23537 if (reg_containing_return_addr
== -1)
23539 regs_to_pop
|= 1 << LR_REGNUM
;
23543 if (TARGET_BACKTRACE
)
23545 /* Restore the (ARM) frame pointer and stack pointer. */
23546 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23550 /* If there is nothing to pop then just emit the BX instruction and
23552 if (pops_needed
== 0)
23554 if (crtl
->calls_eh_return
)
23555 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23557 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23560 /* Otherwise if we are not supporting interworking and we have not created
23561 a backtrace structure and the function was not entered in ARM mode then
23562 just pop the return address straight into the PC. */
23563 else if (!TARGET_INTERWORK
23564 && !TARGET_BACKTRACE
23565 && !is_called_in_ARM_mode (current_function_decl
)
23566 && !crtl
->calls_eh_return
)
23568 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23572 /* Find out how many of the (return) argument registers we can corrupt. */
23573 regs_available_for_popping
= 0;
23575 /* If returning via __builtin_eh_return, the bottom three registers
23576 all contain information needed for the return. */
23577 if (crtl
->calls_eh_return
)
23581 /* If we can deduce the registers used from the function's
23582 return value. This is more reliable that examining
23583 df_regs_ever_live_p () because that will be set if the register is
23584 ever used in the function, not just if the register is used
23585 to hold a return value. */
23587 if (crtl
->return_rtx
!= 0)
23588 mode
= GET_MODE (crtl
->return_rtx
);
23590 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23592 size
= GET_MODE_SIZE (mode
);
23596 /* In a void function we can use any argument register.
23597 In a function that returns a structure on the stack
23598 we can use the second and third argument registers. */
23599 if (mode
== VOIDmode
)
23600 regs_available_for_popping
=
23601 (1 << ARG_REGISTER (1))
23602 | (1 << ARG_REGISTER (2))
23603 | (1 << ARG_REGISTER (3));
23605 regs_available_for_popping
=
23606 (1 << ARG_REGISTER (2))
23607 | (1 << ARG_REGISTER (3));
23609 else if (size
<= 4)
23610 regs_available_for_popping
=
23611 (1 << ARG_REGISTER (2))
23612 | (1 << ARG_REGISTER (3));
23613 else if (size
<= 8)
23614 regs_available_for_popping
=
23615 (1 << ARG_REGISTER (3));
23618 /* Match registers to be popped with registers into which we pop them. */
23619 for (available
= regs_available_for_popping
,
23620 required
= regs_to_pop
;
23621 required
!= 0 && available
!= 0;
23622 available
&= ~(available
& - available
),
23623 required
&= ~(required
& - required
))
23626 /* If we have any popping registers left over, remove them. */
23628 regs_available_for_popping
&= ~available
;
23630 /* Otherwise if we need another popping register we can use
23631 the fourth argument register. */
23632 else if (pops_needed
)
23634 /* If we have not found any free argument registers and
23635 reg a4 contains the return address, we must move it. */
23636 if (regs_available_for_popping
== 0
23637 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
23639 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23640 reg_containing_return_addr
= LR_REGNUM
;
23642 else if (size
> 12)
23644 /* Register a4 is being used to hold part of the return value,
23645 but we have dire need of a free, low register. */
23648 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
23651 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
23653 /* The fourth argument register is available. */
23654 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
23660 /* Pop as many registers as we can. */
23661 thumb_pop (f
, regs_available_for_popping
);
23663 /* Process the registers we popped. */
23664 if (reg_containing_return_addr
== -1)
23666 /* The return address was popped into the lowest numbered register. */
23667 regs_to_pop
&= ~(1 << LR_REGNUM
);
23669 reg_containing_return_addr
=
23670 number_of_first_bit_set (regs_available_for_popping
);
23672 /* Remove this register for the mask of available registers, so that
23673 the return address will not be corrupted by further pops. */
23674 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
23677 /* If we popped other registers then handle them here. */
23678 if (regs_available_for_popping
)
23682 /* Work out which register currently contains the frame pointer. */
23683 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
23685 /* Move it into the correct place. */
23686 asm_fprintf (f
, "\tmov\t%r, %r\n",
23687 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
23689 /* (Temporarily) remove it from the mask of popped registers. */
23690 regs_available_for_popping
&= ~(1 << frame_pointer
);
23691 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
23693 if (regs_available_for_popping
)
23697 /* We popped the stack pointer as well,
23698 find the register that contains it. */
23699 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
23701 /* Move it into the stack register. */
23702 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
23704 /* At this point we have popped all necessary registers, so
23705 do not worry about restoring regs_available_for_popping
23706 to its correct value:
23708 assert (pops_needed == 0)
23709 assert (regs_available_for_popping == (1 << frame_pointer))
23710 assert (regs_to_pop == (1 << STACK_POINTER)) */
23714 /* Since we have just move the popped value into the frame
23715 pointer, the popping register is available for reuse, and
23716 we know that we still have the stack pointer left to pop. */
23717 regs_available_for_popping
|= (1 << frame_pointer
);
23721 /* If we still have registers left on the stack, but we no longer have
23722 any registers into which we can pop them, then we must move the return
23723 address into the link register and make available the register that
23725 if (regs_available_for_popping
== 0 && pops_needed
> 0)
23727 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
23729 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
23730 reg_containing_return_addr
);
23732 reg_containing_return_addr
= LR_REGNUM
;
23735 /* If we have registers left on the stack then pop some more.
23736 We know that at most we will want to pop FP and SP. */
23737 if (pops_needed
> 0)
23742 thumb_pop (f
, regs_available_for_popping
);
23744 /* We have popped either FP or SP.
23745 Move whichever one it is into the correct register. */
23746 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
23747 move_to
= number_of_first_bit_set (regs_to_pop
);
23749 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
23751 regs_to_pop
&= ~(1 << move_to
);
23756 /* If we still have not popped everything then we must have only
23757 had one register available to us and we are now popping the SP. */
23758 if (pops_needed
> 0)
23762 thumb_pop (f
, regs_available_for_popping
);
23764 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
23766 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
23768 assert (regs_to_pop == (1 << STACK_POINTER))
23769 assert (pops_needed == 1)
23773 /* If necessary restore the a4 register. */
23776 if (reg_containing_return_addr
!= LR_REGNUM
)
23778 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23779 reg_containing_return_addr
= LR_REGNUM
;
23782 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
23785 if (crtl
->calls_eh_return
)
23786 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23788 /* Return to caller. */
23789 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23792 /* Scan INSN just before assembler is output for it.
23793 For Thumb-1, we track the status of the condition codes; this
23794 information is used in the cbranchsi4_insn pattern. */
23796 thumb1_final_prescan_insn (rtx_insn
*insn
)
23798 if (flag_print_asm_name
)
23799 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
23800 INSN_ADDRESSES (INSN_UID (insn
)));
23801 /* Don't overwrite the previous setter when we get to a cbranch. */
23802 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
23804 enum attr_conds conds
;
23806 if (cfun
->machine
->thumb1_cc_insn
)
23808 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
23809 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
23812 conds
= get_attr_conds (insn
);
23813 if (conds
== CONDS_SET
)
23815 rtx set
= single_set (insn
);
23816 cfun
->machine
->thumb1_cc_insn
= insn
;
23817 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
23818 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
23819 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
23820 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
23822 rtx src1
= XEXP (SET_SRC (set
), 1);
23823 if (src1
== const0_rtx
)
23824 cfun
->machine
->thumb1_cc_mode
= CCmode
;
23826 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
23828 /* Record the src register operand instead of dest because
23829 cprop_hardreg pass propagates src. */
23830 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
23833 else if (conds
!= CONDS_NOCOND
)
23834 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
23837 /* Check if unexpected far jump is used. */
23838 if (cfun
->machine
->lr_save_eliminated
23839 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
23840 internal_error("Unexpected thumb1 far jump");
23844 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
23846 unsigned HOST_WIDE_INT mask
= 0xff;
23849 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
23850 if (val
== 0) /* XXX */
23853 for (i
= 0; i
< 25; i
++)
23854 if ((val
& (mask
<< i
)) == val
)
23860 /* Returns nonzero if the current function contains,
23861 or might contain a far jump. */
23863 thumb_far_jump_used_p (void)
23866 bool far_jump
= false;
23867 unsigned int func_size
= 0;
23869 /* This test is only important for leaf functions. */
23870 /* assert (!leaf_function_p ()); */
23872 /* If we have already decided that far jumps may be used,
23873 do not bother checking again, and always return true even if
23874 it turns out that they are not being used. Once we have made
23875 the decision that far jumps are present (and that hence the link
23876 register will be pushed onto the stack) we cannot go back on it. */
23877 if (cfun
->machine
->far_jump_used
)
23880 /* If this function is not being called from the prologue/epilogue
23881 generation code then it must be being called from the
23882 INITIAL_ELIMINATION_OFFSET macro. */
23883 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
23885 /* In this case we know that we are being asked about the elimination
23886 of the arg pointer register. If that register is not being used,
23887 then there are no arguments on the stack, and we do not have to
23888 worry that a far jump might force the prologue to push the link
23889 register, changing the stack offsets. In this case we can just
23890 return false, since the presence of far jumps in the function will
23891 not affect stack offsets.
23893 If the arg pointer is live (or if it was live, but has now been
23894 eliminated and so set to dead) then we do have to test to see if
23895 the function might contain a far jump. This test can lead to some
23896 false negatives, since before reload is completed, then length of
23897 branch instructions is not known, so gcc defaults to returning their
23898 longest length, which in turn sets the far jump attribute to true.
23900 A false negative will not result in bad code being generated, but it
23901 will result in a needless push and pop of the link register. We
23902 hope that this does not occur too often.
23904 If we need doubleword stack alignment this could affect the other
23905 elimination offsets so we can't risk getting it wrong. */
23906 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
23907 cfun
->machine
->arg_pointer_live
= 1;
23908 else if (!cfun
->machine
->arg_pointer_live
)
23912 /* We should not change far_jump_used during or after reload, as there is
23913 no chance to change stack frame layout. */
23914 if (reload_in_progress
|| reload_completed
)
23917 /* Check to see if the function contains a branch
23918 insn with the far jump attribute set. */
23919 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23921 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
23925 func_size
+= get_attr_length (insn
);
23928 /* Attribute far_jump will always be true for thumb1 before
23929 shorten_branch pass. So checking far_jump attribute before
23930 shorten_branch isn't much useful.
23932 Following heuristic tries to estimate more accurately if a far jump
23933 may finally be used. The heuristic is very conservative as there is
23934 no chance to roll-back the decision of not to use far jump.
23936 Thumb1 long branch offset is -2048 to 2046. The worst case is each
23937 2-byte insn is associated with a 4 byte constant pool. Using
23938 function size 2048/3 as the threshold is conservative enough. */
23941 if ((func_size
* 3) >= 2048)
23943 /* Record the fact that we have decided that
23944 the function does use far jumps. */
23945 cfun
->machine
->far_jump_used
= 1;
23953 /* Return nonzero if FUNC must be entered in ARM mode. */
23955 is_called_in_ARM_mode (tree func
)
23957 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
23959 /* Ignore the problem about functions whose address is taken. */
23960 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
23964 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
23970 /* Given the stack offsets and register mask in OFFSETS, decide how
23971 many additional registers to push instead of subtracting a constant
23972 from SP. For epilogues the principle is the same except we use pop.
23973 FOR_PROLOGUE indicates which we're generating. */
23975 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
23977 HOST_WIDE_INT amount
;
23978 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
23979 /* Extract a mask of the ones we can give to the Thumb's push/pop
23981 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
23982 /* Then count how many other high registers will need to be pushed. */
23983 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
23984 int n_free
, reg_base
, size
;
23986 if (!for_prologue
&& frame_pointer_needed
)
23987 amount
= offsets
->locals_base
- offsets
->saved_regs
;
23989 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
23991 /* If the stack frame size is 512 exactly, we can save one load
23992 instruction, which should make this a win even when optimizing
23994 if (!optimize_size
&& amount
!= 512)
23997 /* Can't do this if there are high registers to push. */
23998 if (high_regs_pushed
!= 0)
24001 /* Shouldn't do it in the prologue if no registers would normally
24002 be pushed at all. In the epilogue, also allow it if we'll have
24003 a pop insn for the PC. */
24006 || TARGET_BACKTRACE
24007 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24008 || TARGET_INTERWORK
24009 || crtl
->args
.pretend_args_size
!= 0))
24012 /* Don't do this if thumb_expand_prologue wants to emit instructions
24013 between the push and the stack frame allocation. */
24015 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24016 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24023 size
= arm_size_return_regs ();
24024 reg_base
= ARM_NUM_INTS (size
);
24025 live_regs_mask
>>= reg_base
;
24028 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24029 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24031 live_regs_mask
>>= 1;
24037 gcc_assert (amount
/ 4 * 4 == amount
);
24039 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24040 return (amount
- 508) / 4;
24041 if (amount
<= n_free
* 4)
24046 /* The bits which aren't usefully expanded as rtl. */
24048 thumb1_unexpanded_epilogue (void)
24050 arm_stack_offsets
*offsets
;
24052 unsigned long live_regs_mask
= 0;
24053 int high_regs_pushed
= 0;
24055 int had_to_push_lr
;
24058 if (cfun
->machine
->return_used_this_function
!= 0)
24061 if (IS_NAKED (arm_current_func_type ()))
24064 offsets
= arm_get_frame_offsets ();
24065 live_regs_mask
= offsets
->saved_regs_mask
;
24066 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24068 /* If we can deduce the registers used from the function's return value.
24069 This is more reliable that examining df_regs_ever_live_p () because that
24070 will be set if the register is ever used in the function, not just if
24071 the register is used to hold a return value. */
24072 size
= arm_size_return_regs ();
24074 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24077 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24078 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24081 /* The prolog may have pushed some high registers to use as
24082 work registers. e.g. the testsuite file:
24083 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24084 compiles to produce:
24085 push {r4, r5, r6, r7, lr}
24089 as part of the prolog. We have to undo that pushing here. */
24091 if (high_regs_pushed
)
24093 unsigned long mask
= live_regs_mask
& 0xff;
24096 /* The available low registers depend on the size of the value we are
24104 /* Oh dear! We have no low registers into which we can pop
24107 ("no low registers available for popping high registers");
24109 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24110 if (live_regs_mask
& (1 << next_hi_reg
))
24113 while (high_regs_pushed
)
24115 /* Find lo register(s) into which the high register(s) can
24117 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24119 if (mask
& (1 << regno
))
24120 high_regs_pushed
--;
24121 if (high_regs_pushed
== 0)
24125 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24127 /* Pop the values into the low register(s). */
24128 thumb_pop (asm_out_file
, mask
);
24130 /* Move the value(s) into the high registers. */
24131 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24133 if (mask
& (1 << regno
))
24135 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24138 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24139 if (live_regs_mask
& (1 << next_hi_reg
))
24144 live_regs_mask
&= ~0x0f00;
24147 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24148 live_regs_mask
&= 0xff;
24150 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24152 /* Pop the return address into the PC. */
24153 if (had_to_push_lr
)
24154 live_regs_mask
|= 1 << PC_REGNUM
;
24156 /* Either no argument registers were pushed or a backtrace
24157 structure was created which includes an adjusted stack
24158 pointer, so just pop everything. */
24159 if (live_regs_mask
)
24160 thumb_pop (asm_out_file
, live_regs_mask
);
24162 /* We have either just popped the return address into the
24163 PC or it is was kept in LR for the entire function.
24164 Note that thumb_pop has already called thumb_exit if the
24165 PC was in the list. */
24166 if (!had_to_push_lr
)
24167 thumb_exit (asm_out_file
, LR_REGNUM
);
24171 /* Pop everything but the return address. */
24172 if (live_regs_mask
)
24173 thumb_pop (asm_out_file
, live_regs_mask
);
24175 if (had_to_push_lr
)
24179 /* We have no free low regs, so save one. */
24180 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24184 /* Get the return address into a temporary register. */
24185 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24189 /* Move the return address to lr. */
24190 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24192 /* Restore the low register. */
24193 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24198 regno
= LAST_ARG_REGNUM
;
24203 /* Remove the argument registers that were pushed onto the stack. */
24204 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24205 SP_REGNUM
, SP_REGNUM
,
24206 crtl
->args
.pretend_args_size
);
24208 thumb_exit (asm_out_file
, regno
);
24214 /* Functions to save and restore machine-specific function data. */
24215 static struct machine_function
*
24216 arm_init_machine_status (void)
24218 struct machine_function
*machine
;
24219 machine
= ggc_cleared_alloc
<machine_function
> ();
24221 #if ARM_FT_UNKNOWN != 0
24222 machine
->func_type
= ARM_FT_UNKNOWN
;
24227 /* Return an RTX indicating where the return address to the
24228 calling function can be found. */
24230 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24235 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24238 /* Do anything needed before RTL is emitted for each function. */
24240 arm_init_expanders (void)
24242 /* Arrange to initialize and mark the machine per-function status. */
24243 init_machine_status
= arm_init_machine_status
;
24245 /* This is to stop the combine pass optimizing away the alignment
24246 adjustment of va_arg. */
24247 /* ??? It is claimed that this should not be necessary. */
24249 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
24252 /* Check that FUNC is called with a different mode. */
24255 arm_change_mode_p (tree func
)
24257 if (TREE_CODE (func
) != FUNCTION_DECL
)
24260 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24263 callee_tree
= target_option_default_node
;
24265 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24266 int flags
= callee_opts
->x_target_flags
;
24268 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
24271 /* Like arm_compute_initial_elimination offset. Simpler because there
24272 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24273 to point at the base of the local variables after static stack
24274 space for a function has been allocated. */
24277 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24279 arm_stack_offsets
*offsets
;
24281 offsets
= arm_get_frame_offsets ();
24285 case ARG_POINTER_REGNUM
:
24288 case STACK_POINTER_REGNUM
:
24289 return offsets
->outgoing_args
- offsets
->saved_args
;
24291 case FRAME_POINTER_REGNUM
:
24292 return offsets
->soft_frame
- offsets
->saved_args
;
24294 case ARM_HARD_FRAME_POINTER_REGNUM
:
24295 return offsets
->saved_regs
- offsets
->saved_args
;
24297 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24298 return offsets
->locals_base
- offsets
->saved_args
;
24301 gcc_unreachable ();
24305 case FRAME_POINTER_REGNUM
:
24308 case STACK_POINTER_REGNUM
:
24309 return offsets
->outgoing_args
- offsets
->soft_frame
;
24311 case ARM_HARD_FRAME_POINTER_REGNUM
:
24312 return offsets
->saved_regs
- offsets
->soft_frame
;
24314 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24315 return offsets
->locals_base
- offsets
->soft_frame
;
24318 gcc_unreachable ();
24323 gcc_unreachable ();
24327 /* Generate the function's prologue. */
24330 thumb1_expand_prologue (void)
24334 HOST_WIDE_INT amount
;
24335 arm_stack_offsets
*offsets
;
24336 unsigned long func_type
;
24338 unsigned long live_regs_mask
;
24339 unsigned long l_mask
;
24340 unsigned high_regs_pushed
= 0;
24342 func_type
= arm_current_func_type ();
24344 /* Naked functions don't have prologues. */
24345 if (IS_NAKED (func_type
))
24348 if (IS_INTERRUPT (func_type
))
24350 error ("interrupt Service Routines cannot be coded in Thumb mode");
24354 if (is_called_in_ARM_mode (current_function_decl
))
24355 emit_insn (gen_prologue_thumb1_interwork ());
24357 offsets
= arm_get_frame_offsets ();
24358 live_regs_mask
= offsets
->saved_regs_mask
;
24360 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24361 l_mask
= live_regs_mask
& 0x40ff;
24362 /* Then count how many other high registers will need to be pushed. */
24363 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24365 if (crtl
->args
.pretend_args_size
)
24367 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24369 if (cfun
->machine
->uses_anonymous_args
)
24371 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24372 unsigned long mask
;
24374 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24375 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24377 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24381 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24382 stack_pointer_rtx
, x
));
24384 RTX_FRAME_RELATED_P (insn
) = 1;
24387 if (TARGET_BACKTRACE
)
24389 HOST_WIDE_INT offset
= 0;
24390 unsigned work_register
;
24391 rtx work_reg
, x
, arm_hfp_rtx
;
24393 /* We have been asked to create a stack backtrace structure.
24394 The code looks like this:
24398 0 sub SP, #16 Reserve space for 4 registers.
24399 2 push {R7} Push low registers.
24400 4 add R7, SP, #20 Get the stack pointer before the push.
24401 6 str R7, [SP, #8] Store the stack pointer
24402 (before reserving the space).
24403 8 mov R7, PC Get hold of the start of this code + 12.
24404 10 str R7, [SP, #16] Store it.
24405 12 mov R7, FP Get hold of the current frame pointer.
24406 14 str R7, [SP, #4] Store it.
24407 16 mov R7, LR Get hold of the current return address.
24408 18 str R7, [SP, #12] Store it.
24409 20 add R7, SP, #16 Point at the start of the
24410 backtrace structure.
24411 22 mov FP, R7 Put this value into the frame pointer. */
24413 work_register
= thumb_find_work_register (live_regs_mask
);
24414 work_reg
= gen_rtx_REG (SImode
, work_register
);
24415 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24417 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24418 stack_pointer_rtx
, GEN_INT (-16)));
24419 RTX_FRAME_RELATED_P (insn
) = 1;
24423 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24424 RTX_FRAME_RELATED_P (insn
) = 1;
24426 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24429 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24430 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24432 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24433 x
= gen_frame_mem (SImode
, x
);
24434 emit_move_insn (x
, work_reg
);
24436 /* Make sure that the instruction fetching the PC is in the right place
24437 to calculate "start of backtrace creation code + 12". */
24438 /* ??? The stores using the common WORK_REG ought to be enough to
24439 prevent the scheduler from doing anything weird. Failing that
24440 we could always move all of the following into an UNSPEC_VOLATILE. */
24443 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24444 emit_move_insn (work_reg
, x
);
24446 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24447 x
= gen_frame_mem (SImode
, x
);
24448 emit_move_insn (x
, work_reg
);
24450 emit_move_insn (work_reg
, arm_hfp_rtx
);
24452 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24453 x
= gen_frame_mem (SImode
, x
);
24454 emit_move_insn (x
, work_reg
);
24458 emit_move_insn (work_reg
, arm_hfp_rtx
);
24460 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24461 x
= gen_frame_mem (SImode
, x
);
24462 emit_move_insn (x
, work_reg
);
24464 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24465 emit_move_insn (work_reg
, x
);
24467 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24468 x
= gen_frame_mem (SImode
, x
);
24469 emit_move_insn (x
, work_reg
);
24472 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24473 emit_move_insn (work_reg
, x
);
24475 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24476 x
= gen_frame_mem (SImode
, x
);
24477 emit_move_insn (x
, work_reg
);
24479 x
= GEN_INT (offset
+ 12);
24480 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24482 emit_move_insn (arm_hfp_rtx
, work_reg
);
24484 /* Optimization: If we are not pushing any low registers but we are going
24485 to push some high registers then delay our first push. This will just
24486 be a push of LR and we can combine it with the push of the first high
24488 else if ((l_mask
& 0xff) != 0
24489 || (high_regs_pushed
== 0 && l_mask
))
24491 unsigned long mask
= l_mask
;
24492 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24493 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24494 RTX_FRAME_RELATED_P (insn
) = 1;
24497 if (high_regs_pushed
)
24499 unsigned pushable_regs
;
24500 unsigned next_hi_reg
;
24501 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24502 : crtl
->args
.info
.nregs
;
24503 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24505 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24506 if (live_regs_mask
& (1 << next_hi_reg
))
24509 /* Here we need to mask out registers used for passing arguments
24510 even if they can be pushed. This is to avoid using them to stash the high
24511 registers. Such kind of stash may clobber the use of arguments. */
24512 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
24514 if (pushable_regs
== 0)
24515 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24517 while (high_regs_pushed
> 0)
24519 unsigned long real_regs_mask
= 0;
24521 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
24523 if (pushable_regs
& (1 << regno
))
24525 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24526 gen_rtx_REG (SImode
, next_hi_reg
));
24528 high_regs_pushed
--;
24529 real_regs_mask
|= (1 << next_hi_reg
);
24531 if (high_regs_pushed
)
24533 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24535 if (live_regs_mask
& (1 << next_hi_reg
))
24540 pushable_regs
&= ~((1 << regno
) - 1);
24546 /* If we had to find a work register and we have not yet
24547 saved the LR then add it to the list of regs to push. */
24548 if (l_mask
== (1 << LR_REGNUM
))
24550 pushable_regs
|= l_mask
;
24551 real_regs_mask
|= l_mask
;
24555 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
24556 RTX_FRAME_RELATED_P (insn
) = 1;
24560 /* Load the pic register before setting the frame pointer,
24561 so we can use r7 as a temporary work register. */
24562 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24563 arm_load_pic_register (live_regs_mask
);
24565 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24566 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24567 stack_pointer_rtx
);
24569 if (flag_stack_usage_info
)
24570 current_function_static_stack_size
24571 = offsets
->outgoing_args
- offsets
->saved_args
;
24573 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24574 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
24579 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24580 GEN_INT (- amount
)));
24581 RTX_FRAME_RELATED_P (insn
) = 1;
24587 /* The stack decrement is too big for an immediate value in a single
24588 insn. In theory we could issue multiple subtracts, but after
24589 three of them it becomes more space efficient to place the full
24590 value in the constant pool and load into a register. (Also the
24591 ARM debugger really likes to see only one stack decrement per
24592 function). So instead we look for a scratch register into which
24593 we can load the decrement, and then we subtract this from the
24594 stack pointer. Unfortunately on the thumb the only available
24595 scratch registers are the argument registers, and we cannot use
24596 these as they may hold arguments to the function. Instead we
24597 attempt to locate a call preserved register which is used by this
24598 function. If we can find one, then we know that it will have
24599 been pushed at the start of the prologue and so we can corrupt
24601 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
24602 if (live_regs_mask
& (1 << regno
))
24605 gcc_assert(regno
<= LAST_LO_REGNUM
);
24607 reg
= gen_rtx_REG (SImode
, regno
);
24609 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
24611 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24612 stack_pointer_rtx
, reg
));
24614 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
24615 plus_constant (Pmode
, stack_pointer_rtx
,
24617 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
24618 RTX_FRAME_RELATED_P (insn
) = 1;
24622 if (frame_pointer_needed
)
24623 thumb_set_frame_pointer (offsets
);
24625 /* If we are profiling, make sure no instructions are scheduled before
24626 the call to mcount. Similarly if the user has requested no
24627 scheduling in the prolog. Similarly if we want non-call exceptions
24628 using the EABI unwinder, to prevent faulting instructions from being
24629 swapped with a stack adjustment. */
24630 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
24631 || (arm_except_unwind_info (&global_options
) == UI_TARGET
24632 && cfun
->can_throw_non_call_exceptions
))
24633 emit_insn (gen_blockage ());
24635 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
24636 if (live_regs_mask
& 0xff)
24637 cfun
->machine
->lr_save_eliminated
= 0;
24640 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
24641 POP instruction can be generated. LR should be replaced by PC. All
24642 the checks required are already done by USE_RETURN_INSN (). Hence,
24643 all we really need to check here is if single register is to be
24644 returned, or multiple register return. */
24646 thumb2_expand_return (bool simple_return
)
24649 unsigned long saved_regs_mask
;
24650 arm_stack_offsets
*offsets
;
24652 offsets
= arm_get_frame_offsets ();
24653 saved_regs_mask
= offsets
->saved_regs_mask
;
24655 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
24656 if (saved_regs_mask
& (1 << i
))
24659 if (!simple_return
&& saved_regs_mask
)
24663 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
24664 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
24665 rtx addr
= gen_rtx_MEM (SImode
,
24666 gen_rtx_POST_INC (SImode
,
24667 stack_pointer_rtx
));
24668 set_mem_alias_set (addr
, get_frame_alias_set ());
24669 XVECEXP (par
, 0, 0) = ret_rtx
;
24670 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
24671 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
24672 emit_jump_insn (par
);
24676 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
24677 saved_regs_mask
|= (1 << PC_REGNUM
);
24678 arm_emit_multi_reg_pop (saved_regs_mask
);
24683 emit_jump_insn (simple_return_rtx
);
24688 thumb1_expand_epilogue (void)
24690 HOST_WIDE_INT amount
;
24691 arm_stack_offsets
*offsets
;
24694 /* Naked functions don't have prologues. */
24695 if (IS_NAKED (arm_current_func_type ()))
24698 offsets
= arm_get_frame_offsets ();
24699 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24701 if (frame_pointer_needed
)
24703 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
24704 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24706 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
24708 gcc_assert (amount
>= 0);
24711 emit_insn (gen_blockage ());
24714 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24715 GEN_INT (amount
)));
24718 /* r3 is always free in the epilogue. */
24719 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
24721 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
24722 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
24726 /* Emit a USE (stack_pointer_rtx), so that
24727 the stack adjustment will not be deleted. */
24728 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24730 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
24731 emit_insn (gen_blockage ());
24733 /* Emit a clobber for each insn that will be restored in the epilogue,
24734 so that flow2 will get register lifetimes correct. */
24735 for (regno
= 0; regno
< 13; regno
++)
24736 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
24737 emit_clobber (gen_rtx_REG (SImode
, regno
));
24739 if (! df_regs_ever_live_p (LR_REGNUM
))
24740 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
24743 /* Epilogue code for APCS frame. */
24745 arm_expand_epilogue_apcs_frame (bool really_return
)
24747 unsigned long func_type
;
24748 unsigned long saved_regs_mask
;
24751 int floats_from_frame
= 0;
24752 arm_stack_offsets
*offsets
;
24754 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
24755 func_type
= arm_current_func_type ();
24757 /* Get frame offsets for ARM. */
24758 offsets
= arm_get_frame_offsets ();
24759 saved_regs_mask
= offsets
->saved_regs_mask
;
24761 /* Find the offset of the floating-point save area in the frame. */
24763 = (offsets
->saved_args
24764 + arm_compute_static_chain_stack_bytes ()
24767 /* Compute how many core registers saved and how far away the floats are. */
24768 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
24769 if (saved_regs_mask
& (1 << i
))
24772 floats_from_frame
+= 4;
24775 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
24778 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
24780 /* The offset is from IP_REGNUM. */
24781 int saved_size
= arm_get_vfp_saved_size ();
24782 if (saved_size
> 0)
24785 floats_from_frame
+= saved_size
;
24786 insn
= emit_insn (gen_addsi3 (ip_rtx
,
24787 hard_frame_pointer_rtx
,
24788 GEN_INT (-floats_from_frame
)));
24789 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
24790 ip_rtx
, hard_frame_pointer_rtx
);
24793 /* Generate VFP register multi-pop. */
24794 start_reg
= FIRST_VFP_REGNUM
;
24796 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
24797 /* Look for a case where a reg does not need restoring. */
24798 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
24799 && (!df_regs_ever_live_p (i
+ 1)
24800 || call_used_regs
[i
+ 1]))
24802 if (start_reg
!= i
)
24803 arm_emit_vfp_multi_reg_pop (start_reg
,
24804 (i
- start_reg
) / 2,
24805 gen_rtx_REG (SImode
,
24810 /* Restore the remaining regs that we have discovered (or possibly
24811 even all of them, if the conditional in the for loop never
24813 if (start_reg
!= i
)
24814 arm_emit_vfp_multi_reg_pop (start_reg
,
24815 (i
- start_reg
) / 2,
24816 gen_rtx_REG (SImode
, IP_REGNUM
));
24821 /* The frame pointer is guaranteed to be non-double-word aligned, as
24822 it is set to double-word-aligned old_stack_pointer - 4. */
24824 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
24826 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
24827 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
24829 rtx addr
= gen_frame_mem (V2SImode
,
24830 plus_constant (Pmode
, hard_frame_pointer_rtx
,
24832 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
24833 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24834 gen_rtx_REG (V2SImode
, i
),
24840 /* saved_regs_mask should contain IP which contains old stack pointer
24841 at the time of activation creation. Since SP and IP are adjacent registers,
24842 we can restore the value directly into SP. */
24843 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
24844 saved_regs_mask
&= ~(1 << IP_REGNUM
);
24845 saved_regs_mask
|= (1 << SP_REGNUM
);
24847 /* There are two registers left in saved_regs_mask - LR and PC. We
24848 only need to restore LR (the return address), but to
24849 save time we can load it directly into PC, unless we need a
24850 special function exit sequence, or we are not really returning. */
24852 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
24853 && !crtl
->calls_eh_return
)
24854 /* Delete LR from the register mask, so that LR on
24855 the stack is loaded into the PC in the register mask. */
24856 saved_regs_mask
&= ~(1 << LR_REGNUM
);
24858 saved_regs_mask
&= ~(1 << PC_REGNUM
);
24860 num_regs
= bit_count (saved_regs_mask
);
24861 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
24864 emit_insn (gen_blockage ());
24865 /* Unwind the stack to just below the saved registers. */
24866 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24867 hard_frame_pointer_rtx
,
24868 GEN_INT (- 4 * num_regs
)));
24870 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
24871 stack_pointer_rtx
, hard_frame_pointer_rtx
);
24874 arm_emit_multi_reg_pop (saved_regs_mask
);
24876 if (IS_INTERRUPT (func_type
))
24878 /* Interrupt handlers will have pushed the
24879 IP onto the stack, so restore it now. */
24881 rtx addr
= gen_rtx_MEM (SImode
,
24882 gen_rtx_POST_INC (SImode
,
24883 stack_pointer_rtx
));
24884 set_mem_alias_set (addr
, get_frame_alias_set ());
24885 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
24886 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24887 gen_rtx_REG (SImode
, IP_REGNUM
),
24891 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
24894 if (crtl
->calls_eh_return
)
24895 emit_insn (gen_addsi3 (stack_pointer_rtx
,
24897 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
24899 if (IS_STACKALIGN (func_type
))
24900 /* Restore the original stack pointer. Before prologue, the stack was
24901 realigned and the original stack pointer saved in r0. For details,
24902 see comment in arm_expand_prologue. */
24903 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
24905 emit_jump_insn (simple_return_rtx
);
24908 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24909 function is not a sibcall. */
24911 arm_expand_epilogue (bool really_return
)
24913 unsigned long func_type
;
24914 unsigned long saved_regs_mask
;
24918 arm_stack_offsets
*offsets
;
24920 func_type
= arm_current_func_type ();
24922 /* Naked functions don't have epilogue. Hence, generate return pattern, and
24923 let output_return_instruction take care of instruction emission if any. */
24924 if (IS_NAKED (func_type
)
24925 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
24928 emit_jump_insn (simple_return_rtx
);
24932 /* If we are throwing an exception, then we really must be doing a
24933 return, so we can't tail-call. */
24934 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
24936 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
24938 arm_expand_epilogue_apcs_frame (really_return
);
24942 /* Get frame offsets for ARM. */
24943 offsets
= arm_get_frame_offsets ();
24944 saved_regs_mask
= offsets
->saved_regs_mask
;
24945 num_regs
= bit_count (saved_regs_mask
);
24947 if (frame_pointer_needed
)
24950 /* Restore stack pointer if necessary. */
24953 /* In ARM mode, frame pointer points to first saved register.
24954 Restore stack pointer to last saved register. */
24955 amount
= offsets
->frame
- offsets
->saved_regs
;
24957 /* Force out any pending memory operations that reference stacked data
24958 before stack de-allocation occurs. */
24959 emit_insn (gen_blockage ());
24960 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24961 hard_frame_pointer_rtx
,
24962 GEN_INT (amount
)));
24963 arm_add_cfa_adjust_cfa_note (insn
, amount
,
24965 hard_frame_pointer_rtx
);
24967 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24969 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24973 /* In Thumb-2 mode, the frame pointer points to the last saved
24975 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24978 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
24979 hard_frame_pointer_rtx
,
24980 GEN_INT (amount
)));
24981 arm_add_cfa_adjust_cfa_note (insn
, amount
,
24982 hard_frame_pointer_rtx
,
24983 hard_frame_pointer_rtx
);
24986 /* Force out any pending memory operations that reference stacked data
24987 before stack de-allocation occurs. */
24988 emit_insn (gen_blockage ());
24989 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
24990 hard_frame_pointer_rtx
));
24991 arm_add_cfa_adjust_cfa_note (insn
, 0,
24993 hard_frame_pointer_rtx
);
24994 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24996 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25001 /* Pop off outgoing args and local frame to adjust stack pointer to
25002 last saved register. */
25003 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25007 /* Force out any pending memory operations that reference stacked data
25008 before stack de-allocation occurs. */
25009 emit_insn (gen_blockage ());
25010 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25012 GEN_INT (amount
)));
25013 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25014 stack_pointer_rtx
, stack_pointer_rtx
);
25015 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25017 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25021 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
25023 /* Generate VFP register multi-pop. */
25024 int end_reg
= LAST_VFP_REGNUM
+ 1;
25026 /* Scan the registers in reverse order. We need to match
25027 any groupings made in the prologue and generate matching
25028 vldm operations. The need to match groups is because,
25029 unlike pop, vldm can only do consecutive regs. */
25030 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25031 /* Look for a case where a reg does not need restoring. */
25032 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25033 && (!df_regs_ever_live_p (i
+ 1)
25034 || call_used_regs
[i
+ 1]))
25036 /* Restore the regs discovered so far (from reg+2 to
25038 if (end_reg
> i
+ 2)
25039 arm_emit_vfp_multi_reg_pop (i
+ 2,
25040 (end_reg
- (i
+ 2)) / 2,
25041 stack_pointer_rtx
);
25045 /* Restore the remaining regs that we have discovered (or possibly
25046 even all of them, if the conditional in the for loop never
25048 if (end_reg
> i
+ 2)
25049 arm_emit_vfp_multi_reg_pop (i
+ 2,
25050 (end_reg
- (i
+ 2)) / 2,
25051 stack_pointer_rtx
);
25055 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25056 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25059 rtx addr
= gen_rtx_MEM (V2SImode
,
25060 gen_rtx_POST_INC (SImode
,
25061 stack_pointer_rtx
));
25062 set_mem_alias_set (addr
, get_frame_alias_set ());
25063 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25064 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25065 gen_rtx_REG (V2SImode
, i
),
25067 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25068 stack_pointer_rtx
, stack_pointer_rtx
);
25071 if (saved_regs_mask
)
25074 bool return_in_pc
= false;
25076 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25077 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25078 && !IS_STACKALIGN (func_type
)
25080 && crtl
->args
.pretend_args_size
== 0
25081 && saved_regs_mask
& (1 << LR_REGNUM
)
25082 && !crtl
->calls_eh_return
)
25084 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25085 saved_regs_mask
|= (1 << PC_REGNUM
);
25086 return_in_pc
= true;
25089 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25091 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25092 if (saved_regs_mask
& (1 << i
))
25094 rtx addr
= gen_rtx_MEM (SImode
,
25095 gen_rtx_POST_INC (SImode
,
25096 stack_pointer_rtx
));
25097 set_mem_alias_set (addr
, get_frame_alias_set ());
25099 if (i
== PC_REGNUM
)
25101 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25102 XVECEXP (insn
, 0, 0) = ret_rtx
;
25103 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25105 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25106 insn
= emit_jump_insn (insn
);
25110 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25112 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25113 gen_rtx_REG (SImode
, i
),
25115 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25117 stack_pointer_rtx
);
25124 && current_tune
->prefer_ldrd_strd
25125 && !optimize_function_for_size_p (cfun
))
25128 thumb2_emit_ldrd_pop (saved_regs_mask
);
25129 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25130 arm_emit_ldrd_pop (saved_regs_mask
);
25132 arm_emit_multi_reg_pop (saved_regs_mask
);
25135 arm_emit_multi_reg_pop (saved_regs_mask
);
25142 if (crtl
->args
.pretend_args_size
)
25145 rtx dwarf
= NULL_RTX
;
25147 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25149 GEN_INT (crtl
->args
.pretend_args_size
)));
25151 RTX_FRAME_RELATED_P (tmp
) = 1;
25153 if (cfun
->machine
->uses_anonymous_args
)
25155 /* Restore pretend args. Refer arm_expand_prologue on how to save
25156 pretend_args in stack. */
25157 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25158 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25159 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25160 if (saved_regs_mask
& (1 << i
))
25162 rtx reg
= gen_rtx_REG (SImode
, i
);
25163 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25166 REG_NOTES (tmp
) = dwarf
;
25168 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
25169 stack_pointer_rtx
, stack_pointer_rtx
);
25172 if (!really_return
)
25175 if (crtl
->calls_eh_return
)
25176 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25178 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25180 if (IS_STACKALIGN (func_type
))
25181 /* Restore the original stack pointer. Before prologue, the stack was
25182 realigned and the original stack pointer saved in r0. For details,
25183 see comment in arm_expand_prologue. */
25184 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25186 emit_jump_insn (simple_return_rtx
);
25189 /* Implementation of insn prologue_thumb1_interwork. This is the first
25190 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25193 thumb1_output_interwork (void)
25196 FILE *f
= asm_out_file
;
25198 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25199 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25201 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25203 /* Generate code sequence to switch us into Thumb mode. */
25204 /* The .code 32 directive has already been emitted by
25205 ASM_DECLARE_FUNCTION_NAME. */
25206 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25207 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25209 /* Generate a label, so that the debugger will notice the
25210 change in instruction sets. This label is also used by
25211 the assembler to bypass the ARM code when this function
25212 is called from a Thumb encoded function elsewhere in the
25213 same file. Hence the definition of STUB_NAME here must
25214 agree with the definition in gas/config/tc-arm.c. */
25216 #define STUB_NAME ".real_start_of"
25218 fprintf (f
, "\t.code\t16\n");
25220 if (arm_dllexport_name_p (name
))
25221 name
= arm_strip_name_encoding (name
);
25223 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25224 fprintf (f
, "\t.thumb_func\n");
25225 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
25230 /* Handle the case of a double word load into a low register from
25231 a computed memory address. The computed address may involve a
25232 register which is overwritten by the load. */
25234 thumb_load_double_from_address (rtx
*operands
)
25242 gcc_assert (REG_P (operands
[0]));
25243 gcc_assert (MEM_P (operands
[1]));
25245 /* Get the memory address. */
25246 addr
= XEXP (operands
[1], 0);
25248 /* Work out how the memory address is computed. */
25249 switch (GET_CODE (addr
))
25252 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25254 if (REGNO (operands
[0]) == REGNO (addr
))
25256 output_asm_insn ("ldr\t%H0, %2", operands
);
25257 output_asm_insn ("ldr\t%0, %1", operands
);
25261 output_asm_insn ("ldr\t%0, %1", operands
);
25262 output_asm_insn ("ldr\t%H0, %2", operands
);
25267 /* Compute <address> + 4 for the high order load. */
25268 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25270 output_asm_insn ("ldr\t%0, %1", operands
);
25271 output_asm_insn ("ldr\t%H0, %2", operands
);
25275 arg1
= XEXP (addr
, 0);
25276 arg2
= XEXP (addr
, 1);
25278 if (CONSTANT_P (arg1
))
25279 base
= arg2
, offset
= arg1
;
25281 base
= arg1
, offset
= arg2
;
25283 gcc_assert (REG_P (base
));
25285 /* Catch the case of <address> = <reg> + <reg> */
25286 if (REG_P (offset
))
25288 int reg_offset
= REGNO (offset
);
25289 int reg_base
= REGNO (base
);
25290 int reg_dest
= REGNO (operands
[0]);
25292 /* Add the base and offset registers together into the
25293 higher destination register. */
25294 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25295 reg_dest
+ 1, reg_base
, reg_offset
);
25297 /* Load the lower destination register from the address in
25298 the higher destination register. */
25299 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25300 reg_dest
, reg_dest
+ 1);
25302 /* Load the higher destination register from its own address
25304 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25305 reg_dest
+ 1, reg_dest
+ 1);
25309 /* Compute <address> + 4 for the high order load. */
25310 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25312 /* If the computed address is held in the low order register
25313 then load the high order register first, otherwise always
25314 load the low order register first. */
25315 if (REGNO (operands
[0]) == REGNO (base
))
25317 output_asm_insn ("ldr\t%H0, %2", operands
);
25318 output_asm_insn ("ldr\t%0, %1", operands
);
25322 output_asm_insn ("ldr\t%0, %1", operands
);
25323 output_asm_insn ("ldr\t%H0, %2", operands
);
25329 /* With no registers to worry about we can just load the value
25331 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25333 output_asm_insn ("ldr\t%H0, %2", operands
);
25334 output_asm_insn ("ldr\t%0, %1", operands
);
25338 gcc_unreachable ();
25345 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
25352 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25355 operands
[4] = operands
[5];
25358 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
25359 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
25363 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25364 std::swap (operands
[4], operands
[5]);
25365 if (REGNO (operands
[5]) > REGNO (operands
[6]))
25366 std::swap (operands
[5], operands
[6]);
25367 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25368 std::swap (operands
[4], operands
[5]);
25370 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
25371 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
25375 gcc_unreachable ();
25381 /* Output a call-via instruction for thumb state. */
25383 thumb_call_via_reg (rtx reg
)
25385 int regno
= REGNO (reg
);
25388 gcc_assert (regno
< LR_REGNUM
);
25390 /* If we are in the normal text section we can use a single instance
25391 per compilation unit. If we are doing function sections, then we need
25392 an entry per section, since we can't rely on reachability. */
25393 if (in_section
== text_section
)
25395 thumb_call_reg_needed
= 1;
25397 if (thumb_call_via_label
[regno
] == NULL
)
25398 thumb_call_via_label
[regno
] = gen_label_rtx ();
25399 labelp
= thumb_call_via_label
+ regno
;
25403 if (cfun
->machine
->call_via
[regno
] == NULL
)
25404 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
25405 labelp
= cfun
->machine
->call_via
+ regno
;
25408 output_asm_insn ("bl\t%a0", labelp
);
25412 /* Routines for generating rtl. */
25414 thumb_expand_movmemqi (rtx
*operands
)
25416 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
25417 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
25418 HOST_WIDE_INT len
= INTVAL (operands
[2]);
25419 HOST_WIDE_INT offset
= 0;
25423 emit_insn (gen_movmem12b (out
, in
, out
, in
));
25429 emit_insn (gen_movmem8b (out
, in
, out
, in
));
25435 rtx reg
= gen_reg_rtx (SImode
);
25436 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
25437 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
25444 rtx reg
= gen_reg_rtx (HImode
);
25445 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
25446 plus_constant (Pmode
, in
,
25448 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
25457 rtx reg
= gen_reg_rtx (QImode
);
25458 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
25459 plus_constant (Pmode
, in
,
25461 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
25468 thumb_reload_out_hi (rtx
*operands
)
25470 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
25473 /* Handle reading a half-word from memory during reload. */
25475 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
25477 gcc_unreachable ();
25480 /* Return the length of a function name prefix
25481 that starts with the character 'c'. */
25483 arm_get_strip_length (int c
)
25487 ARM_NAME_ENCODING_LENGTHS
25492 /* Return a pointer to a function's name with any
25493 and all prefix encodings stripped from it. */
25495 arm_strip_name_encoding (const char *name
)
25499 while ((skip
= arm_get_strip_length (* name
)))
25505 /* If there is a '*' anywhere in the name's prefix, then
25506 emit the stripped name verbatim, otherwise prepend an
25507 underscore if leading underscores are being used. */
25509 arm_asm_output_labelref (FILE *stream
, const char *name
)
25514 while ((skip
= arm_get_strip_length (* name
)))
25516 verbatim
|= (*name
== '*');
25521 fputs (name
, stream
);
25523 asm_fprintf (stream
, "%U%s", name
);
25526 /* This function is used to emit an EABI tag and its associated value.
25527 We emit the numerical value of the tag in case the assembler does not
25528 support textual tags. (Eg gas prior to 2.20). If requested we include
25529 the tag name in a comment so that anyone reading the assembler output
25530 will know which tag is being set.
25532 This function is not static because arm-c.c needs it too. */
25535 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
25537 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
25538 if (flag_verbose_asm
|| flag_debug_asm
)
25539 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
25540 asm_fprintf (asm_out_file
, "\n");
25543 /* This function is used to print CPU tuning information as comment
25544 in assembler file. Pointers are not printed for now. */
25547 arm_print_tune_info (void)
25549 asm_fprintf (asm_out_file
, "\t@.tune parameters\n");
25550 asm_fprintf (asm_out_file
, "\t\t@constant_limit:\t%d\n",
25551 current_tune
->constant_limit
);
25552 asm_fprintf (asm_out_file
, "\t\t@max_insns_skipped:\t%d\n",
25553 current_tune
->max_insns_skipped
);
25554 asm_fprintf (asm_out_file
, "\t\t@prefetch.num_slots:\t%d\n",
25555 current_tune
->prefetch
.num_slots
);
25556 asm_fprintf (asm_out_file
, "\t\t@prefetch.l1_cache_size:\t%d\n",
25557 current_tune
->prefetch
.l1_cache_size
);
25558 asm_fprintf (asm_out_file
, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25559 current_tune
->prefetch
.l1_cache_line_size
);
25560 asm_fprintf (asm_out_file
, "\t\t@prefer_constant_pool:\t%d\n",
25561 (int) current_tune
->prefer_constant_pool
);
25562 asm_fprintf (asm_out_file
, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25563 asm_fprintf (asm_out_file
, "\t\t\t\ts&p\tcost\n");
25564 asm_fprintf (asm_out_file
, "\t\t\t\t00\t%d\n",
25565 current_tune
->branch_cost (false, false));
25566 asm_fprintf (asm_out_file
, "\t\t\t\t01\t%d\n",
25567 current_tune
->branch_cost (false, true));
25568 asm_fprintf (asm_out_file
, "\t\t\t\t10\t%d\n",
25569 current_tune
->branch_cost (true, false));
25570 asm_fprintf (asm_out_file
, "\t\t\t\t11\t%d\n",
25571 current_tune
->branch_cost (true, true));
25572 asm_fprintf (asm_out_file
, "\t\t@prefer_ldrd_strd:\t%d\n",
25573 (int) current_tune
->prefer_ldrd_strd
);
25574 asm_fprintf (asm_out_file
, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25575 (int) current_tune
->logical_op_non_short_circuit_thumb
,
25576 (int) current_tune
->logical_op_non_short_circuit_arm
);
25577 asm_fprintf (asm_out_file
, "\t\t@prefer_neon_for_64bits:\t%d\n",
25578 (int) current_tune
->prefer_neon_for_64bits
);
25579 asm_fprintf (asm_out_file
,
25580 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25581 (int) current_tune
->disparage_flag_setting_t16_encodings
);
25582 asm_fprintf (asm_out_file
, "\t\t@string_ops_prefer_neon:\t%d\n",
25583 (int) current_tune
->string_ops_prefer_neon
);
25584 asm_fprintf (asm_out_file
, "\t\t@max_insns_inline_memset:\t%d\n",
25585 current_tune
->max_insns_inline_memset
);
25586 asm_fprintf (asm_out_file
, "\t\t@fusible_ops:\t%u\n",
25587 current_tune
->fusible_ops
);
25588 asm_fprintf (asm_out_file
, "\t\t@sched_autopref:\t%d\n",
25589 (int) current_tune
->sched_autopref
);
25593 arm_file_start (void)
25599 const char *fpu_name
;
25600 if (arm_selected_arch
)
25602 /* armv7ve doesn't support any extensions. */
25603 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
25605 /* Keep backward compatability for assemblers
25606 which don't support armv7ve. */
25607 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
25608 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
25609 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
25610 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
25611 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
25615 const char* pos
= strchr (arm_selected_arch
->name
, '+');
25619 gcc_assert (strlen (arm_selected_arch
->name
)
25620 <= sizeof (buf
) / sizeof (*pos
));
25621 strncpy (buf
, arm_selected_arch
->name
,
25622 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
25623 buf
[pos
- arm_selected_arch
->name
] = '\0';
25624 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
25625 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
25628 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
25631 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
25632 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
25635 const char* truncated_name
25636 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
25637 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
25640 if (print_tune_info
)
25641 arm_print_tune_info ();
25643 if (TARGET_SOFT_FLOAT
)
25645 fpu_name
= "softvfp";
25649 fpu_name
= arm_fpu_desc
->name
;
25650 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
25652 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
25653 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25655 if (TARGET_HARD_FLOAT_ABI
)
25656 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25659 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
25661 /* Some of these attributes only apply when the corresponding features
25662 are used. However we don't have any easy way of figuring this out.
25663 Conservatively record the setting that would have been used. */
25665 if (flag_rounding_math
)
25666 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25668 if (!flag_unsafe_math_optimizations
)
25670 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25671 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25673 if (flag_signaling_nans
)
25674 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25676 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25677 flag_finite_math_only
? 1 : 3);
25679 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25680 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25681 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25682 flag_short_enums
? 1 : 2);
25684 /* Tag_ABI_optimization_goals. */
25687 else if (optimize
>= 2)
25693 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
25695 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25698 if (arm_fp16_format
)
25699 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25700 (int) arm_fp16_format
);
25702 if (arm_lang_output_object_attributes_hook
)
25703 arm_lang_output_object_attributes_hook();
25706 default_file_start ();
25710 arm_file_end (void)
25714 if (NEED_INDICATE_EXEC_STACK
)
25715 /* Add .note.GNU-stack. */
25716 file_end_indicate_exec_stack ();
25718 if (! thumb_call_reg_needed
)
25721 switch_to_section (text_section
);
25722 asm_fprintf (asm_out_file
, "\t.code 16\n");
25723 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
25725 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
25727 rtx label
= thumb_call_via_label
[regno
];
25731 targetm
.asm_out
.internal_label (asm_out_file
, "L",
25732 CODE_LABEL_NUMBER (label
));
25733 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
25739 /* Symbols in the text segment can be accessed without indirecting via the
25740 constant pool; it may take an extra binary operation, but this is still
25741 faster than indirecting via memory. Don't do this when not optimizing,
25742 since we won't be calculating all of the offsets necessary to do this
25746 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
25748 if (optimize
> 0 && TREE_CONSTANT (decl
))
25749 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
25751 default_encode_section_info (decl
, rtl
, first
);
25753 #endif /* !ARM_PE */
25756 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
25758 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
25759 && !strcmp (prefix
, "L"))
25761 arm_ccfsm_state
= 0;
25762 arm_target_insn
= NULL
;
25764 default_internal_label (stream
, prefix
, labelno
);
25767 /* Output code to add DELTA to the first argument, and then jump
25768 to FUNCTION. Used for C++ multiple inheritance. */
25770 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
25771 HOST_WIDE_INT delta
,
25772 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
25775 static int thunk_label
= 0;
25778 int mi_delta
= delta
;
25779 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
25781 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
25784 mi_delta
= - mi_delta
;
25786 final_start_function (emit_barrier (), file
, 1);
25790 int labelno
= thunk_label
++;
25791 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
25792 /* Thunks are entered in arm mode when avaiable. */
25793 if (TARGET_THUMB1_ONLY
)
25795 /* push r3 so we can use it as a temporary. */
25796 /* TODO: Omit this save if r3 is not used. */
25797 fputs ("\tpush {r3}\n", file
);
25798 fputs ("\tldr\tr3, ", file
);
25802 fputs ("\tldr\tr12, ", file
);
25804 assemble_name (file
, label
);
25805 fputc ('\n', file
);
25808 /* If we are generating PIC, the ldr instruction below loads
25809 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25810 the address of the add + 8, so we have:
25812 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25815 Note that we have "+ 1" because some versions of GNU ld
25816 don't set the low bit of the result for R_ARM_REL32
25817 relocations against thumb function symbols.
25818 On ARMv6M this is +4, not +8. */
25819 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
25820 assemble_name (file
, labelpc
);
25821 fputs (":\n", file
);
25822 if (TARGET_THUMB1_ONLY
)
25824 /* This is 2 insns after the start of the thunk, so we know it
25825 is 4-byte aligned. */
25826 fputs ("\tadd\tr3, pc, r3\n", file
);
25827 fputs ("\tmov r12, r3\n", file
);
25830 fputs ("\tadd\tr12, pc, r12\n", file
);
25832 else if (TARGET_THUMB1_ONLY
)
25833 fputs ("\tmov r12, r3\n", file
);
25835 if (TARGET_THUMB1_ONLY
)
25837 if (mi_delta
> 255)
25839 fputs ("\tldr\tr3, ", file
);
25840 assemble_name (file
, label
);
25841 fputs ("+4\n", file
);
25842 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
25843 mi_op
, this_regno
, this_regno
);
25845 else if (mi_delta
!= 0)
25847 /* Thumb1 unified syntax requires s suffix in instruction name when
25848 one of the operands is immediate. */
25849 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
25850 mi_op
, this_regno
, this_regno
,
25856 /* TODO: Use movw/movt for large constants when available. */
25857 while (mi_delta
!= 0)
25859 if ((mi_delta
& (3 << shift
)) == 0)
25863 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
25864 mi_op
, this_regno
, this_regno
,
25865 mi_delta
& (0xff << shift
));
25866 mi_delta
&= ~(0xff << shift
);
25873 if (TARGET_THUMB1_ONLY
)
25874 fputs ("\tpop\t{r3}\n", file
);
25876 fprintf (file
, "\tbx\tr12\n");
25877 ASM_OUTPUT_ALIGN (file
, 2);
25878 assemble_name (file
, label
);
25879 fputs (":\n", file
);
25882 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25883 rtx tem
= XEXP (DECL_RTL (function
), 0);
25884 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25885 pipeline offset is four rather than eight. Adjust the offset
25887 tem
= plus_constant (GET_MODE (tem
), tem
,
25888 TARGET_THUMB1_ONLY
? -3 : -7);
25889 tem
= gen_rtx_MINUS (GET_MODE (tem
),
25891 gen_rtx_SYMBOL_REF (Pmode
,
25892 ggc_strdup (labelpc
)));
25893 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
25896 /* Output ".word .LTHUNKn". */
25897 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
25899 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
25900 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
25904 fputs ("\tb\t", file
);
25905 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
25906 if (NEED_PLT_RELOC
)
25907 fputs ("(PLT)", file
);
25908 fputc ('\n', file
);
25911 final_end_function ();
25915 arm_emit_vector_const (FILE *file
, rtx x
)
25918 const char * pattern
;
25920 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
25922 switch (GET_MODE (x
))
25924 case V2SImode
: pattern
= "%08x"; break;
25925 case V4HImode
: pattern
= "%04x"; break;
25926 case V8QImode
: pattern
= "%02x"; break;
25927 default: gcc_unreachable ();
25930 fprintf (file
, "0x");
25931 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
25935 element
= CONST_VECTOR_ELT (x
, i
);
25936 fprintf (file
, pattern
, INTVAL (element
));
25942 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25943 HFmode constant pool entries are actually loaded with ldr. */
25945 arm_emit_fp16_const (rtx c
)
25950 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
25951 bits
= real_to_target (NULL
, &r
, HFmode
);
25952 if (WORDS_BIG_ENDIAN
)
25953 assemble_zeros (2);
25954 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
25955 if (!WORDS_BIG_ENDIAN
)
25956 assemble_zeros (2);
25960 arm_output_load_gr (rtx
*operands
)
25967 if (!MEM_P (operands
[1])
25968 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
25969 || !REG_P (reg
= XEXP (sum
, 0))
25970 || !CONST_INT_P (offset
= XEXP (sum
, 1))
25971 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
25972 return "wldrw%?\t%0, %1";
25974 /* Fix up an out-of-range load of a GR register. */
25975 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
25976 wcgr
= operands
[0];
25978 output_asm_insn ("ldr%?\t%0, %1", operands
);
25980 operands
[0] = wcgr
;
25982 output_asm_insn ("tmcr%?\t%0, %1", operands
);
25983 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
25988 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25990 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25991 named arg and all anonymous args onto the stack.
25992 XXX I know the prologue shouldn't be pushing registers, but it is faster
25996 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26000 int second_time ATTRIBUTE_UNUSED
)
26002 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26005 cfun
->machine
->uses_anonymous_args
= 1;
26006 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26008 nregs
= pcum
->aapcs_ncrn
;
26009 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
26013 nregs
= pcum
->nregs
;
26015 if (nregs
< NUM_ARG_REGS
)
26016 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26019 /* We can't rely on the caller doing the proper promotion when
26020 using APCS or ATPCS. */
26023 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26025 return !TARGET_AAPCS_BASED
;
26028 static machine_mode
26029 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26031 int *punsignedp ATTRIBUTE_UNUSED
,
26032 const_tree fntype ATTRIBUTE_UNUSED
,
26033 int for_return ATTRIBUTE_UNUSED
)
26035 if (GET_MODE_CLASS (mode
) == MODE_INT
26036 && GET_MODE_SIZE (mode
) < 4)
26042 /* AAPCS based ABIs use short enums by default. */
26045 arm_default_short_enums (void)
26047 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
26051 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26054 arm_align_anon_bitfield (void)
26056 return TARGET_AAPCS_BASED
;
26060 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26063 arm_cxx_guard_type (void)
26065 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26069 /* The EABI says test the least significant bit of a guard variable. */
26072 arm_cxx_guard_mask_bit (void)
26074 return TARGET_AAPCS_BASED
;
26078 /* The EABI specifies that all array cookies are 8 bytes long. */
26081 arm_get_cookie_size (tree type
)
26085 if (!TARGET_AAPCS_BASED
)
26086 return default_cxx_get_cookie_size (type
);
26088 size
= build_int_cst (sizetype
, 8);
26093 /* The EABI says that array cookies should also contain the element size. */
26096 arm_cookie_has_size (void)
26098 return TARGET_AAPCS_BASED
;
26102 /* The EABI says constructors and destructors should return a pointer to
26103 the object constructed/destroyed. */
26106 arm_cxx_cdtor_returns_this (void)
26108 return TARGET_AAPCS_BASED
;
26111 /* The EABI says that an inline function may never be the key method.  */
26115 arm_cxx_key_method_may_be_inline (void)
26117 return !TARGET_AAPCS_BASED
;
26121 arm_cxx_determine_class_data_visibility (tree decl
)
26123 if (!TARGET_AAPCS_BASED
26124 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26127 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26128 is exported. However, on systems without dynamic vague linkage,
26129 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26130 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26131 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26133 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26134 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26138 arm_cxx_class_data_always_comdat (void)
26140 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26141 vague linkage if the class has no key function. */
26142 return !TARGET_AAPCS_BASED
;
26146 /* The EABI says __aeabi_atexit should be used to register static destructors.  */
26150 arm_cxx_use_aeabi_atexit (void)
26152 return TARGET_AAPCS_BASED
;
26157 arm_set_return_address (rtx source
, rtx scratch
)
26159 arm_stack_offsets
*offsets
;
26160 HOST_WIDE_INT delta
;
26162 unsigned long saved_regs
;
26164 offsets
= arm_get_frame_offsets ();
26165 saved_regs
= offsets
->saved_regs_mask
;
26167 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26168 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26171 if (frame_pointer_needed
)
26172 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26175 /* LR will be the first saved register. */
26176 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26181 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26182 GEN_INT (delta
& ~4095)));
26187 addr
= stack_pointer_rtx
;
26189 addr
= plus_constant (Pmode
, addr
, delta
);
26191 /* The store needs to be marked as frame related in order to prevent
26192 DSE from deleting it as dead if it is based on fp. */
26193 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26194 RTX_FRAME_RELATED_P (insn
) = 1;
26195 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26201 thumb_set_return_address (rtx source
, rtx scratch
)
26203 arm_stack_offsets
*offsets
;
26204 HOST_WIDE_INT delta
;
26205 HOST_WIDE_INT limit
;
26208 unsigned long mask
;
26212 offsets
= arm_get_frame_offsets ();
26213 mask
= offsets
->saved_regs_mask
;
26214 if (mask
& (1 << LR_REGNUM
))
26217 /* Find the saved regs. */
26218 if (frame_pointer_needed
)
26220 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26221 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26227 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26230 /* Allow for the stack frame. */
26231 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26233 /* The link register is always the first saved register. */
26236 /* Construct the address. */
26237 addr
= gen_rtx_REG (SImode
, reg
);
26240 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26241 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26245 addr
= plus_constant (Pmode
, addr
, delta
);
26247 /* The store needs to be marked as frame related in order to prevent
26248 DSE from deleting it as dead if it is based on fp. */
26249 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26250 RTX_FRAME_RELATED_P (insn
) = 1;
26251 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26254 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26257 /* Implements target hook vector_mode_supported_p. */
26259 arm_vector_mode_supported_p (machine_mode mode
)
26261 /* Neon also supports V2SImode, etc. listed in the clause below. */
26262 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26263 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
26266 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26267 && ((mode
== V2SImode
)
26268 || (mode
== V4HImode
)
26269 || (mode
== V8QImode
)))
26272 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26273 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26274 || mode
== V2HAmode
))
26280 /* Implements target hook array_mode_supported_p. */
26283 arm_array_mode_supported_p (machine_mode mode
,
26284 unsigned HOST_WIDE_INT nelems
)
26287 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26288 && (nelems
>= 2 && nelems
<= 4))
26294 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26295 registers when autovectorizing for Neon, at least until multiple vector
26296 widths are supported properly by the middle-end. */
26298 static machine_mode
26299 arm_preferred_simd_mode (machine_mode mode
)
26305 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26307 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26309 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26311 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26313 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26320 if (TARGET_REALLY_IWMMXT
)
26336 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26338 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26339 using r0-r4 for function arguments, r7 for the stack frame and don't have
26340 enough left over to do doubleword arithmetic. For Thumb-2 all the
26341 potentially problematic instructions accept high registers so this is not
26342 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26343 that require many low registers. */
26345 arm_class_likely_spilled_p (reg_class_t rclass
)
26347 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
26348 || rclass
== CC_REG
)
26354 /* Implements target hook small_register_classes_for_mode_p. */
26356 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
26358 return TARGET_THUMB1
;
26361 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26362 ARM insns and therefore guarantee that the shift count is modulo 256.
26363 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26364 guarantee no particular behavior for out-of-range counts. */
26366 static unsigned HOST_WIDE_INT
26367 arm_shift_truncation_mask (machine_mode mode
)
26369 return mode
== SImode
? 255 : 0;
26373 /* Map internal gcc register numbers to DWARF2 register numbers. */
26376 arm_dbx_register_number (unsigned int regno
)
26381 if (IS_VFP_REGNUM (regno
))
26383 /* See comment in arm_dwarf_register_span. */
26384 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26385 return 64 + regno
- FIRST_VFP_REGNUM
;
26387 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
26390 if (IS_IWMMXT_GR_REGNUM (regno
))
26391 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
26393 if (IS_IWMMXT_REGNUM (regno
))
26394 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
26396 return DWARF_FRAME_REGISTERS
;
26399 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26400 GCC models them as 64 32-bit registers, so we need to describe this to
26401 the DWARF generation code. Other registers can use the default. */
26403 arm_dwarf_register_span (rtx rtl
)
26411 regno
= REGNO (rtl
);
26412 if (!IS_VFP_REGNUM (regno
))
26415 /* XXX FIXME: The EABI defines two VFP register ranges:
26416 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26418 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26419 corresponding D register. Until GDB supports this, we shall use the
26420 legacy encodings. We also use these encodings for D0-D15 for
26421 compatibility with older debuggers. */
26422 mode
= GET_MODE (rtl
);
26423 if (GET_MODE_SIZE (mode
) < 8)
26426 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26428 nregs
= GET_MODE_SIZE (mode
) / 4;
26429 for (i
= 0; i
< nregs
; i
+= 2)
26430 if (TARGET_BIG_END
)
26432 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26433 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
26437 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
26438 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26443 nregs
= GET_MODE_SIZE (mode
) / 8;
26444 for (i
= 0; i
< nregs
; i
++)
26445 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
26448 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
26451 #if ARM_UNWIND_INFO
26452 /* Emit unwind directives for a store-multiple instruction or stack pointer
26453 push during alignment.
26454 These should only ever be generated by the function prologue code, so
26455 expect them to have a particular form.
26456 The store-multiple instruction sometimes pushes pc as the last register,
26457 although it should not be tracked into unwind information, or for -Os
26458 sometimes pushes some dummy registers before first register that needs
26459 to be tracked in unwind information; such dummy registers are there just
26460 to avoid separate stack adjustment, and will not be restored in the
26464 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
26467 HOST_WIDE_INT offset
;
26468 HOST_WIDE_INT nregs
;
26472 unsigned padfirst
= 0, padlast
= 0;
26475 e
= XVECEXP (p
, 0, 0);
26476 gcc_assert (GET_CODE (e
) == SET
);
26478 /* First insn will adjust the stack pointer. */
26479 gcc_assert (GET_CODE (e
) == SET
26480 && REG_P (SET_DEST (e
))
26481 && REGNO (SET_DEST (e
)) == SP_REGNUM
26482 && GET_CODE (SET_SRC (e
)) == PLUS
);
26484 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
26485 nregs
= XVECLEN (p
, 0) - 1;
26486 gcc_assert (nregs
);
26488 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
26491 /* For -Os dummy registers can be pushed at the beginning to
26492 avoid separate stack pointer adjustment. */
26493 e
= XVECEXP (p
, 0, 1);
26494 e
= XEXP (SET_DEST (e
), 0);
26495 if (GET_CODE (e
) == PLUS
)
26496 padfirst
= INTVAL (XEXP (e
, 1));
26497 gcc_assert (padfirst
== 0 || optimize_size
);
26498 /* The function prologue may also push pc, but not annotate it as it is
26499 never restored. We turn this into a stack pointer adjustment. */
26500 e
= XVECEXP (p
, 0, nregs
);
26501 e
= XEXP (SET_DEST (e
), 0);
26502 if (GET_CODE (e
) == PLUS
)
26503 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
26505 padlast
= offset
- 4;
26506 gcc_assert (padlast
== 0 || padlast
== 4);
26508 fprintf (asm_out_file
, "\t.pad #4\n");
26510 fprintf (asm_out_file
, "\t.save {");
26512 else if (IS_VFP_REGNUM (reg
))
26515 fprintf (asm_out_file
, "\t.vsave {");
26518 /* Unknown register type. */
26519 gcc_unreachable ();
26521 /* If the stack increment doesn't match the size of the saved registers,
26522 something has gone horribly wrong. */
26523 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
26527 /* The remaining insns will describe the stores. */
26528 for (i
= 1; i
<= nregs
; i
++)
26530 /* Expect (set (mem <addr>) (reg)).
26531 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26532 e
= XVECEXP (p
, 0, i
);
26533 gcc_assert (GET_CODE (e
) == SET
26534 && MEM_P (SET_DEST (e
))
26535 && REG_P (SET_SRC (e
)));
26537 reg
= REGNO (SET_SRC (e
));
26538 gcc_assert (reg
>= lastreg
);
26541 fprintf (asm_out_file
, ", ");
26542 /* We can't use %r for vfp because we need to use the
26543 double precision register names. */
26544 if (IS_VFP_REGNUM (reg
))
26545 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
26547 asm_fprintf (asm_out_file
, "%r", reg
);
26549 #ifdef ENABLE_CHECKING
26550 /* Check that the addresses are consecutive. */
26551 e
= XEXP (SET_DEST (e
), 0);
26552 if (GET_CODE (e
) == PLUS
)
26553 gcc_assert (REG_P (XEXP (e
, 0))
26554 && REGNO (XEXP (e
, 0)) == SP_REGNUM
26555 && CONST_INT_P (XEXP (e
, 1))
26556 && offset
== INTVAL (XEXP (e
, 1)));
26560 && REGNO (e
) == SP_REGNUM
);
26561 offset
+= reg_size
;
26564 fprintf (asm_out_file
, "}\n");
26566 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
26569 /* Emit unwind directives for a SET. */
26572 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
26580 switch (GET_CODE (e0
))
26583 /* Pushing a single register. */
26584 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
26585 || !REG_P (XEXP (XEXP (e0
, 0), 0))
26586 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
26589 asm_fprintf (asm_out_file
, "\t.save ");
26590 if (IS_VFP_REGNUM (REGNO (e1
)))
26591 asm_fprintf(asm_out_file
, "{d%d}\n",
26592 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
26594 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
26598 if (REGNO (e0
) == SP_REGNUM
)
26600 /* A stack increment. */
26601 if (GET_CODE (e1
) != PLUS
26602 || !REG_P (XEXP (e1
, 0))
26603 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
26604 || !CONST_INT_P (XEXP (e1
, 1)))
26607 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
26608 -INTVAL (XEXP (e1
, 1)));
26610 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
26612 HOST_WIDE_INT offset
;
26614 if (GET_CODE (e1
) == PLUS
)
26616 if (!REG_P (XEXP (e1
, 0))
26617 || !CONST_INT_P (XEXP (e1
, 1)))
26619 reg
= REGNO (XEXP (e1
, 0));
26620 offset
= INTVAL (XEXP (e1
, 1));
26621 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
26622 HARD_FRAME_POINTER_REGNUM
, reg
,
26625 else if (REG_P (e1
))
26628 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
26629 HARD_FRAME_POINTER_REGNUM
, reg
);
26634 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
26636 /* Move from sp to reg. */
26637 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
26639 else if (GET_CODE (e1
) == PLUS
26640 && REG_P (XEXP (e1
, 0))
26641 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
26642 && CONST_INT_P (XEXP (e1
, 1)))
26644 /* Set reg to offset from sp. */
26645 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
26646 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
26658 /* Emit unwind directives for the given insn. */
26661 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
26664 bool handled_one
= false;
26666 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
26669 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
26670 && (TREE_NOTHROW (current_function_decl
)
26671 || crtl
->all_throwers_are_sibcalls
))
26674 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
26677 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
26679 switch (REG_NOTE_KIND (note
))
26681 case REG_FRAME_RELATED_EXPR
:
26682 pat
= XEXP (note
, 0);
26685 case REG_CFA_REGISTER
:
26686 pat
= XEXP (note
, 0);
26689 pat
= PATTERN (insn
);
26690 if (GET_CODE (pat
) == PARALLEL
)
26691 pat
= XVECEXP (pat
, 0, 0);
26694 /* Only emitted for IS_STACKALIGN re-alignment. */
26699 src
= SET_SRC (pat
);
26700 dest
= SET_DEST (pat
);
26702 gcc_assert (src
== stack_pointer_rtx
);
26703 reg
= REGNO (dest
);
26704 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26707 handled_one
= true;
26710 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26711 to get correct dwarf information for shrink-wrap. We should not
26712 emit unwind information for it because these are used either for
26713 pretend arguments or notes to adjust sp and restore registers from
26715 case REG_CFA_DEF_CFA
:
26716 case REG_CFA_ADJUST_CFA
:
26717 case REG_CFA_RESTORE
:
26720 case REG_CFA_EXPRESSION
:
26721 case REG_CFA_OFFSET
:
26722 /* ??? Only handling here what we actually emit. */
26723 gcc_unreachable ();
26731 pat
= PATTERN (insn
);
26734 switch (GET_CODE (pat
))
26737 arm_unwind_emit_set (asm_out_file
, pat
);
26741 /* Store multiple. */
26742 arm_unwind_emit_sequence (asm_out_file
, pat
);
26751 /* Output a reference from a function exception table to the type_info
26752 object X. The EABI specifies that the symbol should be relocated by
26753 an R_ARM_TARGET2 relocation. */
26756 arm_output_ttype (rtx x
)
26758 fputs ("\t.word\t", asm_out_file
);
26759 output_addr_const (asm_out_file
, x
);
26760 /* Use special relocations for symbol references. */
26761 if (!CONST_INT_P (x
))
26762 fputs ("(TARGET2)", asm_out_file
);
26763 fputc ('\n', asm_out_file
);
26768 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26771 arm_asm_emit_except_personality (rtx personality
)
26773 fputs ("\t.personality\t", asm_out_file
);
26774 output_addr_const (asm_out_file
, personality
);
26775 fputc ('\n', asm_out_file
);
26778 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26781 arm_asm_init_sections (void)
26783 exception_section
= get_unnamed_section (0, output_section_asm_op
,
26786 #endif /* ARM_UNWIND_INFO */
26788 /* Output unwind directives for the start/end of a function. */
26791 arm_output_fn_unwind (FILE * f
, bool prologue
)
26793 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
26797 fputs ("\t.fnstart\n", f
);
26800 /* If this function will never be unwound, then mark it as such.
26801 The came condition is used in arm_unwind_emit to suppress
26802 the frame annotations. */
26803 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
26804 && (TREE_NOTHROW (current_function_decl
)
26805 || crtl
->all_throwers_are_sibcalls
))
26806 fputs("\t.cantunwind\n", f
);
26808 fputs ("\t.fnend\n", f
);
26813 arm_emit_tls_decoration (FILE *fp
, rtx x
)
26815 enum tls_reloc reloc
;
26818 val
= XVECEXP (x
, 0, 0);
26819 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
26821 output_addr_const (fp
, val
);
26826 fputs ("(tlsgd)", fp
);
26829 fputs ("(tlsldm)", fp
);
26832 fputs ("(tlsldo)", fp
);
26835 fputs ("(gottpoff)", fp
);
26838 fputs ("(tpoff)", fp
);
26841 fputs ("(tlsdesc)", fp
);
26844 gcc_unreachable ();
26853 fputs (" + (. - ", fp
);
26854 output_addr_const (fp
, XVECEXP (x
, 0, 2));
26855 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26856 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
26857 output_addr_const (fp
, XVECEXP (x
, 0, 3));
26867 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26870 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
26872 gcc_assert (size
== 4);
26873 fputs ("\t.word\t", file
);
26874 output_addr_const (file
, x
);
26875 fputs ("(tlsldo)", file
);
26878 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26881 arm_output_addr_const_extra (FILE *fp
, rtx x
)
26883 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
26884 return arm_emit_tls_decoration (fp
, x
);
26885 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
26888 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
26890 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
26891 assemble_name_raw (fp
, label
);
26895 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
26897 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
26901 output_addr_const (fp
, XVECEXP (x
, 0, 0));
26905 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
26907 output_addr_const (fp
, XVECEXP (x
, 0, 0));
26911 output_addr_const (fp
, XVECEXP (x
, 0, 1));
26915 else if (GET_CODE (x
) == CONST_VECTOR
)
26916 return arm_emit_vector_const (fp
, x
);
26921 /* Output assembly for a shift instruction.
26922 SET_FLAGS determines how the instruction modifies the condition codes.
26923 0 - Do not set condition codes.
26924 1 - Set condition codes.
26925 2 - Use smallest instruction. */
26927 arm_output_shift(rtx
* operands
, int set_flags
)
26930 static const char flag_chars
[3] = {'?', '.', '!'};
26935 c
= flag_chars
[set_flags
];
26936 if (TARGET_UNIFIED_ASM
)
26938 shift
= shift_op(operands
[3], &val
);
26942 operands
[2] = GEN_INT(val
);
26943 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
26946 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
26949 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
26950 output_asm_insn (pattern
, operands
);
26954 /* Output assembly for a WMMX immediate shift instruction. */
26956 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
26958 int shift
= INTVAL (operands
[2]);
26960 machine_mode opmode
= GET_MODE (operands
[0]);
26962 gcc_assert (shift
>= 0);
26964 /* If the shift value in the register versions is > 63 (for D qualifier),
26965 31 (for W qualifier) or 15 (for H qualifier). */
26966 if (((opmode
== V4HImode
) && (shift
> 15))
26967 || ((opmode
== V2SImode
) && (shift
> 31))
26968 || ((opmode
== DImode
) && (shift
> 63)))
26972 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
26973 output_asm_insn (templ
, operands
);
26974 if (opmode
== DImode
)
26976 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
26977 output_asm_insn (templ
, operands
);
26982 /* The destination register will contain all zeros. */
26983 sprintf (templ
, "wzero\t%%0");
26984 output_asm_insn (templ
, operands
);
26989 if ((opmode
== DImode
) && (shift
> 32))
26991 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
26992 output_asm_insn (templ
, operands
);
26993 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
26994 output_asm_insn (templ
, operands
);
26998 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
26999 output_asm_insn (templ
, operands
);
27004 /* Output assembly for a WMMX tinsr instruction. */
27006 arm_output_iwmmxt_tinsr (rtx
*operands
)
27008 int mask
= INTVAL (operands
[3]);
27011 int units
= mode_nunits
[GET_MODE (operands
[0])];
27012 gcc_assert ((mask
& (mask
- 1)) == 0);
27013 for (i
= 0; i
< units
; ++i
)
27015 if ((mask
& 0x01) == 1)
27021 gcc_assert (i
< units
);
27023 switch (GET_MODE (operands
[0]))
27026 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27029 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27032 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27035 gcc_unreachable ();
27038 output_asm_insn (templ
, operands
);
27043 /* Output a Thumb-1 casesi dispatch sequence. */
27045 thumb1_output_casesi (rtx
*operands
)
27047 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27049 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27051 switch (GET_MODE(diff_vec
))
27054 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27055 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27057 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27058 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27060 return "bl\t%___gnu_thumb1_case_si";
27062 gcc_unreachable ();
27066 /* Output a Thumb-2 casesi instruction. */
27068 thumb2_output_casesi (rtx
*operands
)
27070 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27072 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27074 output_asm_insn ("cmp\t%0, %1", operands
);
27075 output_asm_insn ("bhi\t%l3", operands
);
27076 switch (GET_MODE(diff_vec
))
27079 return "tbb\t[%|pc, %0]";
27081 return "tbh\t[%|pc, %0, lsl #1]";
27085 output_asm_insn ("adr\t%4, %l2", operands
);
27086 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27087 output_asm_insn ("add\t%4, %4, %5", operands
);
27092 output_asm_insn ("adr\t%4, %l2", operands
);
27093 return "ldr\t%|pc, [%4, %0, lsl #2]";
27096 gcc_unreachable ();
27100 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27101 per-core tuning structs. */
27103 arm_issue_rate (void)
27105 return current_tune
->issue_rate
;
27108 /* Return how many instructions should scheduler lookahead to choose the
27111 arm_first_cycle_multipass_dfa_lookahead (void)
27113 int issue_rate
= arm_issue_rate ();
27115 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27118 /* Enable modeling of L2 auto-prefetcher. */
27120 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27122 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27126 arm_mangle_type (const_tree type
)
27128 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27129 has to be managled as if it is in the "std" namespace. */
27130 if (TARGET_AAPCS_BASED
27131 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27132 return "St9__va_list";
27134 /* Half-precision float. */
27135 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27138 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27140 if (TYPE_NAME (type
) != NULL
)
27141 return arm_mangle_builtin_type (type
);
27143 /* Use the default mangling. */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};
27159 /* Adjust register allocation order when compiling for Thumb. */
27162 arm_order_regs_for_local_alloc (void)
27164 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27165 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27167 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27168 sizeof (thumb_core_reg_alloc_order
));
27171 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27174 arm_frame_pointer_required (void)
27176 return (cfun
->has_nonlocal_label
27177 || SUBTARGET_FRAME_POINTER_REQUIRED
27178 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
27181 /* Only thumb1 can't support conditional execution, so return true if
27182 the target is not thumb1. */
27184 arm_have_conditional_execution (void)
27186 return !TARGET_THUMB1
;
27189 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27190 static HOST_WIDE_INT
27191 arm_vector_alignment (const_tree type
)
27193 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27195 if (TARGET_AAPCS_BASED
)
27196 align
= MIN (align
, 64);
27201 static unsigned int
27202 arm_autovectorize_vector_sizes (void)
27204 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27208 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27210 /* Vectors which aren't in packed structures will not be less aligned than
27211 the natural alignment of their element type, so this is safe. */
27212 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27215 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27219 arm_builtin_support_vector_misalignment (machine_mode mode
,
27220 const_tree type
, int misalignment
,
27223 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27225 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27230 /* If the misalignment is unknown, we should be able to handle the access
27231 so long as it is not to a member of a packed data structure. */
27232 if (misalignment
== -1)
27235 /* Return true if the misalignment is a multiple of the natural alignment
27236 of the vector's element type. This is probably always going to be
27237 true in practice, since we've already established that this isn't a
27239 return ((misalignment
% align
) == 0);
27242 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27247 arm_conditional_register_usage (void)
27251 if (TARGET_THUMB1
&& optimize_size
)
27253 /* When optimizing for size on Thumb-1, it's better not
27254 to use the HI regs, because of the overhead of
27256 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27257 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27260 /* The link register can be clobbered by any branch insn,
27261 but we have no way to track that at present, so mark
27262 it as unavailable. */
27264 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27266 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
27268 /* VFPv3 registers are disabled when earlier VFP
27269 versions are selected due to the definition of
27270 LAST_VFP_REGNUM. */
27271 for (regno
= FIRST_VFP_REGNUM
;
27272 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27274 fixed_regs
[regno
] = 0;
27275 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27276 || regno
>= FIRST_VFP_REGNUM
+ 32;
27280 if (TARGET_REALLY_IWMMXT
)
27282 regno
= FIRST_IWMMXT_GR_REGNUM
;
27283 /* The 2002/10/09 revision of the XScale ABI has wCG0
27284 and wCG1 as call-preserved registers. The 2002/11/21
27285 revision changed this so that all wCG registers are
27286 scratch registers. */
27287 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27288 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27289 fixed_regs
[regno
] = 0;
27290 /* The XScale ABI has wR0 - wR9 as scratch registers,
27291 the rest as call-preserved registers. */
27292 for (regno
= FIRST_IWMMXT_REGNUM
;
27293 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27295 fixed_regs
[regno
] = 0;
27296 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27300 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27302 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27303 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27305 else if (TARGET_APCS_STACK
)
27307 fixed_regs
[10] = 1;
27308 call_used_regs
[10] = 1;
27310 /* -mcaller-super-interworking reserves r11 for calls to
27311 _interwork_r11_call_via_rN(). Making the register global
27312 is an easy way of ensuring that it remains valid for all
27314 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27315 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27317 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27318 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27319 if (TARGET_CALLER_INTERWORKING
)
27320 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27322 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27326 arm_preferred_rename_class (reg_class_t rclass
)
27328 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27329 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27330 and code size can be reduced. */
27331 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27337 /* Compute the atrribute "length" of insn "*push_multi".
27338 So this function MUST be kept in sync with that insn pattern. */
27340 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
27342 int i
, regno
, hi_reg
;
27343 int num_saves
= XVECLEN (parallel_op
, 0);
27353 regno
= REGNO (first_op
);
27354 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27355 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
27357 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
27358 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27366 /* Compute the number of instructions emitted by output_move_double. */
27368 arm_count_output_move_double_insns (rtx
*operands
)
27372 /* output_move_double may modify the operands array, so call it
27373 here on a copy of the array. */
27374 ops
[0] = operands
[0];
27375 ops
[1] = operands
[1];
27376 output_move_double (ops
, false, &count
);
27381 vfp3_const_double_for_fract_bits (rtx operand
)
27383 REAL_VALUE_TYPE r0
;
27385 if (!CONST_DOUBLE_P (operand
))
27388 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
27389 if (exact_real_inverse (DFmode
, &r0
)
27390 && !REAL_VALUE_NEGATIVE (r0
))
27392 if (exact_real_truncate (DFmode
, &r0
))
27394 HOST_WIDE_INT value
= real_to_integer (&r0
);
27395 value
= value
& 0xffffffff;
27396 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
27397 return int_log2 (value
);
27404 vfp3_const_double_for_bits (rtx operand
)
27406 REAL_VALUE_TYPE r0
;
27408 if (!CONST_DOUBLE_P (operand
))
27411 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
27412 if (exact_real_truncate (DFmode
, &r0
))
27414 HOST_WIDE_INT value
= real_to_integer (&r0
);
27415 value
= value
& 0xffffffff;
27416 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
27417 return int_log2 (value
);
27423 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27426 arm_pre_atomic_barrier (enum memmodel model
)
27428 if (need_atomic_barrier_p (model
, true))
27429 emit_insn (gen_memory_barrier ());
27433 arm_post_atomic_barrier (enum memmodel model
)
27435 if (need_atomic_barrier_p (model
, false))
27436 emit_insn (gen_memory_barrier ());
27439 /* Emit the load-exclusive and store-exclusive instructions.
27440 Use acquire and release versions if necessary. */
27443 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
27445 rtx (*gen
) (rtx
, rtx
);
27451 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
27452 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
27453 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
27454 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
27456 gcc_unreachable ();
27463 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
27464 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
27465 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
27466 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
27468 gcc_unreachable ();
27472 emit_insn (gen (rval
, mem
));
27476 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
27479 rtx (*gen
) (rtx
, rtx
, rtx
);
27485 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
27486 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
27487 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
27488 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
27490 gcc_unreachable ();
27497 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
27498 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
27499 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
27500 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
27502 gcc_unreachable ();
27506 emit_insn (gen (bval
, rval
, mem
));
27509 /* Mark the previous jump instruction as unlikely. */
27512 emit_unlikely_jump (rtx insn
)
27514 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
27516 insn
= emit_jump_insn (insn
);
27517 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
27520 /* Expand a compare and swap pattern. */
27523 arm_expand_compare_and_swap (rtx operands
[])
27525 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
27527 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
27529 bval
= operands
[0];
27530 rval
= operands
[1];
27532 oldval
= operands
[3];
27533 newval
= operands
[4];
27534 is_weak
= operands
[5];
27535 mod_s
= operands
[6];
27536 mod_f
= operands
[7];
27537 mode
= GET_MODE (mem
);
27539 /* Normally the succ memory model must be stronger than fail, but in the
27540 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27541 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27543 if (TARGET_HAVE_LDACQ
27544 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
27545 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
27546 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
27552 /* For narrow modes, we're going to perform the comparison in SImode,
27553 so do the zero-extension now. */
27554 rval
= gen_reg_rtx (SImode
);
27555 oldval
= convert_modes (SImode
, mode
, oldval
, true);
27559 /* Force the value into a register if needed. We waited until after
27560 the zero-extension above to do this properly. */
27561 if (!arm_add_operand (oldval
, SImode
))
27562 oldval
= force_reg (SImode
, oldval
);
27566 if (!cmpdi_operand (oldval
, mode
))
27567 oldval
= force_reg (mode
, oldval
);
27571 gcc_unreachable ();
27576 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
27577 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
27578 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
27579 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
27581 gcc_unreachable ();
27584 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
27586 if (mode
== QImode
|| mode
== HImode
)
27587 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
27589 /* In all cases, we arrange for success to be signaled by Z set.
27590 This arrangement allows for the boolean result to be used directly
27591 in a subsequent branch, post optimization. */
27592 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
27593 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
27594 emit_insn (gen_rtx_SET (bval
, x
));
27597 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27598 another memory store between the load-exclusive and store-exclusive can
27599 reset the monitor from Exclusive to Open state. This means we must wait
27600 until after reload to split the pattern, lest we get a register spill in
27601 the middle of the atomic sequence. */
27604 arm_split_compare_and_swap (rtx operands
[])
27606 rtx rval
, mem
, oldval
, newval
, scratch
;
27608 enum memmodel mod_s
, mod_f
;
27610 rtx_code_label
*label1
, *label2
;
27613 rval
= operands
[0];
27615 oldval
= operands
[2];
27616 newval
= operands
[3];
27617 is_weak
= (operands
[4] != const0_rtx
);
27618 mod_s
= memmodel_from_int (INTVAL (operands
[5]));
27619 mod_f
= memmodel_from_int (INTVAL (operands
[6]));
27620 scratch
= operands
[7];
27621 mode
= GET_MODE (mem
);
27623 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
27625 bool use_acquire
= TARGET_HAVE_LDACQ
27626 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
27627 || is_mm_release (mod_s
));
27629 bool use_release
= TARGET_HAVE_LDACQ
27630 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
27631 || is_mm_acquire (mod_s
));
27633 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27634 a full barrier is emitted after the store-release. */
27636 use_acquire
= false;
27638 /* Checks whether a barrier is needed and emits one accordingly. */
27639 if (!(use_acquire
|| use_release
))
27640 arm_pre_atomic_barrier (mod_s
);
27645 label1
= gen_label_rtx ();
27646 emit_label (label1
);
27648 label2
= gen_label_rtx ();
27650 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
27652 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
27653 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27654 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
27655 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
27656 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
27658 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
27660 /* Weak or strong, we want EQ to be true for success, so that we
27661 match the flags that we got from the compare above. */
27662 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
27663 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
27664 emit_insn (gen_rtx_SET (cond
, x
));
27668 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27669 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
27670 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
27671 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
27674 if (!is_mm_relaxed (mod_f
))
27675 emit_label (label2
);
27677 /* Checks whether a barrier is needed and emits one accordingly. */
27679 || !(use_acquire
|| use_release
))
27680 arm_post_atomic_barrier (mod_s
);
27682 if (is_mm_relaxed (mod_f
))
27683 emit_label (label2
);
27687 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
27688 rtx value
, rtx model_rtx
, rtx cond
)
27690 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
27691 machine_mode mode
= GET_MODE (mem
);
27692 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
27693 rtx_code_label
*label
;
27696 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
27698 bool use_acquire
= TARGET_HAVE_LDACQ
27699 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
27700 || is_mm_release (model
));
27702 bool use_release
= TARGET_HAVE_LDACQ
27703 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
27704 || is_mm_acquire (model
));
27706 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
27707 a full barrier is emitted after the store-release. */
27709 use_acquire
= false;
27711 /* Checks whether a barrier is needed and emits one accordingly. */
27712 if (!(use_acquire
|| use_release
))
27713 arm_pre_atomic_barrier (model
);
27715 label
= gen_label_rtx ();
27716 emit_label (label
);
27719 new_out
= gen_lowpart (wmode
, new_out
);
27721 old_out
= gen_lowpart (wmode
, old_out
);
27724 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
27726 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
27735 x
= gen_rtx_AND (wmode
, old_out
, value
);
27736 emit_insn (gen_rtx_SET (new_out
, x
));
27737 x
= gen_rtx_NOT (wmode
, new_out
);
27738 emit_insn (gen_rtx_SET (new_out
, x
));
27742 if (CONST_INT_P (value
))
27744 value
= GEN_INT (-INTVAL (value
));
27750 if (mode
== DImode
)
27752 /* DImode plus/minus need to clobber flags. */
27753 /* The adddi3 and subdi3 patterns are incorrectly written so that
27754 they require matching operands, even when we could easily support
27755 three operands. Thankfully, this can be fixed up post-splitting,
27756 as the individual add+adc patterns do accept three operands and
27757 post-reload cprop can make these moves go away. */
27758 emit_move_insn (new_out
, old_out
);
27760 x
= gen_adddi3 (new_out
, new_out
, value
);
27762 x
= gen_subdi3 (new_out
, new_out
, value
);
27769 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
27770 emit_insn (gen_rtx_SET (new_out
, x
));
27774 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
27777 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27778 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
27780 /* Checks whether a barrier is needed and emits one accordingly. */
27782 || !(use_acquire
|| use_release
))
27783 arm_post_atomic_barrier (model
);
27786 #define MAX_VECT_LEN 16
27788 struct expand_vec_perm_d
27790 rtx target
, op0
, op1
;
27791 unsigned char perm
[MAX_VECT_LEN
];
27792 machine_mode vmode
;
27793 unsigned char nelt
;
27798 /* Generate a variable permutation. */
27801 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
27803 machine_mode vmode
= GET_MODE (target
);
27804 bool one_vector_p
= rtx_equal_p (op0
, op1
);
27806 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
27807 gcc_checking_assert (GET_MODE (op0
) == vmode
);
27808 gcc_checking_assert (GET_MODE (op1
) == vmode
);
27809 gcc_checking_assert (GET_MODE (sel
) == vmode
);
27810 gcc_checking_assert (TARGET_NEON
);
27814 if (vmode
== V8QImode
)
27815 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
27817 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
27823 if (vmode
== V8QImode
)
27825 pair
= gen_reg_rtx (V16QImode
);
27826 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
27827 pair
= gen_lowpart (TImode
, pair
);
27828 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
27832 pair
= gen_reg_rtx (OImode
);
27833 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
27834 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
27840 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
27842 machine_mode vmode
= GET_MODE (target
);
27843 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
27844 bool one_vector_p
= rtx_equal_p (op0
, op1
);
27845 rtx rmask
[MAX_VECT_LEN
], mask
;
27847 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27848 numbering of elements for big-endian, we must reverse the order. */
27849 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
27851 /* The VTBL instruction does not use a modulo index, so we must take care
27852 of that ourselves. */
27853 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27854 for (i
= 0; i
< nelt
; ++i
)
27856 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
27857 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
27859 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
27862 /* Generate or test for an insn that supports a constant permutation. */
27864 /* Recognize patterns for the VUZP insns. */
27867 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
27869 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
27870 rtx out0
, out1
, in0
, in1
, x
;
27871 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
27873 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
27876 /* Note that these are little-endian tests. Adjust for big-endian later. */
27877 if (d
->perm
[0] == 0)
27879 else if (d
->perm
[0] == 1)
27883 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27885 for (i
= 0; i
< nelt
; i
++)
27887 unsigned elt
= (i
* 2 + odd
) & mask
;
27888 if (d
->perm
[i
] != elt
)
27898 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
27899 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
27900 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
27901 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
27902 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
27903 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
27904 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
27905 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
27907 gcc_unreachable ();
27912 if (BYTES_BIG_ENDIAN
)
27914 x
= in0
, in0
= in1
, in1
= x
;
27919 out1
= gen_reg_rtx (d
->vmode
);
27921 x
= out0
, out0
= out1
, out1
= x
;
27923 emit_insn (gen (out0
, in0
, in1
, out1
));
27927 /* Recognize patterns for the VZIP insns. */
27930 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
27932 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
27933 rtx out0
, out1
, in0
, in1
, x
;
27934 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
27936 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
27939 /* Note that these are little-endian tests. Adjust for big-endian later. */
27941 if (d
->perm
[0] == high
)
27943 else if (d
->perm
[0] == 0)
27947 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27949 for (i
= 0; i
< nelt
/ 2; i
++)
27951 unsigned elt
= (i
+ high
) & mask
;
27952 if (d
->perm
[i
* 2] != elt
)
27954 elt
= (elt
+ nelt
) & mask
;
27955 if (d
->perm
[i
* 2 + 1] != elt
)
27965 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
27966 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
27967 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
27968 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
27969 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
27970 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
27971 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
27972 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
27974 gcc_unreachable ();
27979 if (BYTES_BIG_ENDIAN
)
27981 x
= in0
, in0
= in1
, in1
= x
;
27986 out1
= gen_reg_rtx (d
->vmode
);
27988 x
= out0
, out0
= out1
, out1
= x
;
27990 emit_insn (gen (out0
, in0
, in1
, out1
));
27994 /* Recognize patterns for the VREV insns. */
27997 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
27999 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
28000 rtx (*gen
)(rtx
, rtx
);
28002 if (!d
->one_vector_p
)
28011 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28012 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28020 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28021 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28022 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28023 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28031 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
28032 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
28033 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
28034 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
28035 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
28036 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
28037 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
28038 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
28047 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
28048 for (j
= 0; j
<= diff
; j
+= 1)
28050 /* This is guaranteed to be true as the value of diff
28051 is 7, 3, 1 and we should have enough elements in the
28052 queue to generate this. Getting a vector mask with a
28053 value of diff other than these values implies that
28054 something is wrong by the time we get here. */
28055 gcc_assert (i
+ j
< nelt
);
28056 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
28064 emit_insn (gen (d
->target
, d
->op0
));
28068 /* Recognize patterns for the VTRN insns. */
28071 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
28073 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28074 rtx out0
, out1
, in0
, in1
, x
;
28075 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28077 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28080 /* Note that these are little-endian tests. Adjust for big-endian later. */
28081 if (d
->perm
[0] == 0)
28083 else if (d
->perm
[0] == 1)
28087 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28089 for (i
= 0; i
< nelt
; i
+= 2)
28091 if (d
->perm
[i
] != i
+ odd
)
28093 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28103 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28104 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28105 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28106 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28107 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
28108 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
28109 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
28110 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
28112 gcc_unreachable ();
28117 if (BYTES_BIG_ENDIAN
)
28119 x
= in0
, in0
= in1
, in1
= x
;
28124 out1
= gen_reg_rtx (d
->vmode
);
28126 x
= out0
, out0
= out1
, out1
= x
;
28128 emit_insn (gen (out0
, in0
, in1
, out1
));
28132 /* Recognize patterns for the VEXT insns. */
28135 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
28137 unsigned int i
, nelt
= d
->nelt
;
28138 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
28141 unsigned int location
;
28143 unsigned int next
= d
->perm
[0] + 1;
28145 /* TODO: Handle GCC's numbering of elements for big-endian. */
28146 if (BYTES_BIG_ENDIAN
)
28149 /* Check if the extracted indexes are increasing by one. */
28150 for (i
= 1; i
< nelt
; next
++, i
++)
28152 /* If we hit the most significant element of the 2nd vector in
28153 the previous iteration, no need to test further. */
28154 if (next
== 2 * nelt
)
28157 /* If we are operating on only one vector: it could be a
28158 rotation. If there are only two elements of size < 64, let
28159 arm_evpc_neon_vrev catch it. */
28160 if (d
->one_vector_p
&& (next
== nelt
))
28162 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
28168 if (d
->perm
[i
] != next
)
28172 location
= d
->perm
[0];
28176 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
28177 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
28178 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
28179 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
28180 case V2SImode
: gen
= gen_neon_vextv2si
; break;
28181 case V4SImode
: gen
= gen_neon_vextv4si
; break;
28182 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
28183 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
28184 case V2DImode
: gen
= gen_neon_vextv2di
; break;
28193 offset
= GEN_INT (location
);
28194 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
28198 /* The NEON VTBL instruction is a fully variable permuation that's even
28199 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28200 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28201 can do slightly better by expanding this as a constant where we don't
28202 have to apply a mask. */
28205 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
28207 rtx rperm
[MAX_VECT_LEN
], sel
;
28208 machine_mode vmode
= d
->vmode
;
28209 unsigned int i
, nelt
= d
->nelt
;
28211 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28212 numbering of elements for big-endian, we must reverse the order. */
28213 if (BYTES_BIG_ENDIAN
)
28219 /* Generic code will try constant permutation twice. Once with the
28220 original mode and again with the elements lowered to QImode.
28221 So wait and don't do the selector expansion ourselves. */
28222 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
28225 for (i
= 0; i
< nelt
; ++i
)
28226 rperm
[i
] = GEN_INT (d
->perm
[i
]);
28227 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
28228 sel
= force_reg (vmode
, sel
);
28230 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
28235 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
28237 /* Check if the input mask matches vext before reordering the
28240 if (arm_evpc_neon_vext (d
))
28243 /* The pattern matching functions above are written to look for a small
28244 number to begin the sequence (0, 1, N/2). If we begin with an index
28245 from the second operand, we can swap the operands. */
28246 if (d
->perm
[0] >= d
->nelt
)
28248 unsigned i
, nelt
= d
->nelt
;
28251 for (i
= 0; i
< nelt
; ++i
)
28252 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
28261 if (arm_evpc_neon_vuzp (d
))
28263 if (arm_evpc_neon_vzip (d
))
28265 if (arm_evpc_neon_vrev (d
))
28267 if (arm_evpc_neon_vtrn (d
))
28269 return arm_evpc_neon_vtbl (d
);
28274 /* Expand a vec_perm_const pattern. */
28277 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28279 struct expand_vec_perm_d d
;
28280 int i
, nelt
, which
;
28286 d
.vmode
= GET_MODE (target
);
28287 gcc_assert (VECTOR_MODE_P (d
.vmode
));
28288 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
28289 d
.testing_p
= false;
28291 for (i
= which
= 0; i
< nelt
; ++i
)
28293 rtx e
= XVECEXP (sel
, 0, i
);
28294 int ei
= INTVAL (e
) & (2 * nelt
- 1);
28295 which
|= (ei
< nelt
? 1 : 2);
28305 d
.one_vector_p
= false;
28306 if (!rtx_equal_p (op0
, op1
))
28309 /* The elements of PERM do not suggest that only the first operand
28310 is used, but both operands are identical. Allow easier matching
28311 of the permutation by folding the permutation into the single
28315 for (i
= 0; i
< nelt
; ++i
)
28316 d
.perm
[i
] &= nelt
- 1;
28318 d
.one_vector_p
= true;
28323 d
.one_vector_p
= true;
28327 return arm_expand_vec_perm_const_1 (&d
);
28330 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28333 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
28334 const unsigned char *sel
)
28336 struct expand_vec_perm_d d
;
28337 unsigned int i
, nelt
, which
;
28341 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
28342 d
.testing_p
= true;
28343 memcpy (d
.perm
, sel
, nelt
);
28345 /* Categorize the set of elements in the selector. */
28346 for (i
= which
= 0; i
< nelt
; ++i
)
28348 unsigned char e
= d
.perm
[i
];
28349 gcc_assert (e
< 2 * nelt
);
28350 which
|= (e
< nelt
? 1 : 2);
28353 /* For all elements from second vector, fold the elements to first. */
28355 for (i
= 0; i
< nelt
; ++i
)
28358 /* Check whether the mask can be applied to the vector type. */
28359 d
.one_vector_p
= (which
!= 3);
28361 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
28362 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
28363 if (!d
.one_vector_p
)
28364 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
28367 ret
= arm_expand_vec_perm_const_1 (&d
);
28374 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
28376 /* If we are soft float and we do not have ldrd
28377 then all auto increment forms are ok. */
28378 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
28383 /* Post increment and Pre Decrement are supported for all
28384 instruction forms except for vector forms. */
28387 if (VECTOR_MODE_P (mode
))
28389 if (code
!= ARM_PRE_DEC
)
28399 /* Without LDRD and mode size greater than
28400 word size, there is no point in auto-incrementing
28401 because ldm and stm will not have these forms. */
28402 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
28405 /* Vector and floating point modes do not support
28406 these auto increment forms. */
28407 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
28420 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28421 on ARM, since we know that shifts by negative amounts are no-ops.
28422 Additionally, the default expansion code is not available or suitable
28423 for post-reload insn splits (this can occur when the register allocator
28424 chooses not to do a shift in NEON).
28426 This function is used in both initial expand and post-reload splits, and
28427 handles all kinds of 64-bit shifts.
28429 Input requirements:
28430 - It is safe for the input and output to be the same register, but
28431 early-clobber rules apply for the shift amount and scratch registers.
28432 - Shift by register requires both scratch registers. In all other cases
28433 the scratch registers may be NULL.
28434 - Ashiftrt by a register also clobbers the CC register. */
28436 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
28437 rtx amount
, rtx scratch1
, rtx scratch2
)
28439 rtx out_high
= gen_highpart (SImode
, out
);
28440 rtx out_low
= gen_lowpart (SImode
, out
);
28441 rtx in_high
= gen_highpart (SImode
, in
);
28442 rtx in_low
= gen_lowpart (SImode
, in
);
28445 in = the register pair containing the input value.
28446 out = the destination register pair.
28447 up = the high- or low-part of each pair.
28448 down = the opposite part to "up".
28449 In a shift, we can consider bits to shift from "up"-stream to
28450 "down"-stream, so in a left-shift "up" is the low-part and "down"
28451 is the high-part of each register pair. */
28453 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
28454 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
28455 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
28456 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
28458 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
28460 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
28461 && GET_MODE (out
) == DImode
);
28463 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
28464 && GET_MODE (in
) == DImode
);
28466 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
28467 && GET_MODE (amount
) == SImode
)
28468 || CONST_INT_P (amount
)));
28469 gcc_assert (scratch1
== NULL
28470 || (GET_CODE (scratch1
) == SCRATCH
)
28471 || (GET_MODE (scratch1
) == SImode
28472 && REG_P (scratch1
)));
28473 gcc_assert (scratch2
== NULL
28474 || (GET_CODE (scratch2
) == SCRATCH
)
28475 || (GET_MODE (scratch2
) == SImode
28476 && REG_P (scratch2
)));
28477 gcc_assert (!REG_P (out
) || !REG_P (amount
)
28478 || !HARD_REGISTER_P (out
)
28479 || (REGNO (out
) != REGNO (amount
)
28480 && REGNO (out
) + 1 != REGNO (amount
)));
28482 /* Macros to make following code more readable. */
28483 #define SUB_32(DEST,SRC) \
28484 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28485 #define RSB_32(DEST,SRC) \
28486 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28487 #define SUB_S_32(DEST,SRC) \
28488 gen_addsi3_compare0 ((DEST), (SRC), \
28490 #define SET(DEST,SRC) \
28491 gen_rtx_SET ((DEST), (SRC))
28492 #define SHIFT(CODE,SRC,AMOUNT) \
28493 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28494 #define LSHIFT(CODE,SRC,AMOUNT) \
28495 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28496 SImode, (SRC), (AMOUNT))
28497 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28498 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28499 SImode, (SRC), (AMOUNT))
28501 gen_rtx_IOR (SImode, (A), (B))
28502 #define BRANCH(COND,LABEL) \
28503 gen_arm_cond_branch ((LABEL), \
28504 gen_rtx_ ## COND (CCmode, cc_reg, \
28508 /* Shifts by register and shifts by constant are handled separately. */
28509 if (CONST_INT_P (amount
))
28511 /* We have a shift-by-constant. */
28513 /* First, handle out-of-range shift amounts.
28514 In both cases we try to match the result an ARM instruction in a
28515 shift-by-register would give. This helps reduce execution
28516 differences between optimization levels, but it won't stop other
28517 parts of the compiler doing different things. This is "undefined
28518 behaviour, in any case. */
28519 if (INTVAL (amount
) <= 0)
28520 emit_insn (gen_movdi (out
, in
));
28521 else if (INTVAL (amount
) >= 64)
28523 if (code
== ASHIFTRT
)
28525 rtx const31_rtx
= GEN_INT (31);
28526 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
28527 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
28530 emit_insn (gen_movdi (out
, const0_rtx
));
28533 /* Now handle valid shifts. */
28534 else if (INTVAL (amount
) < 32)
28536 /* Shifts by a constant less than 32. */
28537 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
28539 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28540 emit_insn (SET (out_down
,
28541 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
28543 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
28547 /* Shifts by a constant greater than 31. */
28548 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
28550 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
28551 if (code
== ASHIFTRT
)
28552 emit_insn (gen_ashrsi3 (out_up
, in_up
,
28555 emit_insn (SET (out_up
, const0_rtx
));
28560 /* We have a shift-by-register. */
28561 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
28563 /* This alternative requires the scratch registers. */
28564 gcc_assert (scratch1
&& REG_P (scratch1
));
28565 gcc_assert (scratch2
&& REG_P (scratch2
));
28567 /* We will need the values "amount-32" and "32-amount" later.
28568 Swapping them around now allows the later code to be more general. */
28572 emit_insn (SUB_32 (scratch1
, amount
));
28573 emit_insn (RSB_32 (scratch2
, amount
));
28576 emit_insn (RSB_32 (scratch1
, amount
));
28577 /* Also set CC = amount > 32. */
28578 emit_insn (SUB_S_32 (scratch2
, amount
));
28581 emit_insn (RSB_32 (scratch1
, amount
));
28582 emit_insn (SUB_32 (scratch2
, amount
));
28585 gcc_unreachable ();
28588 /* Emit code like this:
28591 out_down = in_down << amount;
28592 out_down = (in_up << (amount - 32)) | out_down;
28593 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28594 out_up = in_up << amount;
28597 out_down = in_down >> amount;
28598 out_down = (in_up << (32 - amount)) | out_down;
28600 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28601 out_up = in_up << amount;
28604 out_down = in_down >> amount;
28605 out_down = (in_up << (32 - amount)) | out_down;
28607 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28608 out_up = in_up << amount;
28610 The ARM and Thumb2 variants are the same but implemented slightly
28611 differently. If this were only called during expand we could just
28612 use the Thumb2 case and let combine do the right thing, but this
28613 can also be called from post-reload splitters. */
28615 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28617 if (!TARGET_THUMB2
)
28619 /* Emit code for ARM mode. */
28620 emit_insn (SET (out_down
,
28621 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
28622 if (code
== ASHIFTRT
)
28624 rtx_code_label
*done_label
= gen_label_rtx ();
28625 emit_jump_insn (BRANCH (LT
, done_label
));
28626 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
28628 emit_label (done_label
);
28631 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
28636 /* Emit code for Thumb2 mode.
28637 Thumb2 can't do shift and or in one insn. */
28638 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
28639 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
28641 if (code
== ASHIFTRT
)
28643 rtx_code_label
*done_label
= gen_label_rtx ();
28644 emit_jump_insn (BRANCH (LT
, done_label
));
28645 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
28646 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
28647 emit_label (done_label
);
28651 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
28652 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
28656 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
28671 /* Returns true if a valid comparison operation and makes
28672 the operands in a form that is valid. */
28674 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
28676 enum rtx_code code
= GET_CODE (*comparison
);
28678 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
28679 ? GET_MODE (*op2
) : GET_MODE (*op1
);
28681 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
28683 if (code
== UNEQ
|| code
== LTGT
)
28686 code_int
= (int)code
;
28687 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
28688 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
28693 if (!arm_add_operand (*op1
, mode
))
28694 *op1
= force_reg (mode
, *op1
);
28695 if (!arm_add_operand (*op2
, mode
))
28696 *op2
= force_reg (mode
, *op2
);
28700 if (!cmpdi_operand (*op1
, mode
))
28701 *op1
= force_reg (mode
, *op1
);
28702 if (!cmpdi_operand (*op2
, mode
))
28703 *op2
= force_reg (mode
, *op2
);
28708 if (!arm_float_compare_operand (*op1
, mode
))
28709 *op1
= force_reg (mode
, *op1
);
28710 if (!arm_float_compare_operand (*op2
, mode
))
28711 *op2
= force_reg (mode
, *op2
);
28721 /* Maximum number of instructions to set block of memory. */
28723 arm_block_set_max_insns (void)
28725 if (optimize_function_for_size_p (cfun
))
28728 return current_tune
->max_insns_inline_memset
;
28731 /* Return TRUE if it's profitable to set block of memory for
28732 non-vectorized case. VAL is the value to set the memory
28733 with. LENGTH is the number of bytes to set. ALIGN is the
28734 alignment of the destination memory in bytes. UNALIGNED_P
28735 is TRUE if we can only set the memory with instructions
28736 meeting alignment requirements. USE_STRD_P is TRUE if we
28737 can use strd to set the memory. */
28739 arm_block_set_non_vect_profit_p (rtx val
,
28740 unsigned HOST_WIDE_INT length
,
28741 unsigned HOST_WIDE_INT align
,
28742 bool unaligned_p
, bool use_strd_p
)
28745 /* For leftovers in bytes of 0-7, we can set the memory block using
28746 strb/strh/str with minimum instruction number. */
28747 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28751 num
= arm_const_inline_cost (SET
, val
);
28752 num
+= length
/ align
+ length
% align
;
28754 else if (use_strd_p
)
28756 num
= arm_const_double_inline_cost (val
);
28757 num
+= (length
>> 3) + leftover
[length
& 7];
28761 num
= arm_const_inline_cost (SET
, val
);
28762 num
+= (length
>> 2) + leftover
[length
& 3];
28765 /* We may be able to combine last pair STRH/STRB into a single STR
28766 by shifting one byte back. */
28767 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
28770 return (num
<= arm_block_set_max_insns ());
28773 /* Return TRUE if it's profitable to set block of memory for
28774 vectorized case. LENGTH is the number of bytes to set.
28775 ALIGN is the alignment of destination memory in bytes.
28776 MODE is the vector mode used to set the memory. */
28778 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
28779 unsigned HOST_WIDE_INT align
,
28783 bool unaligned_p
= ((align
& 3) != 0);
28784 unsigned int nelt
= GET_MODE_NUNITS (mode
);
28786 /* Instruction loading constant value. */
28788 /* Instructions storing the memory. */
28789 num
+= (length
+ nelt
- 1) / nelt
;
28790 /* Instructions adjusting the address expression. Only need to
28791 adjust address expression if it's 4 bytes aligned and bytes
28792 leftover can only be stored by mis-aligned store instruction. */
28793 if (!unaligned_p
&& (length
& 3) != 0)
28796 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28797 if (!unaligned_p
&& mode
== V16QImode
)
28800 return (num
<= arm_block_set_max_insns ());
28803 /* Set a block of memory using vectorization instructions for the
28804 unaligned case. We fill the first LENGTH bytes of the memory
28805 area starting from DSTBASE with byte constant VALUE. ALIGN is
28806 the alignment requirement of memory. Return TRUE if succeeded. */
28808 arm_block_set_unaligned_vect (rtx dstbase
,
28809 unsigned HOST_WIDE_INT length
,
28810 unsigned HOST_WIDE_INT value
,
28811 unsigned HOST_WIDE_INT align
)
28813 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
28815 rtx val_elt
, val_vec
, reg
;
28816 rtx rval
[MAX_VECT_LEN
];
28817 rtx (*gen_func
) (rtx
, rtx
);
28819 unsigned HOST_WIDE_INT v
= value
;
28821 gcc_assert ((align
& 0x3) != 0);
28822 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
28823 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
28824 if (length
>= nelt_v16
)
28827 gen_func
= gen_movmisalignv16qi
;
28832 gen_func
= gen_movmisalignv8qi
;
28834 nelt_mode
= GET_MODE_NUNITS (mode
);
28835 gcc_assert (length
>= nelt_mode
);
28836 /* Skip if it isn't profitable. */
28837 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
28840 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
28841 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
28843 v
= sext_hwi (v
, BITS_PER_WORD
);
28844 val_elt
= GEN_INT (v
);
28845 for (j
= 0; j
< nelt_mode
; j
++)
28848 reg
= gen_reg_rtx (mode
);
28849 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
28850 /* Emit instruction loading the constant value. */
28851 emit_move_insn (reg
, val_vec
);
28853 /* Handle nelt_mode bytes in a vector. */
28854 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
28856 emit_insn ((*gen_func
) (mem
, reg
));
28857 if (i
+ 2 * nelt_mode
<= length
)
28858 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
28861 /* If there are not less than nelt_v8 bytes leftover, we must be in
28863 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
28865 /* Handle (8, 16) bytes leftover. */
28866 if (i
+ nelt_v8
< length
)
28868 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
28869 /* We are shifting bytes back, set the alignment accordingly. */
28870 if ((length
& 1) != 0 && align
>= 2)
28871 set_mem_align (mem
, BITS_PER_UNIT
);
28873 emit_insn (gen_movmisalignv16qi (mem
, reg
));
28875 /* Handle (0, 8] bytes leftover. */
28876 else if (i
< length
&& i
+ nelt_v8
>= length
)
28878 if (mode
== V16QImode
)
28880 reg
= gen_lowpart (V8QImode
, reg
);
28881 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, 0);
28883 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
28884 + (nelt_mode
- nelt_v8
))));
28885 /* We are shifting bytes back, set the alignment accordingly. */
28886 if ((length
& 1) != 0 && align
>= 2)
28887 set_mem_align (mem
, BITS_PER_UNIT
);
28889 emit_insn (gen_movmisalignv8qi (mem
, reg
));
28895 /* Set a block of memory using vectorization instructions for the
28896 aligned case. We fill the first LENGTH bytes of the memory area
28897 starting from DSTBASE with byte constant VALUE. ALIGN is the
28898 alignment requirement of memory. Return TRUE if succeeded. */
28900 arm_block_set_aligned_vect (rtx dstbase
,
28901 unsigned HOST_WIDE_INT length
,
28902 unsigned HOST_WIDE_INT value
,
28903 unsigned HOST_WIDE_INT align
)
28905 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
28906 rtx dst
, addr
, mem
;
28907 rtx val_elt
, val_vec
, reg
;
28908 rtx rval
[MAX_VECT_LEN
];
28910 unsigned HOST_WIDE_INT v
= value
;
28912 gcc_assert ((align
& 0x3) == 0);
28913 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
28914 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
28915 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
28920 nelt_mode
= GET_MODE_NUNITS (mode
);
28921 gcc_assert (length
>= nelt_mode
);
28922 /* Skip if it isn't profitable. */
28923 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
28926 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
28928 v
= sext_hwi (v
, BITS_PER_WORD
);
28929 val_elt
= GEN_INT (v
);
28930 for (j
= 0; j
< nelt_mode
; j
++)
28933 reg
= gen_reg_rtx (mode
);
28934 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
28935 /* Emit instruction loading the constant value. */
28936 emit_move_insn (reg
, val_vec
);
28939 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28940 if (mode
== V16QImode
)
28942 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
28943 emit_insn (gen_movmisalignv16qi (mem
, reg
));
28945 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28946 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
28948 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
28949 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
28950 /* We are shifting bytes back, set the alignment accordingly. */
28951 if ((length
& 0x3) == 0)
28952 set_mem_align (mem
, BITS_PER_UNIT
* 4);
28953 else if ((length
& 0x1) == 0)
28954 set_mem_align (mem
, BITS_PER_UNIT
* 2);
28956 set_mem_align (mem
, BITS_PER_UNIT
);
28958 emit_insn (gen_movmisalignv16qi (mem
, reg
));
28961 /* Fall through for bytes leftover. */
28963 nelt_mode
= GET_MODE_NUNITS (mode
);
28964 reg
= gen_lowpart (V8QImode
, reg
);
28967 /* Handle 8 bytes in a vector. */
28968 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
28970 addr
= plus_constant (Pmode
, dst
, i
);
28971 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
28972 emit_move_insn (mem
, reg
);
28975 /* Handle single word leftover by shifting 4 bytes back. We can
28976 use aligned access for this case. */
28977 if (i
+ UNITS_PER_WORD
== length
)
28979 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
28980 mem
= adjust_automodify_address (dstbase
, mode
,
28981 addr
, i
- UNITS_PER_WORD
);
28982 /* We are shifting 4 bytes back, set the alignment accordingly. */
28983 if (align
> UNITS_PER_WORD
)
28984 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
28986 emit_move_insn (mem
, reg
);
28988 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28989 We have to use unaligned access for this case. */
28990 else if (i
< length
)
28992 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
28993 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
28994 /* We are shifting bytes back, set the alignment accordingly. */
28995 if ((length
& 1) == 0)
28996 set_mem_align (mem
, BITS_PER_UNIT
* 2);
28998 set_mem_align (mem
, BITS_PER_UNIT
);
29000 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29006 /* Set a block of memory using plain strh/strb instructions, only
29007 using instructions allowed by ALIGN on processor. We fill the
29008 first LENGTH bytes of the memory area starting from DSTBASE
29009 with byte constant VALUE. ALIGN is the alignment requirement
29012 arm_block_set_unaligned_non_vect (rtx dstbase
,
29013 unsigned HOST_WIDE_INT length
,
29014 unsigned HOST_WIDE_INT value
,
29015 unsigned HOST_WIDE_INT align
)
29018 rtx dst
, addr
, mem
;
29019 rtx val_exp
, val_reg
, reg
;
29021 HOST_WIDE_INT v
= value
;
29023 gcc_assert (align
== 1 || align
== 2);
29026 v
|= (value
<< BITS_PER_UNIT
);
29028 v
= sext_hwi (v
, BITS_PER_WORD
);
29029 val_exp
= GEN_INT (v
);
29030 /* Skip if it isn't profitable. */
29031 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29032 align
, true, false))
29035 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29036 mode
= (align
== 2 ? HImode
: QImode
);
29037 val_reg
= force_reg (SImode
, val_exp
);
29038 reg
= gen_lowpart (mode
, val_reg
);
29040 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
29042 addr
= plus_constant (Pmode
, dst
, i
);
29043 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
29044 emit_move_insn (mem
, reg
);
29047 /* Handle single byte leftover. */
29048 if (i
+ 1 == length
)
29050 reg
= gen_lowpart (QImode
, val_reg
);
29051 addr
= plus_constant (Pmode
, dst
, i
);
29052 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29053 emit_move_insn (mem
, reg
);
29057 gcc_assert (i
== length
);
29061 /* Set a block of memory using plain strd/str/strh/strb instructions,
29062 to permit unaligned copies on processors which support unaligned
29063 semantics for those instructions. We fill the first LENGTH bytes
29064 of the memory area starting from DSTBASE with byte constant VALUE.
29065 ALIGN is the alignment requirement of memory. */
29067 arm_block_set_aligned_non_vect (rtx dstbase
,
29068 unsigned HOST_WIDE_INT length
,
29069 unsigned HOST_WIDE_INT value
,
29070 unsigned HOST_WIDE_INT align
)
29073 rtx dst
, addr
, mem
;
29074 rtx val_exp
, val_reg
, reg
;
29075 unsigned HOST_WIDE_INT v
;
29078 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
29079 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
29081 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
29082 if (length
< UNITS_PER_WORD
)
29083 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
29086 v
|= (v
<< BITS_PER_WORD
);
29088 v
= sext_hwi (v
, BITS_PER_WORD
);
29090 val_exp
= GEN_INT (v
);
29091 /* Skip if it isn't profitable. */
29092 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29093 align
, false, use_strd_p
))
29098 /* Try without strd. */
29099 v
= (v
>> BITS_PER_WORD
);
29100 v
= sext_hwi (v
, BITS_PER_WORD
);
29101 val_exp
= GEN_INT (v
);
29102 use_strd_p
= false;
29103 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29104 align
, false, use_strd_p
))
29109 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29110 /* Handle double words using strd if possible. */
29113 val_reg
= force_reg (DImode
, val_exp
);
29115 for (; (i
+ 8 <= length
); i
+= 8)
29117 addr
= plus_constant (Pmode
, dst
, i
);
29118 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
29119 emit_move_insn (mem
, reg
);
29123 val_reg
= force_reg (SImode
, val_exp
);
29125 /* Handle words. */
29126 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
29127 for (; (i
+ 4 <= length
); i
+= 4)
29129 addr
= plus_constant (Pmode
, dst
, i
);
29130 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
29131 if ((align
& 3) == 0)
29132 emit_move_insn (mem
, reg
);
29134 emit_insn (gen_unaligned_storesi (mem
, reg
));
29137 /* Merge last pair of STRH and STRB into a STR if possible. */
29138 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
29140 addr
= plus_constant (Pmode
, dst
, i
- 1);
29141 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
29142 /* We are shifting one byte back, set the alignment accordingly. */
29143 if ((align
& 1) == 0)
29144 set_mem_align (mem
, BITS_PER_UNIT
);
29146 /* Most likely this is an unaligned access, and we can't tell at
29147 compilation time. */
29148 emit_insn (gen_unaligned_storesi (mem
, reg
));
29152 /* Handle half word leftover. */
29153 if (i
+ 2 <= length
)
29155 reg
= gen_lowpart (HImode
, val_reg
);
29156 addr
= plus_constant (Pmode
, dst
, i
);
29157 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
29158 if ((align
& 1) == 0)
29159 emit_move_insn (mem
, reg
);
29161 emit_insn (gen_unaligned_storehi (mem
, reg
));
29166 /* Handle single byte leftover. */
29167 if (i
+ 1 == length
)
29169 reg
= gen_lowpart (QImode
, val_reg
);
29170 addr
= plus_constant (Pmode
, dst
, i
);
29171 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29172 emit_move_insn (mem
, reg
);
29178 /* Set a block of memory using vectorization instructions for both
29179 aligned and unaligned cases. We fill the first LENGTH bytes of
29180 the memory area starting from DSTBASE with byte constant VALUE.
29181 ALIGN is the alignment requirement of memory. */
29183 arm_block_set_vect (rtx dstbase
,
29184 unsigned HOST_WIDE_INT length
,
29185 unsigned HOST_WIDE_INT value
,
29186 unsigned HOST_WIDE_INT align
)
29188 /* Check whether we need to use unaligned store instruction. */
29189 if (((align
& 3) != 0 || (length
& 3) != 0)
29190 /* Check whether unaligned store instruction is available. */
29191 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
29194 if ((align
& 3) == 0)
29195 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
29197 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
29200 /* Expand string store operation. Firstly we try to do that by using
29201 vectorization instructions, then try with ARM unaligned access and
29202 double-word store if profitable. OPERANDS[0] is the destination,
29203 OPERANDS[1] is the number of bytes, operands[2] is the value to
29204 initialize the memory, OPERANDS[3] is the known alignment of the
29207 arm_gen_setmem (rtx
*operands
)
29209 rtx dstbase
= operands
[0];
29210 unsigned HOST_WIDE_INT length
;
29211 unsigned HOST_WIDE_INT value
;
29212 unsigned HOST_WIDE_INT align
;
29214 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
29217 length
= UINTVAL (operands
[1]);
29221 value
= (UINTVAL (operands
[2]) & 0xFF);
29222 align
= UINTVAL (operands
[3]);
29223 if (TARGET_NEON
&& length
>= 8
29224 && current_tune
->string_ops_prefer_neon
29225 && arm_block_set_vect (dstbase
, length
, value
, align
))
29228 if (!unaligned_access
&& (align
& 3) != 0)
29229 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
29231 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
29236 arm_macro_fusion_p (void)
29238 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
29243 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
29246 rtx prev_set
= single_set (prev
);
29247 rtx curr_set
= single_set (curr
);
29253 if (any_condjump_p (curr
))
29256 if (!arm_macro_fusion_p ())
29259 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
)
29261 /* We are trying to fuse
29262 movw imm / movt imm
29263 instructions as a group that gets scheduled together. */
29265 set_dest
= SET_DEST (curr_set
);
29267 if (GET_MODE (set_dest
) != SImode
)
29270 /* We are trying to match:
29271 prev (movw) == (set (reg r0) (const_int imm16))
29272 curr (movt) == (set (zero_extract (reg r0)
29275 (const_int imm16_1))
29277 prev (movw) == (set (reg r1)
29278 (high (symbol_ref ("SYM"))))
29279 curr (movt) == (set (reg r0)
29281 (symbol_ref ("SYM")))) */
29282 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
29284 if (CONST_INT_P (SET_SRC (curr_set
))
29285 && CONST_INT_P (SET_SRC (prev_set
))
29286 && REG_P (XEXP (set_dest
, 0))
29287 && REG_P (SET_DEST (prev_set
))
29288 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
29291 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
29292 && REG_P (SET_DEST (curr_set
))
29293 && REG_P (SET_DEST (prev_set
))
29294 && GET_CODE (SET_SRC (prev_set
)) == HIGH
29295 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
29301 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29303 static unsigned HOST_WIDE_INT
29304 arm_asan_shadow_offset (void)
29306 return (unsigned HOST_WIDE_INT
) 1 << 29;
29310 /* This is a temporary fix for PR60655. Ideally we need
29311 to handle most of these cases in the generic part but
29312 currently we reject minus (..) (sym_ref). We try to
29313 ameliorate the case with minus (sym_ref1) (sym_ref2)
29314 where they are in the same section. */
29317 arm_const_not_ok_for_debug_p (rtx p
)
29319 tree decl_op0
= NULL
;
29320 tree decl_op1
= NULL
;
29322 if (GET_CODE (p
) == MINUS
)
29324 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
29326 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
29328 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
29329 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
29331 if ((TREE_CODE (decl_op1
) == VAR_DECL
29332 || TREE_CODE (decl_op1
) == CONST_DECL
)
29333 && (TREE_CODE (decl_op0
) == VAR_DECL
29334 || TREE_CODE (decl_op0
) == CONST_DECL
))
29335 return (get_variable_section (decl_op1
, false)
29336 != get_variable_section (decl_op0
, false));
29338 if (TREE_CODE (decl_op1
) == LABEL_DECL
29339 && TREE_CODE (decl_op0
) == LABEL_DECL
)
29340 return (DECL_CONTEXT (decl_op1
)
29341 != DECL_CONTEXT (decl_op0
));
29351 /* return TRUE if x is a reference to a value in a constant pool */
29353 arm_is_constant_pool_ref (rtx x
)
29356 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
29357 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
29360 /* Remember the last target of arm_set_current_function. */
29361 static GTY(()) tree arm_previous_fndecl
;
29363 /* Invalidate arm_previous_fndecl. */
29365 arm_reset_previous_fndecl (void)
29367 arm_previous_fndecl
= NULL_TREE
;
29370 /* Establish appropriate back-end context for processing the function
29371 FNDECL. The argument might be NULL to indicate processing at top
29372 level, outside of any function scope. */
29374 arm_set_current_function (tree fndecl
)
29376 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
29379 tree old_tree
= (arm_previous_fndecl
29380 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
29383 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
29385 arm_previous_fndecl
= fndecl
;
29386 if (old_tree
== new_tree
)
29389 if (new_tree
&& new_tree
!= target_option_default_node
)
29391 cl_target_option_restore (&global_options
,
29392 TREE_TARGET_OPTION (new_tree
));
29394 if (TREE_TARGET_GLOBALS (new_tree
))
29395 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
29397 TREE_TARGET_GLOBALS (new_tree
)
29398 = save_target_globals_default_opts ();
29401 else if (old_tree
&& old_tree
!= target_option_default_node
)
29403 new_tree
= target_option_current_node
;
29405 cl_target_option_restore (&global_options
,
29406 TREE_TARGET_OPTION (new_tree
));
29407 if (TREE_TARGET_GLOBALS (new_tree
))
29408 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
29409 else if (new_tree
== target_option_default_node
)
29410 restore_target_globals (&default_target_globals
);
29412 TREE_TARGET_GLOBALS (new_tree
)
29413 = save_target_globals_default_opts ();
29416 arm_option_params_internal (&global_options
);
29419 /* Hook to determine if one function can safely inline another. */
29422 arm_can_inline_p (tree caller ATTRIBUTE_UNUSED
, tree callee ATTRIBUTE_UNUSED
)
29424 /* Overidde default hook: Always OK to inline between different modes.
29425 Function with mode specific instructions, e.g using asm, must be explicitely
29426 protected with noinline. */
29430 /* Inner function to process the attribute((target(...))), take an argument and
29431 set the current options from the argument. If we have a list, recursively
29432 go over the list. */
29435 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
29437 if (TREE_CODE (args
) == TREE_LIST
)
29440 for (; args
; args
= TREE_CHAIN (args
))
29441 if (TREE_VALUE (args
)
29442 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
29447 else if (TREE_CODE (args
) != STRING_CST
)
29449 error ("attribute %<target%> argument not a string");
29453 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
29454 while (argstr
&& *argstr
!= '\0')
29456 while (ISSPACE (*argstr
))
29459 if (!strcmp (argstr
, "thumb"))
29461 opts
->x_target_flags
|= MASK_THUMB
;
29462 arm_option_check_internal (opts
);
29466 if (!strcmp (argstr
, "arm"))
29468 opts
->x_target_flags
&= ~MASK_THUMB
;
29469 arm_option_check_internal (opts
);
29473 warning (0, "attribute(target(\"%s\")) is unknown", argstr
);
29480 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29483 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
29484 struct gcc_options
*opts_set
)
29486 if (!arm_valid_target_attribute_rec (args
, opts
))
29489 /* Do any overrides, such as global options arch=xxx. */
29490 arm_option_override_internal (opts
, opts_set
);
29492 return build_target_option_node (opts
);
29496 add_attribute (const char * mode
, tree
*attributes
)
29498 size_t len
= strlen (mode
);
29499 tree value
= build_string (len
, mode
);
29501 TREE_TYPE (value
) = build_array_type (char_type_node
,
29502 build_index_type (size_int (len
)));
29504 *attributes
= tree_cons (get_identifier ("target"),
29505 build_tree_list (NULL_TREE
, value
),
29509 /* For testing. Insert thumb or arm modes alternatively on functions. */
/* NOTE(review): lossy extraction -- return type, braces, and early-return
   statements are missing; upstream line numbers are fused into the text.  */
/* Implements the TARGET_INSERT_ATTRIBUTES-style hook used by the
   -mflip-thumb testing facility: alternately tag each eligible function
   with target("thumb") / target("arm") so both modes get exercised.  */
29512 arm_insert_attributes (tree fndecl
, tree
* attributes
)
/* Do nothing unless the flip-thumb testing mode is enabled.  */
29516 if (! TARGET_FLIP_THUMB
)
/* Only flip real, user-written function definitions: skip non-functions,
   external declarations, built-ins and compiler-generated decls.  */
29519 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
29520 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
29523 /* Nested definitions must inherit mode. */
/* A non-null current_function_decl means we are inside another function's
   body, so the nested function takes the enclosing function's mode.  */
29524 if (current_function_decl
)
29526 mode
= TARGET_THUMB
? "thumb" : "arm";
29527 add_attribute (mode
, attributes
);
29531 /* If there is already a setting don't change it. */
29532 if (lookup_attribute ("target", *attributes
) != NULL
)
/* Otherwise alternate: the global thumb_flipper flag chooses the mode and
   is toggled afterwards so the next function gets the other mode.  */
29535 mode
= thumb_flipper
? "thumb" : "arm";
29536 add_attribute (mode
, attributes
);
29538 thumb_flipper
= !thumb_flipper
;
29541 /* Hook to validate attribute((target("string"))). */
/* NOTE(review): lossy extraction -- return type, braces, the failure-path
   body after the NULL_TREE check, and the final return statement are
   missing.  Upstream line numbers are fused into the text.  */
/* Validate a target("...") attribute on FNDECL: build a scratch option set,
   parse ARGS into it, and on success attach the resulting target and
   optimization nodes to FNDECL.  NAME and FLAGS are unused here.  */
29544 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
29545 tree args
, int ARG_UNUSED (flags
))
29548 struct gcc_options func_options
;
29549 tree cur_tree
, new_optimize
;
/* The hook is only called with a real decl and a non-empty argument list.  */
29550 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
29552 /* Get the optimization options of the current function. */
29553 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
29555 /* If the function changed the optimization levels as well as setting target
29556 options, start with the optimizations specified. */
29557 if (!func_optimize
)
29558 func_optimize
= optimization_default_node
;
29560 /* Init func_options. */
/* Scratch option struct: zero it, then let the common and language-specific
   initializers fill in defaults.  */
29561 memset (&func_options
, 0, sizeof (func_options
));
29562 init_options_struct (&func_options
, NULL
);
29563 lang_hooks
.init_options_struct (&func_options
);
29565 /* Initialize func_options to the defaults. */
29566 cl_optimization_restore (&func_options
,
29567 TREE_OPTIMIZATION (func_optimize
));
/* Start from the command-line/default target options before layering the
   attribute's settings on top.  */
29569 cl_target_option_restore (&func_options
,
29570 TREE_TARGET_OPTION (target_option_default_node
));
29572 /* Set func_options flags with new target mode. */
29573 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
29574 &global_options_set
)
;
/* Parsing failed; the body of this branch (presumably `return false;') is
   missing from the extraction -- TODO confirm against upstream.  */
29576 if (cur_tree
== NULL_TREE
)
29579 new_optimize
= build_optimization_node (&func_options
);
/* Record the per-function target and optimization nodes on the decl so
   later passes switch options when compiling this function.  */
29581 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
29583 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
/* Emit the assembler directives that must precede the definition of the
   function NAME (decl DECL) on STREAM: the .syntax mode, the ARM/Thumb
   state directives, and optionally the poked function name.
   NOTE(review): lossy extraction -- return type, braces, and some `else'
   lines are missing, so the exact if/else nesting cannot be fully verified
   here; confirm against upstream arm.c.  */
29589 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
/* Select unified vs. divided ARM/Thumb assembler syntax.  */
29591 if (TARGET_UNIFIED_ASM
)
29592 fprintf (stream
, "\t.syntax unified\n");
29594 fprintf (stream
, "\t.syntax divided\n");
/* Functions entered in ARM state (or Thumb-1 thunks on cores that also
   have an ARM instruction set) are emitted as 32-bit ARM code.  */
29598 if (is_called_in_ARM_mode (decl
)
29599 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
29600 && cfun
->is_thunk
))
29601 fprintf (stream
, "\t.code 32\n");
/* Thumb-1 uses the legacy .code 16 directive; otherwise .thumb.  Both need
   .thumb_func so the linker marks the symbol as Thumb.  */
29602 else if (TARGET_THUMB1
)
29603 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
29605 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
29608 fprintf (stream
, "\t.arm\n");
/* Optionally embed the function name in the object code (used by some
   kernel/debug configurations).  */
29610 if (TARGET_POKE_FUNCTION_NAME
)
29611 arm_poke_function_name (stream
, (const char *) name
);
29614 /* If MEM is in the form of [base+offset], extract the two parts
29615 of address and set to BASE and OFFSET, otherwise return false
29616 after clearing BASE and OFFSET. */
/* NOTE(review): lossy extraction -- the return type, braces, the local
   declaration of `addr', the `return true/false' statements, and the line
   clearing *BASE in the fallthrough path are missing.  Upstream line numbers
   are fused into the text; confirm against upstream arm.c.  */
29619 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
/* Caller must pass a MEM rtx.  */
29623 gcc_assert (MEM_P (mem
));
29625 addr
= XEXP (mem
, 0);
29627 /* Strip off const from addresses like (const (addr)). */
29628 if (GET_CODE (addr
) == CONST
)
29629 addr
= XEXP (addr
, 0);
/* Plain register: treat as base + 0.  The assignment of *base for this case
   is missing from the extraction (presumably `*base = addr;').  */
29631 if (GET_CODE (addr
) == REG
)
29634 *offset
= const0_rtx
;
/* (plus (reg) (const_int)): split into its two operands.  */
29638 if (GET_CODE (addr
) == PLUS
29639 && GET_CODE (XEXP (addr
, 0)) == REG
29640 && CONST_INT_P (XEXP (addr
, 1)))
29642 *base
= XEXP (addr
, 0);
29643 *offset
= XEXP (addr
, 1);
/* Unsupported address form: clear the outputs (the matching `*base =
   NULL_RTX;' line is missing from the extraction).  */
29648 *offset
= NULL_RTX
;
29653 /* If INSN is a load or store of address in the form of [base+offset],
29654 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
29655 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29656 otherwise return FALSE. */
/* NOTE(review): lossy extraction -- the return type, braces, local
   declarations (x, src, dest), the `src = SET_SRC (x);' assignment that the
   code below clearly relies on, the *is_load assignments, and the early
   `return false' are all missing.  Confirm against upstream arm.c.  */
29659 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
29663 gcc_assert (INSN_P (insn
));
29664 x
= PATTERN (insn
);
/* Only a single SET can be a simple load or store.  */
29665 if (GET_CODE (x
) != SET
)
29669 dest
= SET_DEST (x
);
/* reg -> mem is a store; presumably *is_load is cleared here (line
   missing from extraction).  */
29670 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
29673 extract_base_offset_in_addr (dest
, base
, offset
);
/* mem -> reg is a load; presumably *is_load is set here (line missing).  */
29675 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
29678 extract_base_offset_in_addr (src
, base
, offset
);
/* Succeed only if the address decomposed into both a base and an offset.  */
29683 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
29686 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29688 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
29689 and PRI are only calculated for these instructions. For other instruction,
29690 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
29691 instruction fusion can be supported by returning different priorities.
29693 It's important that irrelevant instructions get the largest FUSION_PRI. */
/* NOTE(review): lossy extraction -- the return type, braces, local
   declarations (base, offset, is_load, tmp, off_val), the MAX_PRI
   initialisation of *fusion_pri/*pri, the early return for non-fusible
   insns, the load/store branch structure around 29715/29717, the sign test
   on off_val, and the final `*pri = tmp;' are missing.  Confirm against
   upstream arm.c before editing.  */
29696 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
29697 int *fusion_pri
, int *pri
)
29703 gcc_assert (INSN_P (insn
));
/* Non-load/store insns keep the (missing) MAX_PRI defaults and return.  */
29706 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
29713 /* Load goes first. */
/* Loads get a slightly higher fusion priority than stores so same-base
   loads cluster before stores.  */
29715 *fusion_pri
= tmp
- 1;
29717 *fusion_pri
= tmp
- 2;
29721 /* INSN with smaller base register goes first. */
/* Fold the base register number into the priority key (8 bits, shifted
   above the offset field).  */
29722 tmp
-= ((REGNO (base
) & 0xff) << 20);
29724 /* INSN with smaller offset goes first. */
29725 off_val
= (int)(INTVAL (offset
));
/* The branch choosing between these two lines (presumably on the sign of
   off_val) is missing from the extraction.  */
29727 tmp
-= (off_val
& 0xfffff);
29729 tmp
+= ((- off_val
) & 0xfffff);
29734 #include "gt-arm.h"