/* Output routines for GCC for ARM.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "insn-codes.h"
#include "diagnostic-core.h"
#include "cfgcleanup.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "gimple-expr.h"
#include "target-globals.h"
#include "tm-constrs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
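
/* The hook above lets a language front end emit additional,
   language-specific EABI object attributes; when non-null it is
   invoked while the file header is written (see arm_file_start).  */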

/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned feature_count (const arm_feature_set *);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif

static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);

static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
					     const unsigned char *sel);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int *);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
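
/* Spelled out: an anchor covers the anchor byte itself plus 4088 bytes
   below it and 4095 bytes above it, i.e. 4088 + 1 + 4095 = 8184 bytes in
   total, and 8184 = 8 * 1023, which is the eight-byte divisibility the
   comment above refers to.  */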

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
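
/* An IT block conditionalises up to four subsequent Thumb-2 instructions.
   ARMv8-A deprecates IT blocks containing more than one instruction,
   which is why -mrestrict-it (arm_restrict_it) caps the block at a
   single insn below.  */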

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;
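
/* TARGET_INITIALIZER expands to an aggregate initializer that collects
   every TARGET_* hook defined (or left at its default) above, so this
   single definition is the complete target vector seen by the middle
   end.  */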

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
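
/* Note that 5 is only the built-in default; arm_option_override refines
   it from the "Max cond insns" field of the selected tune_params (see
   the tuning tables later in this file).  */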

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
arm_feature_set insn_flags = ARM_FSET_EMPTY;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
arm_feature_set tune_flags = ARM_FSET_EMPTY;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bit operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
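
/* Unified assembler syntax spells a logical shift left "lsl", while the
   older divided syntax writes "asl"; on ARM both name the same shift, so
   the choice is purely one of spelling.  */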

#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
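
/* In other words, the candidate work registers are the low registers
   r0-r7 minus the Thumb frame pointer and the PIC register; SP and PC
   are masked out as well, although both already lie outside the 0xff
   low-register range.  */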

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const arm_feature_set flags;
  const struct tune_params *const tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
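
/* For example, a core with a four-slot prefetcher, a 32kB L1 data cache
   and 64-byte cache lines would be described as
   ARM_PREFETCH_BENEFICIAL (4, 32768, 64) -- illustrative numbers only,
   not taken from any particular core.  */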

/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
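
/* Every cost above is 1 except cond_taken_branch_cost, so in the absence
   of a CPU-specific table the vectorizer is biased only against
   introducing taken branches.  */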

/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
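
/* The tables below express costs with COSTS_N_INSNS, i.e. as multiples
   of the cost of one simple instruction (COSTS_N_INSNS (N) is N scaled
   by the base instruction cost; see rtl.h).  */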

const struct cpu_cost_table cortexa9_extra_costs =
{
  /* ALU */
  {
    0,				/* arith.  */
    0,				/* logical.  */
    0,				/* shift.  */
    COSTS_N_INSNS (1),		/* shift_reg.  */
    COSTS_N_INSNS (1),		/* arith_shift.  */
    COSTS_N_INSNS (2),		/* arith_shift_reg.  */
    0,				/* log_shift.  */
    COSTS_N_INSNS (1),		/* log_shift_reg.  */
    COSTS_N_INSNS (1),		/* extend.  */
    COSTS_N_INSNS (2),		/* extend_arith.  */
    COSTS_N_INSNS (1),		/* bfi.  */
    COSTS_N_INSNS (1),		/* bfx.  */
    0,				/* clz.  */
    0,				/* rev.  */
    0,				/* non_exec.  */
    true			/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (3),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (4),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (4),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),		/* load.  */
    COSTS_N_INSNS (2),		/* load_sign_extend.  */
    COSTS_N_INSNS (2),		/* ldrd.  */
    COSTS_N_INSNS (2),		/* ldm_1st.  */
    1,				/* ldm_regs_per_insn_1st.  */
    2,				/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5),		/* loadf.  */
    COSTS_N_INSNS (5),		/* loadd.  */
    COSTS_N_INSNS (1),		/* load_unaligned.  */
    COSTS_N_INSNS (2),		/* store.  */
    COSTS_N_INSNS (2),		/* strd.  */
    COSTS_N_INSNS (2),		/* stm_1st.  */
    1,				/* stm_regs_per_insn_1st.  */
    2,				/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),		/* storef.  */
    COSTS_N_INSNS (1),		/* stored.  */
    COSTS_N_INSNS (1),		/* store_unaligned.  */
    COSTS_N_INSNS (1),		/* loadv.  */
    COSTS_N_INSNS (1)		/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (14),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (24),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)		/* alu.  */
  }
};

const struct cpu_cost_table cortexa8_extra_costs =
{
  /* ALU */
  {
    0,				/* arith.  */
    0,				/* logical.  */
    COSTS_N_INSNS (1),		/* shift.  */
    0,				/* shift_reg.  */
    COSTS_N_INSNS (1),		/* arith_shift.  */
    0,				/* arith_shift_reg.  */
    COSTS_N_INSNS (1),		/* log_shift.  */
    0,				/* log_shift_reg.  */
    COSTS_N_INSNS (1),		/* extend.  */
    0,				/* extend_arith.  */
    0,				/* bfi.  */
    0,				/* bfx.  */
    0,				/* clz.  */
    0,				/* rev.  */
    0,				/* non_exec.  */
    true			/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),		/* load.  */
    COSTS_N_INSNS (1),		/* load_sign_extend.  */
    COSTS_N_INSNS (1),		/* ldrd.  */
    COSTS_N_INSNS (1),		/* ldm_1st.  */
    1,				/* ldm_regs_per_insn_1st.  */
    2,				/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),		/* loadf.  */
    COSTS_N_INSNS (1),		/* loadd.  */
    COSTS_N_INSNS (1),		/* load_unaligned.  */
    COSTS_N_INSNS (1),		/* store.  */
    COSTS_N_INSNS (1),		/* strd.  */
    COSTS_N_INSNS (1),		/* stm_1st.  */
    1,				/* stm_regs_per_insn_1st.  */
    2,				/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),		/* storef.  */
    COSTS_N_INSNS (1),		/* stored.  */
    COSTS_N_INSNS (1),		/* store_unaligned.  */
    COSTS_N_INSNS (1),		/* loadv.  */
    COSTS_N_INSNS (1)		/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (36),	/* div.  */
      COSTS_N_INSNS (11),	/* mult.  */
      COSTS_N_INSNS (20),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (64),	/* div.  */
      COSTS_N_INSNS (16),	/* mult.  */
      COSTS_N_INSNS (25),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (6),	/* widen.  */
      COSTS_N_INSNS (6),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)		/* alu.  */
  }
};

const struct cpu_cost_table cortexa5_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),		/* arith.  */
    COSTS_N_INSNS (1),		/* logical.  */
    COSTS_N_INSNS (1),		/* shift.  */
    COSTS_N_INSNS (1),		/* shift_reg.  */
    COSTS_N_INSNS (1),		/* arith_shift.  */
    COSTS_N_INSNS (1),		/* arith_shift_reg.  */
    COSTS_N_INSNS (1),		/* log_shift.  */
    COSTS_N_INSNS (1),		/* log_shift_reg.  */
    COSTS_N_INSNS (1),		/* extend.  */
    COSTS_N_INSNS (1),		/* extend_arith.  */
    COSTS_N_INSNS (1),		/* bfi.  */
    COSTS_N_INSNS (1),		/* bfx.  */
    COSTS_N_INSNS (1),		/* clz.  */
    COSTS_N_INSNS (1),		/* rev.  */
    COSTS_N_INSNS (1),		/* non_exec.  */
    true			/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),		/* load.  */
    COSTS_N_INSNS (1),		/* load_sign_extend.  */
    COSTS_N_INSNS (6),		/* ldrd.  */
    COSTS_N_INSNS (1),		/* ldm_1st.  */
    1,				/* ldm_regs_per_insn_1st.  */
    2,				/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),		/* loadf.  */
    COSTS_N_INSNS (4),		/* loadd.  */
    COSTS_N_INSNS (1),		/* load_unaligned.  */
    COSTS_N_INSNS (1),		/* store.  */
    COSTS_N_INSNS (3),		/* strd.  */
    COSTS_N_INSNS (1),		/* stm_1st.  */
    1,				/* stm_regs_per_insn_1st.  */
    2,				/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),		/* storef.  */
    COSTS_N_INSNS (2),		/* stored.  */
    COSTS_N_INSNS (1),		/* store_unaligned.  */
    COSTS_N_INSNS (1),		/* loadv.  */
    COSTS_N_INSNS (1)		/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)		/* alu.  */
  }
};

const struct cpu_cost_table cortexa7_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),		/* arith.  */
    COSTS_N_INSNS (1),		/* logical.  */
    COSTS_N_INSNS (1),		/* shift.  */
    COSTS_N_INSNS (1),		/* shift_reg.  */
    COSTS_N_INSNS (1),		/* arith_shift.  */
    COSTS_N_INSNS (1),		/* arith_shift_reg.  */
    COSTS_N_INSNS (1),		/* log_shift.  */
    COSTS_N_INSNS (1),		/* log_shift_reg.  */
    COSTS_N_INSNS (1),		/* extend.  */
    COSTS_N_INSNS (1),		/* extend_arith.  */
    COSTS_N_INSNS (1),		/* bfi.  */
    COSTS_N_INSNS (1),		/* bfx.  */
    COSTS_N_INSNS (1),		/* clz.  */
    COSTS_N_INSNS (1),		/* rev.  */
    COSTS_N_INSNS (1),		/* non_exec.  */
    true			/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),		/* load.  */
    COSTS_N_INSNS (1),		/* load_sign_extend.  */
    COSTS_N_INSNS (3),		/* ldrd.  */
    COSTS_N_INSNS (1),		/* ldm_1st.  */
    1,				/* ldm_regs_per_insn_1st.  */
    2,				/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),		/* loadf.  */
    COSTS_N_INSNS (2),		/* loadd.  */
    COSTS_N_INSNS (1),		/* load_unaligned.  */
    COSTS_N_INSNS (1),		/* store.  */
    COSTS_N_INSNS (3),		/* strd.  */
    COSTS_N_INSNS (1),		/* stm_1st.  */
    1,				/* stm_regs_per_insn_1st.  */
    2,				/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),		/* storef.  */
    COSTS_N_INSNS (2),		/* stored.  */
    COSTS_N_INSNS (1),		/* store_unaligned.  */
    COSTS_N_INSNS (1),		/* loadv.  */
    COSTS_N_INSNS (1)		/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)		/* alu.  */
  }
};

const struct cpu_cost_table cortexa12_extra_costs =
{
  /* ALU */
  {
    0,				/* arith.  */
    0,				/* logical.  */
    COSTS_N_INSNS (1),		/* shift.  */
    COSTS_N_INSNS (1),		/* shift_reg.  */
    COSTS_N_INSNS (1),		/* arith_shift.  */
    COSTS_N_INSNS (1),		/* arith_shift_reg.  */
    COSTS_N_INSNS (1),		/* log_shift.  */
    COSTS_N_INSNS (1),		/* log_shift_reg.  */
    COSTS_N_INSNS (1),		/* extend.  */
    COSTS_N_INSNS (1),		/* extend_arith.  */
    COSTS_N_INSNS (1),		/* bfi.  */
    COSTS_N_INSNS (1),		/* bfx.  */
    COSTS_N_INSNS (1),		/* clz.  */
    COSTS_N_INSNS (1),		/* rev.  */
    0,				/* non_exec.  */
    true			/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),		/* load.  */
    COSTS_N_INSNS (3),		/* load_sign_extend.  */
    COSTS_N_INSNS (3),		/* ldrd.  */
    COSTS_N_INSNS (3),		/* ldm_1st.  */
    1,				/* ldm_regs_per_insn_1st.  */
    2,				/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3),		/* loadf.  */
    COSTS_N_INSNS (3),		/* loadd.  */
    0,				/* load_unaligned.  */
    COSTS_N_INSNS (1),		/* store.  */
    COSTS_N_INSNS (1),		/* strd.  */
    COSTS_N_INSNS (1),		/* stm_1st.  */
    1,				/* stm_regs_per_insn_1st.  */
    2,				/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),		/* storef.  */
    COSTS_N_INSNS (2),		/* stored.  */
    0,				/* store_unaligned.  */
    COSTS_N_INSNS (1),		/* loadv.  */
    COSTS_N_INSNS (1)		/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)		/* alu.  */
  }
};

const struct cpu_cost_table cortexa15_extra_costs =
{
  /* ALU */
  {
    0,				/* arith.  */
    0,				/* logical.  */
    0,				/* shift.  */
    COSTS_N_INSNS (1),		/* shift_reg.  */
    COSTS_N_INSNS (1),		/* arith_shift.  */
    COSTS_N_INSNS (1),		/* arith_shift_reg.  */
    COSTS_N_INSNS (1),		/* log_shift.  */
    COSTS_N_INSNS (1),		/* log_shift_reg.  */
    COSTS_N_INSNS (1),		/* extend.  */
    COSTS_N_INSNS (1),		/* extend_arith.  */
    COSTS_N_INSNS (1),		/* bfi.  */
    COSTS_N_INSNS (1),		/* bfx.  */
    COSTS_N_INSNS (1),		/* clz.  */
    COSTS_N_INSNS (1),		/* rev.  */
    0,				/* non_exec.  */
    true			/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (2),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),		/* load.  */
    COSTS_N_INSNS (3),		/* load_sign_extend.  */
    COSTS_N_INSNS (3),		/* ldrd.  */
    COSTS_N_INSNS (4),		/* ldm_1st.  */
    1,				/* ldm_regs_per_insn_1st.  */
    2,				/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4),		/* loadf.  */
    COSTS_N_INSNS (4),		/* loadd.  */
    0,				/* load_unaligned.  */
    0,				/* store.  */
    0,				/* strd.  */
    COSTS_N_INSNS (1),		/* stm_1st.  */
    1,				/* stm_regs_per_insn_1st.  */
    2,				/* stm_regs_per_insn_subsequent.  */
    0,				/* storef.  */
    0,				/* stored.  */
    0,				/* store_unaligned.  */
    COSTS_N_INSNS (1),		/* loadv.  */
    COSTS_N_INSNS (1)		/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (5),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)		/* alu.  */
  }
};

const struct cpu_cost_table v7m_extra_costs =
{
  /* ALU */
  {
    0,				/* arith.  */
    0,				/* logical.  */
    0,				/* shift.  */
    0,				/* shift_reg.  */
    0,				/* arith_shift.  */
    COSTS_N_INSNS (1),		/* arith_shift_reg.  */
    0,				/* log_shift.  */
    COSTS_N_INSNS (1),		/* log_shift_reg.  */
    0,				/* extend.  */
    COSTS_N_INSNS (1),		/* extend_arith.  */
    0,				/* bfi.  */
    0,				/* bfx.  */
    0,				/* clz.  */
    0,				/* rev.  */
    COSTS_N_INSNS (1),		/* non_exec.  */
    false			/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      COSTS_N_INSNS (8)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),		/* load.  */
    0,				/* load_sign_extend.  */
    COSTS_N_INSNS (3),		/* ldrd.  */
    COSTS_N_INSNS (2),		/* ldm_1st.  */
    1,				/* ldm_regs_per_insn_1st.  */
    1,				/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),		/* loadf.  */
    COSTS_N_INSNS (3),		/* loadd.  */
    COSTS_N_INSNS (1),		/* load_unaligned.  */
    COSTS_N_INSNS (2),		/* store.  */
    COSTS_N_INSNS (3),		/* strd.  */
    COSTS_N_INSNS (2),		/* stm_1st.  */
    1,				/* stm_regs_per_insn_1st.  */
    1,				/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),		/* storef.  */
    COSTS_N_INSNS (3),		/* stored.  */
    COSTS_N_INSNS (1),		/* store_unaligned.  */
    COSTS_N_INSNS (1),		/* loadv.  */
    COSTS_N_INSNS (1)		/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (7),	/* div.  */
      COSTS_N_INSNS (2),	/* mult.  */
      COSTS_N_INSNS (5),	/* mult_addsub.  */
      COSTS_N_INSNS (3),	/* fma.  */
      COSTS_N_INSNS (1),	/* addsub.  */
      0,			/* fpconst.  */
      0,			/* neg.  */
      0,			/* compare.  */
      0,			/* widen.  */
      0,			/* narrow.  */
      0,			/* toint.  */
      0,			/* fromint.  */
      0				/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      0,			/* fpconst.  */
      0,			/* neg.  */
      0,			/* compare.  */
      0,			/* widen.  */
      0,			/* narrow.  */
      0,			/* toint.  */
      0,			/* fromint.  */
      0				/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)		/* alu.  */
  }
};
1695 const struct tune_params arm_slowmul_tune
=
1697 arm_slowmul_rtx_costs
,
1698 NULL
, /* Insn extra costs. */
1699 NULL
, /* Sched adj cost. */
1700 arm_default_branch_cost
,
1701 &arm_default_vec_cost
,
1702 3, /* Constant limit. */
1703 5, /* Max cond insns. */
1704 8, /* Memset max inline. */
1705 1, /* Issue rate. */
1706 ARM_PREFETCH_NOT_BENEFICIAL
,
1707 tune_params::PREF_CONST_POOL_TRUE
,
1708 tune_params::PREF_LDRD_FALSE
,
1709 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1710 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1711 tune_params::DISPARAGE_FLAGS_NEITHER
,
1712 tune_params::PREF_NEON_64_FALSE
,
1713 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1714 tune_params::FUSE_NOTHING
,
1715 tune_params::SCHED_AUTOPREF_OFF
1718 const struct tune_params arm_fastmul_tune
=
1720 arm_fastmul_rtx_costs
,
1721 NULL
, /* Insn extra costs. */
1722 NULL
, /* Sched adj cost. */
1723 arm_default_branch_cost
,
1724 &arm_default_vec_cost
,
1725 1, /* Constant limit. */
1726 5, /* Max cond insns. */
1727 8, /* Memset max inline. */
1728 1, /* Issue rate. */
1729 ARM_PREFETCH_NOT_BENEFICIAL
,
1730 tune_params::PREF_CONST_POOL_TRUE
,
1731 tune_params::PREF_LDRD_FALSE
,
1732 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1733 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1734 tune_params::DISPARAGE_FLAGS_NEITHER
,
1735 tune_params::PREF_NEON_64_FALSE
,
1736 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1737 tune_params::FUSE_NOTHING
,
1738 tune_params::SCHED_AUTOPREF_OFF
1741 /* StrongARM has early execution of branches, so a sequence that is worth
1742 skipping is shorter. Set max_insns_skipped to a lower value. */
1744 const struct tune_params arm_strongarm_tune
=
1746 arm_fastmul_rtx_costs
,
1747 NULL
, /* Insn extra costs. */
1748 NULL
, /* Sched adj cost. */
1749 arm_default_branch_cost
,
1750 &arm_default_vec_cost
,
1751 1, /* Constant limit. */
1752 3, /* Max cond insns. */
1753 8, /* Memset max inline. */
1754 1, /* Issue rate. */
1755 ARM_PREFETCH_NOT_BENEFICIAL
,
1756 tune_params::PREF_CONST_POOL_TRUE
,
1757 tune_params::PREF_LDRD_FALSE
,
1758 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1759 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1760 tune_params::DISPARAGE_FLAGS_NEITHER
,
1761 tune_params::PREF_NEON_64_FALSE
,
1762 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1763 tune_params::FUSE_NOTHING
,
1764 tune_params::SCHED_AUTOPREF_OFF
1767 const struct tune_params arm_xscale_tune
=
1769 arm_xscale_rtx_costs
,
1770 NULL
, /* Insn extra costs. */
1771 xscale_sched_adjust_cost
,
1772 arm_default_branch_cost
,
1773 &arm_default_vec_cost
,
1774 2, /* Constant limit. */
1775 3, /* Max cond insns. */
1776 8, /* Memset max inline. */
1777 1, /* Issue rate. */
1778 ARM_PREFETCH_NOT_BENEFICIAL
,
1779 tune_params::PREF_CONST_POOL_TRUE
,
1780 tune_params::PREF_LDRD_FALSE
,
1781 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1782 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1783 tune_params::DISPARAGE_FLAGS_NEITHER
,
1784 tune_params::PREF_NEON_64_FALSE
,
1785 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1786 tune_params::FUSE_NOTHING
,
1787 tune_params::SCHED_AUTOPREF_OFF
1790 const struct tune_params arm_9e_tune
=
1793 NULL
, /* Insn extra costs. */
1794 NULL
, /* Sched adj cost. */
1795 arm_default_branch_cost
,
1796 &arm_default_vec_cost
,
1797 1, /* Constant limit. */
1798 5, /* Max cond insns. */
1799 8, /* Memset max inline. */
1800 1, /* Issue rate. */
1801 ARM_PREFETCH_NOT_BENEFICIAL
,
1802 tune_params::PREF_CONST_POOL_TRUE
,
1803 tune_params::PREF_LDRD_FALSE
,
1804 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1806 tune_params::DISPARAGE_FLAGS_NEITHER
,
1807 tune_params::PREF_NEON_64_FALSE
,
1808 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1809 tune_params::FUSE_NOTHING
,
1810 tune_params::SCHED_AUTOPREF_OFF
1813 const struct tune_params arm_marvell_pj4_tune
=
1816 NULL
, /* Insn extra costs. */
1817 NULL
, /* Sched adj cost. */
1818 arm_default_branch_cost
,
1819 &arm_default_vec_cost
,
1820 1, /* Constant limit. */
1821 5, /* Max cond insns. */
1822 8, /* Memset max inline. */
1823 2, /* Issue rate. */
1824 ARM_PREFETCH_NOT_BENEFICIAL
,
1825 tune_params::PREF_CONST_POOL_TRUE
,
1826 tune_params::PREF_LDRD_FALSE
,
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1829 tune_params::DISPARAGE_FLAGS_NEITHER
,
1830 tune_params::PREF_NEON_64_FALSE
,
1831 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1832 tune_params::FUSE_NOTHING
,
1833 tune_params::SCHED_AUTOPREF_OFF
const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  &generic_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  arm_9e_rtx_costs,
  &cortexa8_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a7_tune =
{
  arm_9e_rtx_costs,
  &cortexa7_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a15_tune =
{
  arm_9e_rtx_costs,
  &cortexa15_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_cortex_a53_tune =
{
  arm_9e_rtx_costs,
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a57_tune =
{
  arm_9e_rtx_costs,
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_xgene1_tune =
{
  arm_9e_rtx_costs,
  &xgene1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  &cortexa5_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a12_tune =
{
  arm_9e_rtx_costs,
  &cortexa12_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  arm_9e_rtx_costs,
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  arm_9e_rtx_costs,
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Insn extra costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH,	  \
   FLAGS, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
};
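
/* To illustrate the table construction above (the exact .def arguments
   vary between GCC versions, so treat this as a sketch): an entry such as

     ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, flags, cortex_a8)

   in arm-cores.def would expand, via the ARM_CORE macro, to

     {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A, flags, &arm_cortex_a8_tune},

   pasting the COSTS argument into the name of one of the tune_params
   structures defined earlier in this file.  */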
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \
  { NAME, MODEL, REV, VFP_REGS, FEATURES },
#include "arm-fpus.def"
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
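
/* A worked example of the value &= value - 1 step above: for
   value = 0x29 (binary 101001), successive iterations leave 0x28,
   then 0x20, then 0, so the loop body runs exactly three times --
   once per set bit rather than once per bit position (Kernighan's
   bit-counting method).  */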
/* Return the number of features in feature-set SET.  */
static unsigned
feature_count (const arm_feature_set * set)
{
  return (bit_count (ARM_FSET_CPU1 (*set))
	  + bit_count (ARM_FSET_CPU2 (*set)));
}
typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
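
/* For example, assuming the "sq" mode-name string set up further below,
   arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3) would
   register the libcall name "__gnu_addsq3" -- prefix, function name,
   mode name, then the operand-count suffix.  */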
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
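
/* For example, a signed fract-to-fract conversion from SQmode ("sq") to
   DQmode ("dq") with funcname "fract" satisfies all three conditions
   above, so the registered name would be "__gnu_fractsqdq2" rather than
   "__gnu_fractsqdq".  */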
/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
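
  /* The practical effect of the NULL entries above: with an IEEE or
     alternative __fp16 format selected, a conversion such as (float) h
     calls the registered __gnu_h2f_* helper, while __fp16 arithmetic
     and comparisons fall back to SFmode because their HFmode optab
     entries are NULL.  */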
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" },
	{ QImode, "qi" },
	{ HImode, "hi" },
	{ SImode, "si" },
	{ DImode, "di" },
	{ TImode, "ti" },
	{ SFmode, "sf" },
	{ DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
	  || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
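
      /* As a quick sanity check on the figure above: the range
	 [-248, 4095] plus the anchor byte itself spans
	 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, so anchors
	 can indeed be laid out on a natural eight-byte grid.  */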
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2)
	max_insns_skipped = arm_restrict_it ? 1 : 4;
    }
  else
    /* When -mrestrict-it is in use tone down the if-conversion.  */
    max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
      ? 1 : current_tune->max_insns_skipped;
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  if (TARGET_THUMB_P (opts->x_target_flags)
      && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* Need to remember initial values so combinations of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  if (!TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_arm_restrict_it = 0;

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  arm_selected_arch = NULL;
  arm_selected_cpu = NULL;
  arm_selected_tune = NULL;

  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
      arm_selected_tune = &all_cores[(int) arm_cpu_option];
    }

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
	  arm_feature_set selected_flags;
	  ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
			arm_selected_arch->flags);
	  ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
	  /* Check for conflict between mcpu and march.  */
	  if (!ARM_FSET_IS_EMPTY (selected_flags))
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      arm_feature_set sought = ARM_FSET_EMPTY;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switch that requires certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
	  ARM_FSET_ADD_CPU1 (sought, FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
	}

      if (!ARM_FSET_IS_EMPTY (sought)
	  && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		{
		  arm_feature_set required = ARM_FSET_EMPTY;
		  ARM_FSET_UNION (required, sought, insn_flags);
		  if (ARM_FSET_CPU_SUBSET (required, sel->flags))
		    {
		      unsigned count;
		      arm_feature_set flags;
		      ARM_FSET_INTER (flags, sel->flags, insn_flags);
		      count = feature_count (&flags);

		      if (count >= current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}
	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
  arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
  arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
  arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
  arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
  arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
  arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
  arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
  arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
  arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
  arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
  arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
  arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);

  arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
  arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
  arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
  arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
  arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
  arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
  arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
  arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
  arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
  arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");

      if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT && TARGET_NEON)
    error ("iWMMXt and NEON are incompatible");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_VXWORKS_RTP
      && !global_options_set.x_arm_pic_data_is_text_relative)
    arm_pic_data_is_text_relative = 0;
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */

  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
	unaligned_access = 1;
      else
	unaligned_access = 0;
    }
  else if (unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Currently, for slow flash data, we just disable literal pools.  */
  if (target_slow_flash_data)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Save the initial options in case the user does function specific
     optimizations.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
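
/* For example, a handler declared as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   passes the string "IRQ" through here and yields ARM_FT_ISR from the
   table above, while a bare interrupt attribute with no argument
   defaults to the same value.  */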
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}
static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_UNIFIED_ASM)
    fprintf (f, "\t.syntax unified\n");
  else
    fprintf (f, "\t.syntax divided\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
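/* Illustrative example (an editorial sketch, not part of the original
   sources): the kind of C that exercises arm_trampoline_init is taking the
   address of a nested function (a GNU C extension), since the static chain
   must travel together with the code address:

	int apply (int (*f) (int)) { return f (42); }

	int outer (int k)
	{
	  int add_k (int x) { return x + k; }	// uses OUTER's frame
	  return apply (add_k);			// address taken => trampoline
	}

   The template above loads the static chain register and pc from the two
   words that arm_trampoline_init fills in at run time.  */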
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes() != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
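/* Illustrative example (not from the original sources): when the checks
   above all pass for a function that saved {r4, r5, lr}, the entire
   epilogue can be the single instruction

	ldmfd	sp!, {r4, r5, pc}

   whereas if LR had not been saved the restore would also need a separate
   "bx lr" (or "mov pc, lr"), which is why the LR test above rejects a
   single-instruction return in that case.  */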
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  /* Allow rotated constants in ARM mode.  */
  if (TARGET_ARM
      && ((i & ~0xc000003f) == 0
	  || (i & ~0xf000000f) == 0
	  || (i & ~0xfc000003) == 0))
    return TRUE;
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
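/* Illustrative sketch (an editorial addition, not used by the compiler):
   the ARM-mode rule above reduces to "an 8-bit value rotated right by an
   even amount".  Assuming 32-bit unsigned arithmetic, a direct check is:

	static int arm_imm_encodable_p (unsigned int x)
	{
	  int rot;
	  for (rot = 0; rot < 32; rot += 2)
	    {
	      // Rotate X left by ROT; if the result fits in 8 bits, then
	      // X is that 8-bit value rotated right by ROT.
	      unsigned int v = (x << rot) | (rot ? x >> (32 - rot) : 0);
	      if ((v & ~0xffu) == 0)
		return 1;
	    }
	  return 0;
	}

   For example 0xff000000 (0xff ror 8) and 0x00ab0000 (0xab ror 16) are
   encodable, while 0x00ffff00 is not (16 significant bits).  */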
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* else fall through.  */
    case COMPARE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	      && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
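/* Worked example (illustrative): for a DImode AND with
   0xFFFFFFFF000000FF, hi_val is 0xFFFFFFFF (the all-ones half needs no
   instruction at all) and lo_val is 0xFF, which const_ok_for_op accepts
   for AND, so the predicate above returns true.  */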
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
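/* Illustrative example (assumed output; the exact sequence depends on the
   target options): a SET of 0x00ff00ff is not a single valid immediate in
   ARM mode, so the constant is typically split into two rotated 8-bit
   immediates, e.g.

	mov	rD, #0x000000ff
	orr	rD, rD, #0x00ff0000

   while Thumb-2 can instead use one replicated-byte constant of the
   0x00XY00XY form accepted by const_ok_for_arm above.  */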
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			     struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end, loc;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it
		 will leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }
  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				    & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
			     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For eg. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12

	  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction. This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;
    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }
  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
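/* Worked example (illustrative): "x > 0xfff" cannot use cmp directly,
   because 0xfff is not an encodable immediate, but 0x1000 is, so the
   GT comparison against 0xfff is rewritten above as GE against 0x1000.  */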
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;
static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}

static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_VFP
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (i.e. not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
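/* Illustrative examples for the AAPCS case above: "struct { int a; }" is
   one word, so it comes back in r0; "struct { int a, b; }" is two words
   and, absent a co-processor candidate, goes to memory; a 16-byte vector
   is still returned in registers, but anything larger overflows the four
   call-clobbered registers and is returned in memory.  */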
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
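/* Usage note (an illustrative, hypothetical declaration): the attribute
   consulted above is written on a function type, e.g.

	double sin_vfp (double) __attribute__((pcs ("aapcs-vfp")));

   which selects the VFP argument/return convention for that one function
   even when the translation unit defaults to the base AAPCS variant.  */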
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
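/* Worked examples (illustrative): "struct { double x, y; }" yields
   *MODEP == DFmode and a count of 2, making it a homogeneous-aggregate
   candidate, while "struct { float f; double d; }" mixes base types and
   the walk above returns -1.  */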
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;

	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call, this routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}

static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
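/* Worked example of the rules above (assuming NUM_ARG_REGS == 4, i.e.
   r0-r3): for f (int a, long long b), A takes r0 and leaves NCRN == 1;
   rule C3 rounds NCRN up to 2 because the long long needs doubleword
   alignment, and rule C4 then places it in r2/r3.  A further doubleword
   argument would fall through C5/C6 and land on the stack via C7/C8.  */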
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      return true;

  return false;
}
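/* Example: with a 32-bit PARM_BOUNDARY, a DImode scalar (64-bit mode
   alignment) and any record containing a 64-bit aligned field both answer
   true here, while a plain SImode int answers false.  */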
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);

      pcum->can_split = false;
      return NULL_RTX;
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
	  ? DOUBLEWORD_ALIGNMENT
	  : PARM_BOUNDARY);
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
		       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
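/* Example (legacy ABI path, assuming NUM_ARG_REGS == 4 and
   UNITS_PER_WORD == 4): an 8-byte argument arriving when nregs == 3 can
   put only its first word in r3, so this returns (4 - 3) * 4 == 4 and the
   remaining 4 bytes are passed on the stack.  */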
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
			  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
							      type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    warning (OPT_Wattributes, "%qE attribute ignored",
		     name);
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      insert_insn_on_edge (seq,
				   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	    }
	}
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF &&
	       SYMBOL_REF_LOCAL_P (orig)))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  rtx pat;
	  rtx mem;

	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register ();

	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

	  /* Make the MEM as close to a constant as possible.  */
	  mem = SET_SRC (pat);
	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
	  MEM_READONLY_P (mem) = 1;
	  MEM_NOTRAP_P (mem) = 1;

	  insn = emit_insn (pat);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg--)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
	  ? crtl->args.info.aapcs_ncrn < 4
	  : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
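/* Example: if all of r0-r3 are live but the prologue pushes r4-r7
   (pushed_regs_mask == 0xf0), the loop over the call-saved low registers
   returns r7, the highest pushed low register.  */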
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
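/* Illustrative sketch of the non-VxWorks sequence emitted above for ARM
   state (where reading pc yields dot + 8):

	ldr	rPIC, .Lpicoff
   .LPICn:
	add	rPIC, pc, rPIC		@ rPIC now points at the GOT
	...
   .Lpicoff:
	.word	_GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)

   The UNSPEC_PIC_LABEL wrapper exists precisely because .LPICn never
   appears as an ordinary label in the insn stream.  */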
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx, insn;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
			       UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode
		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && !CONST_INT_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode
		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* ??? Can we assume ldrd for thumb2?  */
	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
	  /* ldrd supports offsets of +-1020.
	     However the ldr fallback does not.  */
	  return val > -256 && val < 256 && (val & 3) == 0;
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}

/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
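/* The ranges above mirror the 16-bit Thumb immediate-offset encodings:
   byte loads take 0..31, halfword loads 0..62 (even), and word loads
   0..124 (word-aligned); the val + GET_MODE_SIZE (mode) <= 128 form also
   keeps multi-word accesses from running past the addressable window.  */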
bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */
      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
			gen_rtvec (4, x, GEN_INT (reloc), label,
				   GEN_INT (TARGET_ARM ? 8 : 4)),
			UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
						     GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
	{
	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	  emit_move_insn (reg, gen_const_mem (SImode, reg));
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
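/* Illustrative sketch: the local-exec case reduces to reading the thread
   pointer (load_tp_hard, or the __aeabi_read_tp helper when there is no
   hardware TP) and adding the constant offset of X carried by the
   UNSPEC_TLS/TLS_LE32 wrapper:

	tp   = arm_load_tp ()
	dest = tp + #x(tpoff)

   Initial-exec differs only in fetching that offset from the GOT through
   the pc-relative UNSPEC_PIC_LABEL sequence.  */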
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	x = gen_rtx_PLUS (SImode, x, addend);

      return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
	return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode
	      || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use a 8-bit index.  So let's use a 12-bit index for SImode only and
	 hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
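/* Example of the constant-address split above: an SImode access to
   address 0x12345 uses bits == 12, giving base == 0x12000 and
   index == 0x345; the base constant is materialized once in a register
   and the 12-bit index is folded into the ldr/str offset, so neighbouring
   absolute addresses can share the base register.  */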
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
8116 arm_tls_referenced_p (rtx x
)
8118 if (! TARGET_HAVE_TLS
)
8121 subrtx_iterator::array_type array
;
8122 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8124 const_rtx x
= *iter
;
8125 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8128 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8129 TLS offsets, not real symbol references. */
8130 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8131 iter
.skip_subrtxes ();
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool XXX.

   When generating pic allow anything.  */
static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;
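      /* The loop above charges one cycle per two bits of the constant:
	 e.g. (an illustrative value) i == 0x100 takes five iterations
	 (0x100 -> 0x40 -> 0x10 -> 4 -> 1 -> 0), giving a cost of
	 COSTS_N_INSNS (2) + 5.  */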
    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
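	  /* E.g. (illustrative) 0xff00 is shiftable -- it can be built as
	     MOV #0xff; LSL #8, hence the two-insn cost -- while 0x12345 is
	     not, and gets the three-insn estimate below.  */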
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
static bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
  machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  rtx operand;
  enum rtx_code code = GET_CODE (x);
  *total = 0;

  switch (code)
    {
    case MEM:
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      if (TARGET_HARD_FLOAT && mode == SFmode)
	*total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (20);
      return false;

    case ROTATE:
      if (REG_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
      else if (!CONST_INT_P (XEXP (x, 1)))
	*total = rtx_cost (XEXP (x, 1), mode, code, 1, speed);

      /* Fall through */
    case ROTATERT:
      if (mode != SImode)
	{
	  *total += COSTS_N_INSNS (4);
	  return true;
	}

      /* Fall through */
    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
      if (mode == DImode)
	{
	  *total += COSTS_N_INSNS (3);
	  return true;
	}

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
	 and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
	  && outer != SET && !CONST_INT_P (XEXP (x, 1)))
	++*total;

      return true;

    case MINUS:
      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
	  if (CONST_INT_P (XEXP (x, 0))
	      && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	    {
	      *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_arm (INTVAL (XEXP (x, 1))))
	    {
	      *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
	      return true;
	    }

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 0))
		  && arm_const_double_rtx (XEXP (x, 0)))
		{
		  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
		  return true;
		}

	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		{
		  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
		  return true;
		}

	      return false;
	    }
	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      *total = COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 0))
	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	{
	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	  return true;
	}

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
	  return true;
	}

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed);
	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
	  return true;
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
						 0, speed);
	  if (REG_P (XEXP (XEXP (x, 1), 0))
	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);
	  return true;
	}

      /* Fall through */

    case PLUS:
      if (code == PLUS && arm_arch6 && mode == SImode
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
			      GET_CODE (XEXP (x, 0)), 0, speed);
	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	  return true;
	}

      /* MLA: All arguments must be registers.  We filter out
	 multiplication by a power of two, so that we fall down into
	 the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  /* The cost comes from the cost of the multiply.  */
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		{
		  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
		  return true;
		}

	      return false;
	    }

	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), mode, code,
						 1, speed);
	  if (REG_P (XEXP (XEXP (x, 0), 0))
	      && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);
	  return true;
	}

      /* Fall through */

    case AND: case XOR: case IOR:

      /* Normally the frame registers will be spilt into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
	  && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
	  && !CONST_INT_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1);

      if (mode == DImode)
	{
	  *total += COSTS_N_INSNS (2);
	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	    {
	      *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
	      return true;
	    }

	  return false;
	}

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	{
	  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
	  return true;
	}
      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
	  return true;
	}

      if (subcode == UMIN || subcode == UMAX
	  || subcode == SMIN || subcode == SMAX)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      return false;

    case MULT:
      /* This should have been handled by the CPU specific routines.  */
      gcc_unreachable ();

    case TRUNCATE:
      if (arm_arch3m && mode == SImode
	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
	{
	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, LSHIFTRT,
			     0, speed);
	  return true;
	}
      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
      return false;

    case NEG:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
      if (mode == SImode && code == NOT)
	{
	  subcode = GET_CODE (XEXP (x, 0));
	  if (subcode == ASHIFT || subcode == ASHIFTRT
	      || subcode == LSHIFTRT
	      || subcode == ROTATE || subcode == ROTATERT
	      || (subcode == MULT
		  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
	    {
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode,
				  0, speed);
	      /* Register shifts cost an extra cycle.  */
	      if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
							mode, subcode,
							1, speed);
	      return true;
	    }
	}

      return false;

    case IF_THEN_ELSE:
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	{
	  *total = COSTS_N_INSNS (4);
	  return true;
	}

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
	    && REG_P (XEXP (operand, 0))
	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
	*total += COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 1), VOIDmode, code, 1, speed);
      *total += rtx_cost (XEXP (x, 2), VOIDmode, code, 2, speed);
      return true;

    case NE:
      if (mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
						 0, speed);
	  return true;
	}
      goto scc_insn;

    case GE:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
						 0, speed);
	  return true;
	}
      goto scc_insn;

    case LT:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
						 0, speed);
	  return true;
	}
      goto scc_insn;

    case EQ:
    case GT:
    case LE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
    scc_insn:
      /* SCC insns.  In the case where the comparison has already been
	 performed, then they cost 2 instructions.  Otherwise they need
	 an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
	{
	  return true;
	}

      /* Fall through */
    case COMPARE:
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
	{
	  *total = 0;
	  return true;
	}

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	{
	  *total += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
	  return true;
	}

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  mode = GET_MODE (XEXP (x, 0));
	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  mode = GET_MODE (XEXP (x, 0));
	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
	  return true;
	}

      return false;

    case UMIN:
    case UMAX:
    case SMIN:
    case SMAX:
      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
      if (!CONST_INT_P (XEXP (x, 1))
	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
	*total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
      return true;

    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	  *total = COSTS_N_INSNS (20);
	  return false;
	}
      *total = COSTS_N_INSNS (1);
      if (mode == DImode)
	*total += COSTS_N_INSNS (3);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      *total = 0;
      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  rtx op = XEXP (x, 0);
	  machine_mode opmode = GET_MODE (op);

	  if (mode == DImode)
	    *total += COSTS_N_INSNS (1);

	  if (opmode != SImode)
	    {
	      if (MEM_P (op))
		{
		  /* If !arm_arch4, we use one of the extendhisi2_mem
		     or movhi_bytes patterns for HImode.  For a QImode
		     sign extension, we first zero-extend from memory
		     and then perform a shift sequence.  */
		  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
		    *total += COSTS_N_INSNS (2);
		}
	      else if (arm_arch6)
		*total += COSTS_N_INSNS (1);

	      /* We don't have the necessary insn, so we need to perform some
		 other operation.  */
	      else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
		/* An and with constant 255.  */
		*total += COSTS_N_INSNS (1);
	      else
		/* A shift sequence.  Increase costs slightly to avoid
		   combining two shifts into an extend operation.  */
		*total += COSTS_N_INSNS (2) + 1;
	    }

	  return false;
	}

      switch (GET_MODE (XEXP (x, 0)))
	{
	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V4QImode:
	case V2HImode:
	  *total = COSTS_N_INSNS (1);
	  return false;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      mode = GET_MODE (XEXP (x, 0));
      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
      return true;

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x))
	  || const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
						  INTVAL (x), NULL_RTX,
						  NULL_RTX, 1, 0));
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case HIGH:
      *total = COSTS_N_INSNS (1);
      return true;

    case LO_SUM:
      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    case SET:
      /* The vec_extract patterns accept memory operands that require an
	 address reload.  Account for the cost of that reload to give the
	 auto-inc-dec pass an incentive to try to replace them.  */
      if (TARGET_NEON && MEM_P (SET_DEST (x))
	  && GET_CODE (SET_SRC (x)) == VEC_SELECT)
	{
	  mode = GET_MODE (SET_DEST (x));
	  *total = rtx_cost (SET_DEST (x), mode, code, 0, speed);
	  if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
	    *total += COSTS_N_INSNS (1);
	  return true;
	}

      /* Likewise for the vec_set patterns.  */
      if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
	  && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
	  && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
	{
	  rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
	  mode = GET_MODE (SET_DEST (x));
	  *total = rtx_cost (mem, mode, code, 0, speed);
	  if (!neon_vector_mem_operand (mem, 2, true))
	    *total += COSTS_N_INSNS (1);
	  return true;
	}
      return false;

    case UNSPEC:
      /* We cost this as high as our memory costs to allow this to
	 be hoisted from loops.  */
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
	{
	  *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
	}
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    default:
      *total = COSTS_N_INSNS (4);
      return false;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need
   finer-grained tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through on purpose for normal RTX.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return COSTS_N_INSNS (words)
	     + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
				    || satisfies_constraint_K (SET_SRC (x))
				       /* thumb1_movdi_insn.  */
				    || ((words > 1) && MEM_P (SET_SRC (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));
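      /* E.g. a DImode load (8 bytes with UNITS_PER_WORD == 4) costs
	 COSTS_N_INSNS (1) + COSTS_N_INSNS (1) * 1, i.e. two insns, plus
	 one more when it is a literal-pool reference.  */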
    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		    int *total)
{
  machine_mode mode = GET_MODE (x);
  if (TARGET_THUMB1)
    {
      *total = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
  switch (code)
    {
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*total = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*total = COSTS_N_INSNS (2);
      else
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
						 0, false);
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), mode, code,
						 0, false);
	  return true;
	}
      else if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
						 0, false);
	  /* Slightly disparage register shifts, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *total += 1 + rtx_cost (XEXP (x, 1), mode, code, 1, false);
	  return true;
	}

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      if (mode == SImode)
	{
	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
	      || subcode1 == ROTATE || subcode1 == ROTATERT
	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
	      || subcode1 == ASHIFTRT)
	    {
	      /* It's just the cost of the two operands.  */
	      *total = 0;
	      return false;
	    }

	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, false);
	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, false);
	  return true;
	}

      /* Fall through */
    case AND: case XOR: case IOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));

	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
	      || subcode == LSHIFTRT || subcode == ASHIFTRT
	      || (code == AND && subcode == NOT))
	    {
	      /* It's just the cost of the two operands.  */
	      *total = 0;
	      return false;
	    }
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case MULT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      return false;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode))
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      return arm_rtx_costs_1 (x, outer_code, total, 0);

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x)))
	/* A multiplication by a constant requires another instruction
	   to load the constant to a register.  */
	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
				? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
	{
	  if (outer_code == COMPARE || outer_code == PLUS
	      || outer_code == MINUS)
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	}
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer_code == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
	 cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;
      return true;

    case SET:
      return false;

    default:
      if (mode != VOIDmode)
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*total = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
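/* For example, (mult X (const_int 8)) is a shifter operand, encodable as
   "X, LSL #3" since exact_log2 (8) == 3, and X is returned.  For
   (ashift X Y) with Y a register, X is returned and *SHIFT_REG is set
   to Y.  */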
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}

/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
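/* So a two-argument libcall is costed as COSTS_N_INSNS (20) when optimizing
   for speed and COSTS_N_INSNS (4) at -Os.  */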
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
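/* The macro is invoked as e.g. HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) below:
   it looks for a shift in operand IDX of X and, when the shift is a left
   shift (the only form usable in the narrow-mode patterns), accumulates the
   shift-plus-arith cost and returns from the calling cost function.  */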
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained with an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		   const struct cpu_cost_table *extra_cost,
		   int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  || mode == DImode)
				 ? 2 : 1);
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
			    0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;
	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;
	  goto const_int_cost;
	}

      return false;

    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*cost = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*cost = COSTS_N_INSNS (2);
      else
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* For speed optimizations, add the costs of the address and
	 accessing memory.  */
      if (speed_p)
#ifdef NOT_YET
	*cost += (extra_cost->ldst.load
		  + arm_address_cost (XEXP (x, 0), mode,
				      ADDR_SPACE_GENERIC, speed_p));
#else
	*cost += extra_cost->ldst.load;
#endif
      return true;

    case PARALLEL:
    {
      /* Calculations of LDM costs are complex.  We assume an initial cost
	 (ldm_1st) which will load the number of registers mentioned in
	 ldm_regs_per_insn_1st registers; then each additional
	 ldm_regs_per_insn_subsequent registers cost one more insn.  The
	 formula for N regs is thus:

	 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				   + ldm_regs_per_insn_subsequent - 1)
				  / ldm_regs_per_insn_subsequent).

	 Additional costs may also be added for addressing.  A similar
	 formula is used for STM.  */
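      /* As an illustration (numbers chosen here, not from any CPU table):
	 with ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent
	 == 1, a five-register LDM adds COSTS_N_INSNS ((3 + 1 - 1) / 1),
	 i.e. three insns on top of the initial ldm_1st cost.  */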
      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      if (is_ldm || is_stm)
	{
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
				? extra_cost->ldst.ldm_regs_per_insn_1st
				: extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
			       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
			       : extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					 + regs_per_insn_sub - 1)
					/ regs_per_insn_sub);
	    }
	}
      return true;
    }
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
				? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */

    case MOD:
      /* MOD by a power of 2 can be expanded as:
	 rsbs    r1, r0, #0
	 and     r0, r0, #(n - 1)
	 and     r1, r1, #(n - 1)
	 rsbpl   r0, r1, #0.  */
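      /* The sequence gives a remainder whose sign follows the dividend, as
	 C requires: negate, mask both copies by (n - 1), then conditionally
	 negate the masked value back when the dividend was non-positive.  */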
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && mode == SImode)
	{
	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;
	  return true;
	}

      /* Fall-through.  */
    case UMOD:
      *cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (1)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (2)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		}
	      else
		{
		  /* Needs two shifts.  */
		  *cost += COSTS_N_INSNS (1);
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;

    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost += COSTS_N_INSNS (9);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (4);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (sub_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
	      non_shift_op = XEXP (x, 0);
	    }
	  else
	    non_shift_op = XEXP (x, 1);

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 0)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op = XEXP (x, 0);

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
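		  /* SMLABB/SMLABT/SMLATB/SMLATT multiply two signed 16-bit
		     halves (B = bottom, T = top) and accumulate into a
		     32-bit register; a sign_extend selects the bottom half
		     and an ashiftrt by 16 the top half, which is what the
		     condition above recognizes.  */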
10045 *cost
+= extra_cost
->mult
[0].extend_add
;
10046 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
10047 SIGN_EXTEND
, 0, speed_p
)
10048 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
10049 SIGN_EXTEND
, 0, speed_p
)
10050 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10055 *cost
+= extra_cost
->mult
[0].add
;
10056 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
10057 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
10058 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10061 if (CONST_INT_P (XEXP (x
, 1)))
10063 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10064 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10066 *cost
= COSTS_N_INSNS (insns
);
10068 *cost
+= insns
* extra_cost
->alu
.arith
;
10069 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10073 *cost
+= extra_cost
->alu
.arith
;
10078 if (mode
== DImode
)
10081 && GET_CODE (XEXP (x
, 0)) == MULT
10082 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10083 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10084 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10085 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10088 *cost
+= extra_cost
->mult
[1].extend_add
;
10089 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10090 ZERO_EXTEND
, 0, speed_p
)
10091 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
10092 ZERO_EXTEND
, 0, speed_p
)
10093 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10097 *cost
+= COSTS_N_INSNS (1);
10099 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10100 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10103 *cost
+= (extra_cost
->alu
.arith
10104 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10105 ? extra_cost
->alu
.arith
10106 : extra_cost
->alu
.arith_shift
));
10108 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10110 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10115 *cost
+= 2 * extra_cost
->alu
.arith
;
10120 *cost
= LIBCALL_COST (2);
10123 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10126 *cost
+= extra_cost
->alu
.rev
;
10130 /* Fall through. */
10131 case AND
: case XOR
:
10132 if (mode
== SImode
)
10134 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10135 rtx op0
= XEXP (x
, 0);
10136 rtx shift_op
, shift_reg
;
10140 || (code
== IOR
&& TARGET_THUMB2
)))
10141 op0
= XEXP (op0
, 0);
10144 shift_op
= shifter_op_p (op0
, &shift_reg
);
10145 if (shift_op
!= NULL
)
10150 *cost
+= extra_cost
->alu
.log_shift_reg
;
10151 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10154 *cost
+= extra_cost
->alu
.log_shift
;
10156 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10157 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10161 if (CONST_INT_P (XEXP (x
, 1)))
10163 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10164 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10167 *cost
= COSTS_N_INSNS (insns
);
10169 *cost
+= insns
* extra_cost
->alu
.logical
;
10170 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
10175 *cost
+= extra_cost
->alu
.logical
;
10176 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
10177 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10181 if (mode
== DImode
)
10183 rtx op0
= XEXP (x
, 0);
10184 enum rtx_code subcode
= GET_CODE (op0
);
10186 *cost
+= COSTS_N_INSNS (1);
10190 || (code
== IOR
&& TARGET_THUMB2
)))
10191 op0
= XEXP (op0
, 0);
10193 if (GET_CODE (op0
) == ZERO_EXTEND
)
10196 *cost
+= 2 * extra_cost
->alu
.logical
;
10198 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
10200 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10203 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10206 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10208 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
10210 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10215 *cost
+= 2 * extra_cost
->alu
.logical
;
10221 *cost
= LIBCALL_COST (2);
10225 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10226 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10228 rtx op0
= XEXP (x
, 0);
10230 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10231 op0
= XEXP (op0
, 0);
10234 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10236 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10237 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10240 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10242 *cost
= LIBCALL_COST (2);
10246 if (mode
== SImode
)
10248 if (TARGET_DSP_MULTIPLY
10249 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10250 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10251 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10252 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10253 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10254 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10255 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10256 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10257 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10258 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10259 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10260 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10263 /* SMUL[TB][TB]. */
10265 *cost
+= extra_cost
->mult
[0].extend
;
10266 *cost
+= rtx_cost (XEXP (x
, 0), mode
, SIGN_EXTEND
, 0, speed_p
);
10267 *cost
+= rtx_cost (XEXP (x
, 1), mode
, SIGN_EXTEND
, 1, speed_p
);
10271 *cost
+= extra_cost
->mult
[0].simple
;
10275 if (mode
== DImode
)
10278 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10279 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10280 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10281 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10284 *cost
+= extra_cost
->mult
[1].extend
;
10285 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10286 ZERO_EXTEND
, 0, speed_p
)
10287 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10288 ZERO_EXTEND
, 0, speed_p
));
10292 *cost
= LIBCALL_COST (2);
10297 *cost
= LIBCALL_COST (2);
10301 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10302 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10304 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10307 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10312 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10316 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10318 *cost
= LIBCALL_COST (1);
10322 if (mode
== SImode
)
10324 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10326 *cost
+= COSTS_N_INSNS (1);
10327 /* Assume the non-flag-changing variant. */
10329 *cost
+= (extra_cost
->alu
.log_shift
10330 + extra_cost
->alu
.arith_shift
);
10331 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10335 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10336 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10338 *cost
+= COSTS_N_INSNS (1);
10339 /* No extra cost for MOV imm and MVN imm. */
10340 /* If the comparison op is using the flags, there's no further
10341 cost, otherwise we need to add the cost of the comparison. */
10342 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10343 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10344 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10346 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10347 *cost
+= (COSTS_N_INSNS (1)
10348 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10350 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10353 *cost
+= extra_cost
->alu
.arith
;
10359 *cost
+= extra_cost
->alu
.arith
;
10363 if (GET_MODE_CLASS (mode
) == MODE_INT
10364 && GET_MODE_SIZE (mode
) < 4)
10366 /* Slightly disparage, as we might need an extend operation. */
10369 *cost
+= extra_cost
->alu
.arith
;
10373 if (mode
== DImode
)
10375 *cost
+= COSTS_N_INSNS (1);
10377 *cost
+= 2 * extra_cost
->alu
.arith
;
10382 *cost
= LIBCALL_COST (1);
10386 if (mode
== SImode
)
10389 rtx shift_reg
= NULL
;
10391 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10395 if (shift_reg
!= NULL
)
10398 *cost
+= extra_cost
->alu
.log_shift_reg
;
10399 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10402 *cost
+= extra_cost
->alu
.log_shift
;
10403 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10408 *cost
+= extra_cost
->alu
.logical
;
10411 if (mode
== DImode
)
10413 *cost
+= COSTS_N_INSNS (1);
10419 *cost
+= LIBCALL_COST (1);
10424 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10426 *cost
+= COSTS_N_INSNS (3);
10429 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10430 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10432 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10433 /* Assume that if one arm of the if_then_else is a register,
10434 that it will be tied with the result and eliminate the
10435 conditional insn. */
10436 if (REG_P (XEXP (x
, 1)))
10438 else if (REG_P (XEXP (x
, 2)))
10444 if (extra_cost
->alu
.non_exec_costs_exec
)
10445 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10447 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10450 *cost
+= op1cost
+ op2cost
;
10456 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10460 machine_mode op0mode
;
10461 /* We'll mostly assume that the cost of a compare is the cost of the
10462 LHS. However, there are some notable exceptions. */
10464 /* Floating point compares are never done as side-effects. */
10465 op0mode
= GET_MODE (XEXP (x
, 0));
10466 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10467 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10470 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10472 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10474 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10480 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10482 *cost
= LIBCALL_COST (2);
10486 /* DImode compares normally take two insns. */
10487 if (op0mode
== DImode
)
10489 *cost
+= COSTS_N_INSNS (1);
10491 *cost
+= 2 * extra_cost
->alu
.arith
;
10495 if (op0mode
== SImode
)
10500 if (XEXP (x
, 1) == const0_rtx
10501 && !(REG_P (XEXP (x
, 0))
10502 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10503 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10505 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10507 /* Multiply operations that set the flags are often
10508 significantly more expensive. */
10510 && GET_CODE (XEXP (x
, 0)) == MULT
10511 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10512 *cost
+= extra_cost
->mult
[0].flag_setting
;
10515 && GET_CODE (XEXP (x
, 0)) == PLUS
10516 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10517 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10519 *cost
+= extra_cost
->mult
[0].flag_setting
;
10524 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10525 if (shift_op
!= NULL
)
10527 if (shift_reg
!= NULL
)
10529 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10532 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10535 *cost
+= extra_cost
->alu
.arith_shift
;
10536 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10537 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10542 *cost
+= extra_cost
->alu
.arith
;
10543 if (CONST_INT_P (XEXP (x
, 1))
10544 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10546 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10554 *cost
= LIBCALL_COST (2);
10577 if (outer_code
== SET
)
10579 /* Is it a store-flag operation? */
10580 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10581 && XEXP (x
, 1) == const0_rtx
)
10583 /* Thumb also needs an IT insn. */
10584 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10587 if (XEXP (x
, 1) == const0_rtx
)
10592 /* LSR Rd, Rn, #31. */
10594 *cost
+= extra_cost
->alu
.shift
;
10604 *cost
+= COSTS_N_INSNS (1);
10608 /* RSBS T1, Rn, Rn, LSR #31
10610 *cost
+= COSTS_N_INSNS (1);
10612 *cost
+= extra_cost
->alu
.arith_shift
;
10616 /* RSB Rd, Rn, Rn, ASR #1
10617 LSR Rd, Rd, #31. */
10618 *cost
+= COSTS_N_INSNS (1);
10620 *cost
+= (extra_cost
->alu
.arith_shift
10621 + extra_cost
->alu
.shift
);
10627 *cost
+= COSTS_N_INSNS (1);
10629 *cost
+= extra_cost
->alu
.shift
;
10633 /* Remaining cases are either meaningless or would take
10634 three insns anyway. */
10635 *cost
= COSTS_N_INSNS (3);
10638 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10643 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10644 if (CONST_INT_P (XEXP (x
, 1))
10645 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10647 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10654 /* Not directly inside a set. If it involves the condition code
10655 register it must be the condition for a branch, cond_exec or
10656 I_T_E operation. Since the comparison is performed elsewhere
10657 this is just the control part which has no additional
10659 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10660 && XEXP (x
, 1) == const0_rtx
)
10668 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10669 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10672 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10676 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10678 *cost
= LIBCALL_COST (1);
10682 if (mode
== SImode
)
10685 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10689 *cost
= LIBCALL_COST (1);
10693 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10694 && MEM_P (XEXP (x
, 0)))
10696 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10698 if (mode
== DImode
)
10699 *cost
+= COSTS_N_INSNS (1);
10704 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10705 *cost
+= extra_cost
->ldst
.load
;
10707 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10709 if (mode
== DImode
)
10710 *cost
+= extra_cost
->alu
.shift
;
10715 /* Widening from less than 32-bits requires an extend operation. */
10716 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10718 /* We have SXTB/SXTH. */
10719 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10721 *cost
+= extra_cost
->alu
.extend
;
10723 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10725 /* Needs two shifts. */
10726 *cost
+= COSTS_N_INSNS (1);
10727 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10729 *cost
+= 2 * extra_cost
->alu
.shift
;
10732 /* Widening beyond 32-bits requires one more insn. */
10733 if (mode
== DImode
)
10735 *cost
+= COSTS_N_INSNS (1);
10737 *cost
+= extra_cost
->alu
.shift
;
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	    *cost += extra_cost->alu.logical;
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	    *cost += extra_cost->alu.extend;
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	    *cost += 2 * extra_cost->alu.shift;
      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	  /* Avoid blowing up in arm_gen_constant ().  */
	  if (!(outer_code == PLUS
		|| outer_code == AND
		|| outer_code == IOR
		|| outer_code == XOR
		|| outer_code == MINUS))
      if (mode == SImode)
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
      if (arm_arch_thumb2 && !flag_pic)
	*cost += COSTS_N_INSNS (1);
	*cost += extra_cost->ldst.load;
      *cost += COSTS_N_INSNS (1);
	  *cost += COSTS_N_INSNS (1);
	    *cost += extra_cost->alu.arith;
      *cost = COSTS_N_INSNS (4);
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	  if (vfp3_const_double_rtx (x))
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
		*cost += extra_cost->ldst.loadf;
	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));
      *cost = COSTS_N_INSNS (4);
	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
	*cost = COSTS_N_INSNS (4);
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
	*cost += extra_cost->alu.clz;
      if (XEXP (x, 1) == const0_rtx)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
      /* Fall through.  */
      *cost += COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
      *cost = LIBCALL_COST (1);
    case UNSPEC_VOLATILE:
      return arm_unspec_cost (x, outer_code, speed_p, cost);
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      if (TARGET_HARD_FLOAT)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_FPU_ARMV8
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
		*cost += extra_cost->fp[0].widen;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
      *cost = LIBCALL_COST (1);
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  /* Vector modes?  */
      *cost = LIBCALL_COST (1);
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);
	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);
	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);
	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
	    *cost += extra_cost->fp[mode == DFmode].fma;
      *cost = LIBCALL_COST (3);
      if (TARGET_HARD_FLOAT)
	  if (GET_MODE_CLASS (mode) == MODE_INT)
	      mode = GET_MODE (XEXP (x, 0));
		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_FPU_ARMV8)
		*cost += extra_cost->fp[mode == DFmode].roundint;
      /* Vector costs?  */
      *cost = LIBCALL_COST (1);
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	    *cost += extra_cost->fp[mode == DFmode].fromint;
      *cost = LIBCALL_COST (1);
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
	*cost = COSTS_N_INSNS (asm_length + num_operands);
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
	*cost = COSTS_N_INSNS (4); /* Who knows?  */

#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs when optimizing for size.  */
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
  int code = GET_CODE (x);
  if (TARGET_OLD_RTX_COSTS
      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
      /* Old way.  (Deprecated.)  */
	result = arm_size_rtx_costs (x, (enum rtx_code) code,
				     (enum rtx_code) outer_code, total);
	result = current_tune->rtx_costs (x, (enum rtx_code) code,
					  (enum rtx_code) outer_code, total,
      if (current_tune->insn_extra_cost)
	result = arm_new_rtx_costs (x, (enum rtx_code) code,
				    (enum rtx_code) outer_code,
				    current_tune->insn_extra_cost,
	/* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
	   && current_tune->insn_extra_cost != NULL  */
	result = arm_new_rtx_costs (x, (enum rtx_code) code,
				    (enum rtx_code) outer_code,
				    &generic_extra_costs, total, speed);
  if (dump_file && (dump_flags & TDF_DETAILS))
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
  machine_mode mode = GET_MODE (x);
      *total = thumb1_rtx_costs (x, code, outer_code);
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
	  *total = COSTS_N_INSNS (20);
      if (CONST_INT_P (XEXP (x, 1)))
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;
	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 2;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	      i >>= booth_unit_size;
	  *total = COSTS_N_INSNS (cost);
	  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
      *total = COSTS_N_INSNS (20);
  return arm_rtx_costs_1 (x, outer_code, total, speed);
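/* Worked example for the Booth loop above: with booth_unit_size == 2 each
   iteration retires two bits of the (zero-extended) 32-bit multiplier, so
   a constant such as 0x55 clears after four iterations, while a constant
   with its top bit set walks all 16 steps before j reaches 32.  */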
/* RTX cost for cores with a fast multiply unit (M variants).  */

arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
  machine_mode mode = GET_MODE (x);
      *total = thumb1_rtx_costs (x, code, outer_code);
  /* ??? should thumb2 use different costs?  */
      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (2);
      if (mode == DImode)
	  *total = COSTS_N_INSNS (5);
      if (CONST_INT_P (XEXP (x, 1)))
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;
	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 8;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	      i >>= booth_unit_size;
	  *total = COSTS_N_INSNS (cost);
      if (mode == SImode)
	  *total = COSTS_N_INSNS (4);
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  if (TARGET_HARD_FLOAT
	       || (mode == DFmode && !TARGET_VFP_SINGLE)))
	      *total = COSTS_N_INSNS (1);
      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
  return arm_rtx_costs_1 (x, outer_code, total, speed);
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		      int *total, bool speed)
  machine_mode mode = GET_MODE (x);
      *total = thumb1_rtx_costs (x, code, outer_code);
      if (GET_CODE (XEXP (x, 0)) != MULT)
	return arm_rtx_costs_1 (x, outer_code, total, speed);
      /* A COMPARE of a MULT is slow on XScale; the muls instruction
	 will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);
      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (2);
      if (mode == DImode)
	  *total = COSTS_N_INSNS (5);
      if (CONST_INT_P (XEXP (x, 1)))
	  /* If operand 1 is a constant we can more accurately
	     calculate the cost of the multiply.  The multiplier can
	     retire 15 bits on the first cycle and a further 12 on the
	     second.  We do, of course, have to load the constant into
	     a register first.  */
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
	  /* There's a general overhead of one cycle.  */
	  unsigned HOST_WIDE_INT masked_const;
	  if (i & 0x80000000)
	    i &= (unsigned HOST_WIDE_INT) 0xffffffff;
	  masked_const = i & 0xffff8000;
	  if (masked_const != 0)
	      masked_const = i & 0xf8000000;
	      if (masked_const != 0)
	  *total = COSTS_N_INSNS (cost);
      if (mode == SImode)
	  *total = COSTS_N_INSNS (3);
      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
  return arm_rtx_costs_1 (x, outer_code, total, speed);
/* RTX costs for 9e (and later) cores.  */

arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		  int *total, bool speed)
  machine_mode mode = GET_MODE (x);
	  /* Small multiply: 32 cycles for an integer multiply inst.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    *total = COSTS_N_INSNS (32);
	    *total = COSTS_N_INSNS (3);
      *total = thumb1_rtx_costs (x, code, outer_code);
      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (2);
      if (mode == DImode)
	  *total = COSTS_N_INSNS (5);
      if (mode == SImode)
	  *total = COSTS_N_INSNS (2);
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  if (TARGET_HARD_FLOAT
	       || (mode == DFmode && !TARGET_VFP_SINGLE)))
	      *total = COSTS_N_INSNS (1);
      *total = COSTS_N_INSNS (20);
  return arm_rtx_costs_1 (x, outer_code, total, speed);
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */

arm_arm_address_cost (rtx x)
  enum rtx_code c = GET_CODE (x);
  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
      if (CONST_INT_P (XEXP (x, 1)))
      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))

arm_thumb_address_cost (rtx x)
  enum rtx_code c = GET_CODE (x);
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))

arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
/* Adjust cost hook for XScale.  */

xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);
      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	  rtx shifted_operand;
	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];
	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
/* Adjust cost hook for Cortex A9.  */

cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
  switch (REG_NOTE_KIND (link))
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	  if (GET_CODE (PATTERN (insn)) == SET)
	       (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
	       (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
	      enum attr_type attr_type_insn = get_attr_type (insn);
	      enum attr_type attr_type_dep = get_attr_type (dep);
	      /* By default all dependencies of the form
		 have an extra latency of 1 cycle because
		 of the input and output dependency in this
		 case.  However this gets modeled as a true
		 dependency and hence all these checks.  */
	      if (REG_P (SET_DEST (PATTERN (insn)))
		  && REG_P (SET_DEST (PATTERN (dep)))
		  && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
					      SET_DEST (PATTERN (dep))))
		  /* FMACS is a special case where the dependent
		     instruction can be issued 3 cycles before
		     the normal latency in case of an output
		     dependency.  */
		  if ((attr_type_insn == TYPE_FMACS
		       || attr_type_insn == TYPE_FMACD)
		      && (attr_type_dep == TYPE_FMACS
			  || attr_type_dep == TYPE_FMACD))
		      if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
			*cost = insn_default_latency (dep) - 3;
			*cost = insn_default_latency (dep);
		      if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
			*cost = insn_default_latency (dep) + 1;
			*cost = insn_default_latency (dep);
      gcc_unreachable ();
/* Adjust cost hook for FA726TE.  */

fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
  /* For FA726TE, a true dependency on CPSR (i.e. a cond-setting insn
     followed by a predicated one) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
      if (from == HI_REGS || to == HI_REGS)
/* Implement TARGET_MEMORY_MOVE_COST.  */

arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
      if (GET_MODE_SIZE (mode) < 4)
      return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
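/* For illustration of the return expression above: an SImode value
   (4 bytes) is costed at 2 * 4 * 1 = 8 when RCLASS is LO_REGS and at
   2 * 4 * 2 = 16 for any other class, making low-register spills
   noticeably cheaper.  */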
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */

arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				int misalign ATTRIBUTE_UNUSED)
  switch (type_of_cost)
	return current_tune->vec_costs->scalar_stmt_cost;
	return current_tune->vec_costs->scalar_load_cost;
	return current_tune->vec_costs->scalar_store_cost;
	return current_tune->vec_costs->vec_stmt_cost;
	return current_tune->vec_costs->vec_align_load_cost;
	return current_tune->vec_costs->vec_store_cost;
      case vec_to_scalar:
	return current_tune->vec_costs->vec_to_scalar_cost;
      case scalar_to_vec:
	return current_tune->vec_costs->scalar_to_vec_cost;
      case unaligned_load:
	return current_tune->vec_costs->vec_unalign_load_cost;
      case unaligned_store:
	return current_tune->vec_costs->vec_unalign_store_cost;
      case cond_branch_taken:
	return current_tune->vec_costs->cond_taken_branch_cost;
      case cond_branch_not_taken:
	return current_tune->vec_costs->cond_not_taken_branch_cost;
      case vec_promote_demote:
	return current_tune->vec_costs->vec_stmt_cost;
      case vec_construct:
	elements = TYPE_VECTOR_SUBPARTS (vectype);
	return elements / 2 + 1;
      gcc_unreachable ();
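/* Worked example for the vec_construct case above: building a V4SI vector
   element-by-element has TYPE_VECTOR_SUBPARTS == 4 and so is costed at
   4 / 2 + 1 = 3 units, while a V16QI construction costs 16 / 2 + 1 = 9.  */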
/* Implement targetm.vectorize.add_stmt_cost.  */

arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;
  if (flag_vect_cost_model)
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */
      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
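/* For illustration of the weighting above: a statement with base cost 1
   that occurs once in the body of an inner loop is accumulated as
   1 * 50 = 50 units, so the cost model strongly favours vectorizing the
   innermost loop.  */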
/* Return true if and only if this insn can dual-issue only as older.  */

cortexa7_older_only (rtx_insn *insn)
  if (recog_memoized (insn) < 0)
  switch (get_attr_type (insn))
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:

/* Return true if and only if this insn can dual-issue as younger.  */

cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
  if (recog_memoized (insn) < 0)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
  switch (get_attr_type (insn))
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:

/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
  int first_older_only = -1, first_younger = -1;
	   ";; sched_reorder for cycle %d with %d insns in ready list\n",
  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	  first_older_only = i;
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
  /* Move first_older_only insn before first_younger.  */
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
      ready[i] = ready[i + 1];
  ready[i] = first_older_only_insn;
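/* Example of the move above: the scheduler issues from the high end of
   the ready array, so if an older-only insn sits at index i and the
   first younger insn at index j > i, the loop shifts elements i+1..j
   down one slot and stores the older-only insn at index j, causing it
   to be issued ahead of the younger insn.  */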
/* Implement TARGET_SCHED_REORDER.  */

arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
  /* Do nothing for other cores.  */
  return arm_issue_rate ();

/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */

arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
  if (current_tune->sched_adjust_cost != NULL)
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
  /* XXX Is this strictly true?  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */
      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
arm_max_conditional_execute (void)
  return max_insns_skipped;

arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  return (optimize > 0) ? 2 : 0;

arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);

arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

init_fp_table (void)
  r = REAL_VALUE_ATOF ("0", DFmode);
  fp_consts_inited = true;

/* Return TRUE if rtx X is a valid immediate FP constant.  */

arm_const_double_rtx (rtx x)
  if (!fp_consts_inited)
  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
  if (REAL_VALUES_EQUAL (r, value_fp0))
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).  */

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */

vfp3_const_double_index (rtx x)
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);
  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;
  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT) 1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;
  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  gcc_assert (mantissa >= 16 && mantissa <= 31);
  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;
  if (exponent < 0 || exponent > 7)
  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
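/* Worked example: for 1.0, REAL_EXP gives 1 (GCC significands lie in
   [0.5, 1), so 1.0 = 0.5 * 2^1).  The top five mantissa bits are 10000,
   i.e. mantissa == 16, and exponent becomes 5 - 1 = 4, so the function
   returns (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70 -- the familiar
   imm8 encoding of +1.0 used by fconsts/vmov.f32 s0, #1.0.  */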
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */

vfp3_const_double_rtx (rtx x)
  return vfp3_const_double_index (x) != -1;

/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
		      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table,
   or -1 if the given value doesn't match any of the listed patterns.  */
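/* For instance, a V4SI constant with every element equal to 255 splats to
   the per-element byte pattern ff 00 00 00 (the splat loop below stores
   least-significant byte first), which the CHECK tests classify as
   variant 0 with element size 32; the assembler can then emit it as
   e.g. vmov.i32 q0, #255.  */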
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  for (i = 0; i < idx; i += (STRIDE))		\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;
    n_elts = CONST_VECTOR_NUNITS (op);
  if (mode == VOIDmode)
  innersize = GET_MODE_UNIT_SIZE (mode);
  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;
      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
      for (i = 1; i < n_elts; i++)
	  rtx elt = CONST_VECTOR_ELT (op, i);
	  REAL_VALUE_TYPE re;
	  REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
	  if (!REAL_VALUES_EQUAL (r0, re))
	*modconst = CONST_VECTOR_ELT (op, 0);
      if (el0 == CONST0_RTX (GET_MODE (el0)))
  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;
      if (CONST_INT_P (el))
	  elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	  elpart = CONST_DOUBLE_LOW (el);
	gcc_unreachable ();
      for (part = 0; part < parts; part++)
	  for (byte = 0; byte < innersize; byte++)
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	  if (CONST_DOUBLE_P (el))
	    elpart = CONST_DOUBLE_HIGH (el);
  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));
  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		   && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		   && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		   && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		   && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		   && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		   && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		   && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		   && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		    && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		    && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
		    && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
		    && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

  CHECK (1, 8, 16, bytes[i] == bytes[0]);

  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
		    && bytes[i] == bytes[(i + 8) % idx]);

    *elementwidth = elsize;

      unsigned HOST_WIDE_INT imm = 0;
      /* Un-invert bytes of recognized vector, if necessary.  */
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);
	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);
	  *modconst = GEN_INT (imm);
      unsigned HOST_WIDE_INT imm = 0;
      for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
      *modconst = GEN_INT (imm);
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
    *modconst = tmpconst;
    *elementwidth = tmpwidth;

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
  if (retval < 0 || retval > 5)
    *modconst = tmpconst;
    *elementwidth = tmpwidth;
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts,
   because they have different limitations.  */

neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;
  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;
      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	gcc_unreachable ();
      if (i != 0 && elpart != last_elt)
  /* Shift less than element size.  */
  maxshift = innersize * 8;
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
    *elementwidth = innersize * 8;
    *modconst = CONST_VECTOR_ELT (op, 0);
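/* So for a V8HI operand (16-bit elements) a splatted shift count of 16 is
   rejected for vshl, whose immediate must lie in 0..15, but accepted for
   vshr, whose immediate may run from 1 up to the element size.  */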
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
  int width, is_valid;
  static char templ[40];
  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
  gcc_assert (is_valid != 0);
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
  int width, is_valid;
  static char templ[40];
  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  for (i = parts / 2; i >= 1; i /= 2)
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
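/* The loop above therefore emits log2(parts) pairwise steps: a V4SF
   reduction (parts == 4) takes two applications of REDUC (e.g. two
   vpadd.f32 insns for a sum), and a V8QI one takes three, with only the
   final step writing op0.  */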
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

neon_vdup_constant (rtx vals)
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, x);
  return gen_rtx_VEC_DUPLICATE (mode, x);
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

neon_make_constant (rtx vals)
  machine_mode mode = GET_MODE (vals);
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  if (GET_CODE (vals) == CONST_VECTOR)
  else if (GET_CODE (vals) == PARALLEL)
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    gcc_unreachable ();
  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
/* Initialize vector TARGET to VALS.  */

neon_expand_vector_init (rtx target, rtx vals)
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  for (i = 0; i < n_elts; ++i)
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	  emit_move_insn (target, constant);
  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);
      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  gcc_unreachable ();
  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
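/* Example of the one-variable path above: for { a, 1, 2, 3 } in V4SI the
   routine first materializes the constant vector { 1, 1, 2, 3 } (the
   varying slot borrows its neighbour's value) and then emits a single
   vset_lane to insert `a' at index 0, avoiding a trip through the
   stack.  */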
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  EXP indicates the source location, which includes the
   inlining history for intrinsics.  */

bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
  HOST_WIDE_INT lane;
  gcc_assert (CONST_INT_P (operand));
  lane = INTVAL (operand);
  if (lane < low || lane >= high)
	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);

/* Bounds-check lanes.  */

neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
  bounds_check (operand, low, high, exp, "lane");

/* Bounds-check constants.  */

neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
  bounds_check (operand, low, high, NULL_TREE, "constant");

neon_element_bits (machine_mode mode)
  return GET_MODE_UNIT_BITSIZE (mode);
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

arm_coproc_mem_operand (rtx op, bool wb)
  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
  /* Constants are converted into offsets from labels.  */
  ind = XEXP (op, 0);
  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
  /* Match: (mem (reg)).  */
    return arm_address_register_rtx_p (ind, 0);
  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)  */

neon_vector_mem_operand (rtx op, int type, bool strict)
  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
  /* Constants are converted into offsets from labels.  */
  ind = XEXP (op, 0);
  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
  /* Match: (mem (reg)).  */
    return arm_address_register_rtx_p (ind, 0);
  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */

neon_struct_mem_operand (rtx op)
  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
  /* Constants are converted into offsets from labels.  */
  ind = XEXP (op, 0);
  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
  /* Match: (mem (reg)).  */
    return arm_address_register_rtx_p (ind, 0);
  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

/* Return true if X is a register that will be eliminated later on.  */

arm_eliminable_register (rtx x)
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
/* Return GENERAL_REGS if a scratch register required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */
enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */
static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X contains any TLS symbol references.  */
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

      /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.  */
static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }
  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX: return GE;
    case SMIN: return LE;
    case UMIN: return LEU;
    case UMAX: return GEU;
    default: gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */
bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
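/* For example (illustrative): clamping to [0, 255] gives HI_BOUND == 255
   (a power of two minus one) and LO_BOUND == 0, so *MASK == 8 and
   *SIGNED_SAT == false, i.e. "usat #8"; clamping to [-128, 127] gives
   LO_BOUND == -128 == -HI_BOUND - 1, so *MASK == 8 and *SIGNED_SAT ==
   true, i.e. "ssat #8".  */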
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
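/* For example (illustrative): [r1, #4] and [r1, #8] are adjacent (same
   base register, offsets differing by exactly 4), so

	ldr	r2, [r1, #4]
	ldr	r3, [r1, #8]

   can feed the arith_adjacentmem pattern, whereas [r1, #4] with
   [r1, #12], or [r1, #4] with [r2, #8], cannot.  */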
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
	 REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  This
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     This guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
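/* An illustrative example of the pattern matched above: the pop
   "ldmia sp!, {r4, r5}" is represented as

     (parallel [(set (reg sp) (plus (reg sp) (const_int 8)))
		(set (reg r4) (mem (reg sp)))
		(set (reg r5) (mem (plus (reg sp) (const_int 4))))])

   where the first element is the write-back and the remaining elements
   load ascending registers from consecutive word offsets.  */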
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them here.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.  An ldr instruction takes 1-3
     cycles, but does not block the pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
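/* A worked example (illustrative): for UNSORTED_OFFSETS {8, 0, 4, 12}
   the caller sets order[0] = 1 (the lowest offset); the loop then picks
   offset 4 (index 2), offset 8 (index 0) and offset 12 (index 3),
   giving order = {1, 2, 0, 3}.  A gap in the offsets, or a duplicate
   offset, makes the function return false.  */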
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0))
		   || (GET_CODE (reg) == SUBREG
		       && REG_P (reg = SUBREG_REG (reg))))
		  && (CONST_INT_P (offset
		      = XEXP (XEXP (operands[nops + i], 0), 1))))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  This guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
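/* For illustration: given peephole operands corresponding to

	ldr	r1, [r6, #4]
	ldr	r2, [r6, #8]
	ldr	r0, [r6]
	ldr	r3, [r6, #12]

   this returns ldm_case == 1 (ldmia) with *BASE == 6 and *LOAD_OFFSET == 0;
   with CHECK_REGS, REGS is filled in as {0, 1, 2, 3}, the destination
   registers in ascending memory order.  */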
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0))
		   || (GET_CODE (reg) == SUBREG
		       && REG_P (reg = SUBREG_REG (reg))))
		  && (CONST_INT_P (offset
		      = XEXP (XEXP (operands[nops + i], 0), 1))))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
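/* For example (illustrative): COUNT == 2, REGS == {4, 5}, BASEREG == r0
   and WBACK_OFFSET == 8 produce

     (parallel [(set (reg r0) (plus (reg r0) (const_int 8)))
		(set (reg r4) (mem ...))
		(set (reg r5) (mem ...))])

   i.e. "ldmia r0!, {r4, r5}"; when the multiple operation is not
   profitable, the same transfer is emitted as individual loads plus an
   add of the base register.  */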
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];
	    regs[i] = regs[j];
	    regs[j] = t;
	  }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	{
	  base_reg = regs[0];
	  base_reg_rtx = newbase;
	}
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
		       gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
		   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
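/* For example (illustrative): LENGTH == 40 with BYTES_PER_ITER == 16
   gives leftover == 8, so the loop body copies 16 bytes per iteration
   for two iterations and the final straight-line call copies the
   remaining 8 bytes.  */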
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 awkward.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i += 4)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   by mode size.  */
static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
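/* For example (illustrative): if MEM is an SImode reference at [r0, #4],
   next_consecutive_mem returns an SImode reference at [r0, #8];
   gen_movmem_ldrd_strd below uses this to walk the source and
   destination blocks.  */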
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  src = adjust_address (src, DImode, 0);
  dst = adjust_address (dst, DImode, 0);
  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loaddi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storedi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      if (len == 2)
	return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
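/* Two illustrative cases: (a == 0 && b == 0) is DOM_CC_X_AND_Y with
   cond1 == cond2 == EQ and yields CC_DEQmode; (a == b || a <= b) is
   DOM_CC_X_OR_Y where EQ dominates LE, and yields CC_DLEmode.  A pair
   such as LT with EQ, where neither condition dominates the other,
   falls out with CCmode and is rejected.  */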
15104 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
15106 /* All floating point compares return CCFP if it is an equality
15107 comparison, and CCFPE otherwise. */
15108 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
15131 gcc_unreachable ();
15135 /* A compare with a shifted operand. Because of canonicalization, the
15136 comparison will have to be swapped when we emit the assembler. */
15137 if (GET_MODE (y
) == SImode
15138 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15139 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15140 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15141 || GET_CODE (x
) == ROTATERT
))
15144 /* This operation is performed swapped, but since we only rely on the Z
15145 flag we don't need an additional mode. */
15146 if (GET_MODE (y
) == SImode
15147 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15148 && GET_CODE (x
) == NEG
15149 && (op
== EQ
|| op
== NE
))
15152 /* This is a special case that is used by combine to allow a
15153 comparison of a shifted byte load to be split into a zero-extend
15154 followed by a comparison of the shifted integer (only valid for
15155 equalities and unsigned inequalities). */
15156 if (GET_MODE (x
) == SImode
15157 && GET_CODE (x
) == ASHIFT
15158 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15159 && GET_CODE (XEXP (x
, 0)) == SUBREG
15160 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15161 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15162 && (op
== EQ
|| op
== NE
15163 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15164 && CONST_INT_P (y
))
15167 /* A construct for a conditional compare, if the false arm contains
15168 0, then both conditions must be true, otherwise either condition
15169 must be true. Not all conditions are possible, so CCmode is
15170 returned if it can't be done. */
15171 if (GET_CODE (x
) == IF_THEN_ELSE
15172 && (XEXP (x
, 2) == const0_rtx
15173 || XEXP (x
, 2) == const1_rtx
)
15174 && COMPARISON_P (XEXP (x
, 0))
15175 && COMPARISON_P (XEXP (x
, 1)))
15176 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15177 INTVAL (XEXP (x
, 2)));
  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);
  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;
  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;
  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
	{
	case EQ:
	case NE:
	  /* A DImode comparison against zero can be implemented by
	     or'ing the two halves together.  */
	  if (y == const0_rtx)
	    return CC_Zmode;

	  /* We can do an equality test in three Thumb instructions.  */
	  if (!TARGET_32BIT)
	    return CC_Zmode;

	  /* FALLTHROUGH */

	case LTU:
	case LEU:
	case GTU:
	case GEU:
	  /* DImode unsigned comparisons can be implemented by cmp +
	     cmpeq without a scratch register.  Not worth doing in
	     Thumb-2.  */
	  if (TARGET_32BIT)
	    return CC_CZmode;

	  /* FALLTHROUGH */

	case LT:
	case LE:
	case GT:
	case GE:
	  /* DImode signed and unsigned comparisons can be implemented
	     by cmp + sbcs with a scratch register, but that does not
	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
	  gcc_assert (op != EQ && op != NE);
	  return CC_NCVmode;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
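
/* Illustrative sketch (not part of GCC): the "dominance" relation used
   by arm_select_dominance_cc_mode above can be modeled in isolation.
   "cond1 dominates cond2" means that whenever cond1 holds for a pair of
   values, cond2 also holds; only then can one combined CC mode serve
   both branch conditions.  This standalone, hypothetical demo checks the
   relation by brute force over a small operand range.  */
#if 0
#include <stdio.h>
#include <stdbool.h>

enum cmp { EQ, NE, LT, LE, GT, GE };

static bool holds (enum cmp c, int x, int y)
{
  switch (c)
    {
    case EQ: return x == y;
    case NE: return x != y;
    case LT: return x < y;
    case LE: return x <= y;
    case GT: return x > y;
    case GE: return x >= y;
    }
  return false;
}

/* Brute-force dominance check over a small range of operands.  */
static bool dominates (enum cmp c1, enum cmp c2)
{
  for (int x = -3; x <= 3; x++)
    for (int y = -3; y <= 3; y++)
      if (holds (c1, x, y) && !holds (c2, x, y))
	return false;
  return true;
}

int main (void)
{
  printf ("EQ dominates LE: %d\n", dominates (EQ, LE));  /* 1 */
  printf ("LT dominates NE: %d\n", dominates (LT, NE));  /* 1 */
  printf ("LE dominates LT: %d\n", dominates (LE, LT));  /* 0: x == y breaks it */
  return 0;
}
#endif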
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
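
/* Illustrative sketch (not part of GCC): the hi/lo splitting used above
   decomposes an arbitrary offset into a large part HI, added to the base
   register up front, and a small part LO in [-4095, +4095] that fits the
   immediate field of a byte load/store.  This standalone model omits the
   32-bit sign-wrap masking GCC applies when computing HI, but shows the
   split and the lo == 4095 corner case.  */
#if 0
#include <stdio.h>
#include <assert.h>

int main (void)
{
  long samples[] = { 0, 1, 4095, 4096, -1, -4095, -4096, 123456, -123456 };
  for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
    {
      long offset = samples[i];
      long lo = (offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff));
      /* Corner case: lo == 4095 would overflow once the +1 for the
	 second byte is added, so push the top bit into HI.  */
      if (lo == 4095)
	lo &= 0x7ff;
      long hi = offset - lo;
      assert (hi + lo == offset);
      printf ("offset=%8ld  hi=%8ld  lo=%5ld\n", offset, hi, lo);
    }
  return 0;
}
#endif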
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
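
/* Illustrative sketch (not part of GCC): a standalone model of the
   offset check above.  ARM-state LDRD/STRD encodes an 8-bit byte offset
   (so |offset| <= 255), while Thumb-2 encodes a word-scaled offset (so
   |offset| <= 1020 and offset % 4 == 0).  The hypothetical `thumb2'
   parameter stands in for the TARGET_THUMB2/TARGET_ARM tests.  */
#if 0
#include <stdio.h>
#include <stdbool.h>

static bool offset_ok (long offset, bool thumb2)
{
  if (thumb2 && (offset & 3) != 0)
    return false;
  long max_offset = thumb2 ? 1020 : 255;
  return offset <= max_offset && offset >= -max_offset;
}

int main (void)
{
  printf ("%d\n", offset_ok (256, false));   /* 0: out of ARM range */
  printf ("%d\n", offset_ok (1020, true));   /* 1 */
  printf ("%d\n", offset_ok (1022, true));   /* 0: not word aligned */
  return 0;
}
#endif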
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register,
   the offsets are constants within the range, and the gap between the offsets is 4.
   If reload complete then check that registers are legal.  WBACK indicates whether
   address is updated.  LOAD indicates whether memory access is load or store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]  */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));

      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
      if (tmp == NULL_RTX)
	return false;

      /* DREG must be an even-numbered register in DImode.
	 Split it into SI registers.  */
      operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
      operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

      return (operands_ok_ldrd_strd (operands[0], operands[1],
				     base, offset, false, false));
    }

  return false;
}
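
/* Illustrative sketch (not part of GCC): the core eligibility test of
   the peephole above, in isolation -- two accesses qualify for an
   LDRD/STRD only if they share a base register and their constant
   offsets differ by exactly 4, with the lower address accessed first.
   The toy `struct access' representation is hypothetical.  */
#if 0
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>

struct access { int base_regno; long offset; };

static bool mergeable (const struct access *a, const struct access *b)
{
  if (a->base_regno != b->base_regno)
    return false;
  return labs (a->offset - b->offset) == 4;
}

int main (void)
{
  struct access first = { 2, 8 }, second = { 2, 12 };
  if (mergeable (&first, &second))
    printf ("merge: ldrd/strd [r%d, #%ld]\n",
	    first.base_regno,
	    first.offset < second.offset ? first.offset : second.offset);
  return 0;
}
#endif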
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fprintf (f, ", ");
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
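
/* Illustrative sketch (not part of GCC): a toy model of the grouping
   step described above.  Each fix records the address of its insn and
   the furthest address its pc-relative load can reach; a pool placed at
   address P serves every fix F with F.address < P <= F.limit.  The
   names and numbers below are hypothetical.  */
#if 0
#include <stdio.h>

struct fix { long address; long limit; };

int main (void)
{
  /* Three fixups and one natural barrier at address 900.  */
  struct fix fixes[] = { { 100, 1100 }, { 400, 1400 }, { 800, 1800 } };
  long barrier = 900;

  /* The pool must lie within range of every fix in the group: the
     binding constraint is the smallest limit.  */
  long deadline = fixes[0].limit;
  for (int i = 1; i < 3; i++)
    if (fixes[i].limit < deadline)
      deadline = fixes[i].limit;

  if (barrier <= deadline)
    printf ("dump pool at the barrier, address %ld\n", barrier);
  else
    printf ("no barrier in range; insert a jump before %ld\n", deadline);
  return 0;
}
#endif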
/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx_insn *        insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  machine_mode      mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix * 		minipool_fix_head;
Mfix * 		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */

static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
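
/* Illustrative sketch (not part of GCC): worst-case label padding is the
   label's alignment minus the smallest insn size, since at least one
   minimum-sized insn would otherwise occupy part of the gap.  A
   standalone model of the arithmetic above: */
#if 0
#include <stdio.h>

static long label_padding (int align_log, int thumb)
{
  long align = 1L << align_log;
  long min_insn_size = thumb ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}

int main (void)
{
  printf ("%ld\n", label_padding (3, 1));  /* 8-byte align, Thumb: 6 */
  printf ("%ld\n", label_padding (2, 0));  /* 4-byte align, ARM: 0 */
  return 0;
}
#endif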
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
/* Fill in the offsets for minipool entries.  */
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)
    return 99;

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
17063 If DO_PUSHES is false we do not actually push any of the fixups
17066 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
17070 extract_constrain_insn (insn
);
17072 if (recog_data
.n_alternatives
== 0)
17075 /* Fill in recog_op_alt with information about the constraints of
17077 preprocess_constraints (insn
);
17079 const operand_alternative
*op_alt
= which_op_alt ();
17080 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
17082 /* Things we need to fix can only occur in inputs. */
17083 if (recog_data
.operand_type
[opno
] != OP_IN
)
17086 /* If this alternative is a memory reference, then any mention
17087 of constants in this alternative is really to fool reload
17088 into allowing us to accept one there. We need to fix them up
17089 now so that we output the right code. */
17090 if (op_alt
[opno
].memory_ok
)
17092 rtx op
= recog_data
.operand
[opno
];
17094 if (CONSTANT_P (op
))
17097 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
17098 recog_data
.operand_mode
[opno
], op
);
17100 else if (MEM_P (op
)
17101 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
17102 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
17106 rtx cop
= avoid_constant_pool_reference (op
);
17108 /* Casting the address of something to a mode narrower
17109 than a word can cause avoid_constant_pool_reference()
17110 to return the pool reference itself. That's no good to
17111 us here. Lets just hope that we can use the
17112 constant pool value directly. */
17114 cop
= get_pool_constant (XEXP (op
, 0));
17116 push_minipool_fix (insn
, address
,
17117 recog_data
.operand_loc
[opno
],
17118 recog_data
.operand_mode
[opno
], cop
);
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx pat, op0, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      pat = PATTERN (insn);
      op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm> */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8> */
			  /* SUBS <Rdn>,#<imm8> */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3> */
			  /* SUBS <Rd>,<Rn>,#<imm3> */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm> */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* else fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm> */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS) */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8> */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
        push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
        {
          rtx_jump_table_data *table;

          note_invalid_constants (insn, address, true);
          address += get_attr_length (insn);

          /* If the insn is a vector jump, add the size of the table
             and skip the table.  */
          if (tablejump_p (insn, NULL, &table))
            {
              address += get_jump_table_size (table);
              insn = table;
            }
        }
      else if (LABEL_P (insn))
        /* Add the worst-case padding due to alignment.  We don't add
           the _current_ padding because the minipool insertions
           themselves might change it.  */
        address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
        fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
        break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
        {
          if (BARRIER_P (ftmp->insn))
            {
              if (ftmp->address >= minipool_vector_head->max_address)
                break;

              last_barrier = ftmp;
            }
          else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
            break;

          last_added_fix = ftmp;  /* Keep track of the last fix added.  */
        }

      /* If we found a barrier, drop back to that; any fixes that we
         could have reached but come after the barrier will now go in
         the next mini-pool.  */
      if (last_barrier != NULL)
        {
          /* Reduce the refcount for those fixes that won't go into this
             pool after all.  */
          for (fdel = last_barrier->next;
               fdel && fdel != ftmp;
               fdel = fdel->next)
            {
              fdel->minipool->refcount--;
              fdel->minipool = NULL;
            }

          ftmp = last_barrier;
        }
      else
        {
          /* ftmp is the first fix that we can't fit into this pool and
             there are no natural barriers that we could use.  Insert a
             new barrier in the code somewhere between the previous
             fix and this one, and arrange to jump around it.  */
          HOST_WIDE_INT max_address;

          /* The last item on the list of fixes must be a barrier, so
             we can never run off the end of the list of fixes without
             last_barrier being set.  */
          gcc_assert (ftmp);

          max_address = minipool_vector_head->max_address;
          /* Check that there isn't another fix that is in range that
             we couldn't fit into this pool because the pool was
             already too large: we need to put the pool before such an
             instruction.  The pool itself may come just after the
             fix because create_fix_barrier also allows space for a
             jump instruction.  */
          if (ftmp->address < max_address)
            max_address = ftmp->address + 1;

          last_barrier = create_fix_barrier (last_added_fix, max_address);
        }

      assign_minipool_offsets (last_barrier);

      while (ftmp)
        {
          if (!BARRIER_P (ftmp->insn)
              && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
                  == NULL))
            break;

          ftmp = ftmp->next;
        }

      /* Scan over the fixes we have identified for this pool, fixing them
         up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
           this_fix = this_fix->next)
        if (!BARRIER_P (this_fix->insn))
          {
            rtx addr
              = plus_constant (Pmode,
                               gen_rtx_LABEL_REF (VOIDmode,
                                                  minipool_vector_label),
                               this_fix->minipool->offset);
            *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
          }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
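/* Illustrative only (labels and constant hypothetical): after this pass a
   load of an out-of-range constant ends up as a pc-relative load from a
   nearby minipool that execution jumps around, e.g.

        ldr     r0, .LPOOL      @ fix rewritten to a minipool reference
        ...
        b       .LAFTER         @ barrier emitted to jump around the pool
   .LPOOL:
        .word   0x12345678      @ entry written out by dump_minipool
   .LAFTER:

   which is the effect arranged by the scan/assign/dump sequence above.  */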
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
                         bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
        gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  if ((regno_base == SP_REGNUM) && TARGET_THUMB)
    {
      /* Output pop (not stmfd) because it has a shorter encoding.  */
      gcc_assert (update);
      sprintf (pattern, "pop%s\t{", conditional);
    }
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
         It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
        sprintf (pattern, "ldm%sfd\t", conditional);
      else if (TARGET_UNIFIED_ASM)
        sprintf (pattern, "ldmia%s\t", conditional);
      else
        sprintf (pattern, "ldm%sia\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
        strcat (pattern, "!, {");
      else
        strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
          reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
              reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
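/* For example (illustrative operands): a pop list of {r4, r5, pc} with SP
   as the base register and writeback prints as "pop {r4, r5, pc}" on
   Thumb, and as "ldmfd sp!, {r4, r5, pc}" otherwise; in an interrupt
   handler that returns through the list, the trailing "^" also restores
   the SPSR.  */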
/* Output the assembly for a store multiple.  */
const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
                   ? XEXP (operands[0], 0)
                   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
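/* E.g. (illustrative operands): with SP as the base register this prints
   "vpush.64 {d8, d9, d10}", and with any other base register
   "vstmdb.64 rN!, {d8, d9, d10}".  */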
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */
static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
        base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
         counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
                   (BLKmode,
                    gen_rtx_PRE_MODIFY (Pmode,
                                        stack_pointer_rtx,
                                        plus_constant
                                        (Pmode, stack_pointer_rtx,
                                         -(count * 8)))),
                   gen_rtx_UNSPEC (BLKmode,
                                   gen_rtvec (1, reg),
                                   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
                                        plus_constant (Pmode,
                                                       stack_pointer_rtx,
                                                       i * 8)),
                         reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
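/* Note the DWARF wrapper built above deliberately has COUNT + 1 elements:
   element 0 describes the SP adjustment and elements 1..COUNT describe the
   individual register stores, matching the rtvec_alloc (count + 1) call.  */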
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */
void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
          ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
          : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
         linker.  We need to add an IP clobber to allow setting
         TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
         is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
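/* E.g. an indirect call through r3 on a pre-v5 interworking target prints
   (illustrative):
        mov     lr, pc
        bx      r3
   which works because reading PC yields the current instruction + 8, i.e.
   the instruction after the bx, so LR ends up holding the return point.  */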
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
         first instruction.  It's safe to use IP as the target of the
         load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
        output_asm_insn ("bx%?\t%|ip", operands);
      else
        output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }
  else
    {
      for (i = 2; i >= 0; i--)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }

  return "";
}
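/* E.g. moving r1-r3 into r0-r2 copies upwards (r0 <- r1 first), while
   moving r0-r2 into r1-r3 must copy downwards (r3 <- r2 first) so that no
   source register is clobbered before it has been read (illustrative
   register numbers).  */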
void
arm_emit_movpair (rtx dest, rtx src)
{
  rtx insn;

  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
        {
          emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
                                               GEN_INT (16)),
                         GEN_INT ((val >> 16) & 0x0000ffff));
          insn = get_last_insn ();
          set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
        }
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
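/* On a movw/movt-capable target the constant case above expands to the
   familiar two-instruction sequence, e.g. (values hypothetical):
        movw    r0, #0x5678    @ SET of the low 16 bits
        movt    r0, #0x1234    @ the ZERO_EXTRACT of the high 16 bits
   with a REG_EQUAL note recording the full 32-bit constant.  */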
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
        {
        case REG:
          if (emit)
            {
              if (TARGET_LDRD
                  && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
                output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
              else
                output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          /* Auto-increment addressing modes should never have overlapping
             base and destination registers, and overlapping index registers
             are already prohibited, so this doesn't need to worry about
             fix_cm3_ldrd.  */
          otherops[0] = operands[0];
          otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

          if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
            {
              if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
                {
                  /* Registers overlap so split out the increment.  */
                  if (emit)
                    {
                      output_asm_insn ("add%?\t%1, %1, %2", otherops);
                      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  /* Use a single insn if we can.
                     FIXME: IWMMXT allows offsets larger than ldrd can
                     handle, fix these up with a pair of ldr.  */
                  if (TARGET_THUMB2
                      || !CONST_INT_P (otherops[2])
                      || (INTVAL (otherops[2]) > -256
                          && INTVAL (otherops[2]) < 256))
                    {
                      if (emit)
                        output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
                    }
                  else
                    {
                      if (emit)
                        {
                          output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
                          output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                        }
                      if (count)
                        *count = 2;
                    }
                }
            }
          else
            {
              /* Use a single insn if we can.
                 FIXME: IWMMXT allows offsets larger than ldrd can handle,
                 fix these up with a pair of ldr.  */
              if (TARGET_THUMB2
                  || !CONST_INT_P (otherops[2])
                  || (INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256))
                {
                  if (emit)
                    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          break;

        case LABEL_REF:
        case CONST:
          /* We might be able to use ldrd %0, %1 here.  However the range is
             different to ldr/adr, and it is broken on some ARMv7-M
             implementations.  */
          /* Use the second register of the pair to avoid problematic
             conditional execution.  */
          otherops[1] = operands[1];
          if (emit)
            output_asm_insn ("adr%?\t%0, %1", otherops);
          operands[1] = otherops[0];
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
            }

          if (count)
            *count = 2;
          break;

          /* ??? This needs checking for thumb2.  */
        default:
          if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
                               GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
            {
              otherops[0] = operands[0];
              otherops[1] = XEXP (XEXP (operands[1], 0), 0);
              otherops[2] = XEXP (XEXP (operands[1], 0), 1);

              if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
                {
                  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
                    {
                      switch ((int) INTVAL (otherops[2]))
                        {
                        case -8:
                          if (emit)
                            output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
                          return "";
                        case -4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
                          return "";
                        case 4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
                          return "";
                        }
                    }
                  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
                  operands[1] = otherops[0];
                  if (TARGET_LDRD
                      && (REG_P (otherops[2])
                          || TARGET_THUMB2
                          || (CONST_INT_P (otherops[2])
                              && INTVAL (otherops[2]) > -256
                              && INTVAL (otherops[2]) < 256)))
                    {
                      if (reg_overlap_mentioned_p (operands[0],
                                                   otherops[2]))
                        {
                          /* Swap base and index registers over to
                             avoid a conflict.  */
                          std::swap (otherops[1], otherops[2]);
                        }
                      /* If both registers conflict, it will usually
                         have been fixed by a splitter.  */
                      if (reg_overlap_mentioned_p (operands[0], otherops[2])
                          || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
                        {
                          if (emit)
                            {
                              output_asm_insn ("add%?\t%0, %1, %2", otherops);
                              output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
                            }
                          if (count)
                            *count = 2;
                        }
                      else
                        {
                          otherops[0] = operands[0];
                          if (emit)
                            output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
                        }
                      return "";
                    }

                  if (CONST_INT_P (otherops[2]))
                    {
                      if (emit)
                        {
                          if (!(const_ok_for_arm (INTVAL (otherops[2]))))
                            output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
                          else
                            output_asm_insn ("add%?\t%0, %1, %2", otherops);
                        }
                    }
                  else if (emit)
                    output_asm_insn ("add%?\t%0, %1, %2", otherops);
                }
              else if (emit)
                output_asm_insn ("sub%?\t%0, %1, %2", otherops);

              if (count)
                *count = 2;

              if (TARGET_LDRD)
                return "ldr%(d%)\t%0, [%1]";

              return "ldm%(ia%)\t%1, %M0";
            }
          else
            {
              otherops[1] = adjust_address (operands[1], SImode, 4);
              /* Take care of overlapping base/data reg.  */
              if (reg_mentioned_p (operands[0], operands[1]))
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
        }
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
                  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
              else
                output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
              else
                output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
              else
                output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          otherops[0] = operands[1];
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

          /* IWMMXT allows offsets larger than ldrd can handle,
             fix these up with a pair of ldr.  */
          if (!TARGET_THUMB2
              && CONST_INT_P (otherops[2])
              && (INTVAL (otherops[2]) <= -256
                  || INTVAL (otherops[2]) >= 256))
            {
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
            {
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
            }
          else
            {
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
            }
          break;

        case PLUS:
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
          if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
            {
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
                {
                case -8:
                  if (emit)
                    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
                  return "";

                case -4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
                  return "";

                case 4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
                  return "";
                }
            }
          if (TARGET_LDRD
              && (REG_P (otherops[2])
                  || TARGET_THUMB2
                  || (CONST_INT_P (otherops[2])
                      && INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256)))
            {
              otherops[0] = operands[1];
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
              return "";
            }
          /* Fall through */

        default:
          otherops[0] = adjust_address (operands[0], SImode, 4);
          otherops[1] = operands[1];
          if (emit)
            {
              output_asm_insn ("str%?\t%1, %0", operands);
              output_asm_insn ("str%?\t%H1, %0", otherops);
            }
          if (count)
            *count = 2;
        }
    }

  return "";
}
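/* Callers use the COUNT output for length computation: the paths above
   leave *count at 1 when a single ldrd/strd or ldm/stm suffices and set
   it to 2 whenever a pair of single-word accesses, or an extra address
   set-up instruction, had to be emitted instead.  */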
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */
      if (MEM_P (operands[1]))
        {
          switch (GET_CODE (XEXP (operands[1], 0)))
            {
            case REG:
              output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
              break;

            case LABEL_REF:
            case CONST:
              output_asm_insn ("adr%?\t%0, %1", operands);
              output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else
        {
          rtx ops[2];
          int dest, src, i;

          gcc_assert (REG_P (operands[1]));

          dest = REGNO (operands[0]);
          src = REGNO (operands[1]);

          /* This seems pretty dumb, but hopefully GCC won't try to do it
             very often.  */
          if (dest < src)
            for (i = 0; i < 4; i++)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
          else
            for (i = 3; i >= 0; i--)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
        }
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
          break;

        default:
          gcc_unreachable ();
        }
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
              || mode == DFmode
              || mode == SImode
              || mode == DImode
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? "64" : "32",
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers.

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
        {
          templ = "v%smia%%?\t%%0!, %%h1";
          ops[0] = XEXP (addr, 0);
        }
      else
        {
          templ = "v%s1.64\t%%h1, %%A0";
          ops[0] = mem;
        }
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
         pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
        {
          if (nregs > 4)
            templ = "v%smia%%?\t%%m0, %%h1";
          else
            templ = "v%s1.64\t%%h1, %%A0";

          ops[0] = mem;
          ops[1] = reg;
          break;
        }
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
        int i;
        int overlap = -1;

        for (i = 0; i < nregs; i++)
          {
            /* We're only using DImode here because it's a convenient
               size.  */
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
            ops[1] = adjust_address (mem, DImode, 8 * i);
            if (reg_overlap_mentioned_p (ops[0], mem))
              {
                gcc_assert (overlap == -1);
                overlap = i;
              }
            else
              {
                sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
                output_asm_insn (buff, ops);
              }
          }
        if (overlap != -1)
          {
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
            sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
            output_asm_insn (buff, ops);
          }

        return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
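/* E.g. (illustrative operands): a two-double-word vector at a plain
   register address goes through the "v%s1.64" template and prints as
   something like "vld1.64 {d16-d17}, [r0]" (plus any alignment tag
   supplied by %A0), while an eight-double-word XImode access exceeds the
   vld1/vst1 limit above and falls back to "vldmia"/"vstmia".  */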
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
        {
        case EImode:
        case OImode:
          return 8;
        case CImode:
          return 12;
        case XImode:
          return 16;
        default:
          gcc_unreachable ();
        }
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
          && REG_P (XEXP (addr, 0))
          && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        {
          if (n & (3 << i))
            {
              operands[immed_op] = GEN_INT (n & (255 << i));
              output_asm_insn (instr, operands);
              instr = instr2;
              i += 6;
            }
        }
    }

  return "";
}
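/* Worked example (illustrative): N = 0x40000001 has non-zero byte-sized
   chunks at rotations 0 and 30, so an add expands to
        add     r0, r1, #1
        add     r0, r0, #0x40000000
   i.e. any 32-bit constant is applied as a short sequence of immediates
   that the ARM encoding can represent directly.  */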
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or the
   constant shift amount otherwise.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
        {
          *amountp = INTVAL (XEXP (op, 1));
        }
      else if (REG_P (XEXP (op, 1)))
        {
          *amountp = -1;
          return mnem;
        }
      else
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
        mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
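/* E.g. (mult x 8) prints as the shifter operand "lsl #3"; an out-of-range
   "lsl #32" arising from that merge is rewritten above as "lsr #32",
   whose zero result is arithmetically identical and whose carry behavior
   is never relied upon.  */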
/* Obtain the shift from the POWER of two.  */
static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
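/* E.g. the three characters a, ", newline are emitted as
        .ascii  "a\"\012"
   -- the quote (and a backslash) is backslash-escaped, and anything
   non-printable becomes a three-digit octal escape.  */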
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))

/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7.  Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! crtl->is_leaf && call_used_regs[reg]))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
          || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
              && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->tail_call_emit
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes ())
           ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 -r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
              && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}
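/* The size computed here must agree with what vfp_emit_fstmd actually
   pushes: a contiguous block of exactly two live register pairs is
   counted as three (24 bytes rather than 16) because of the ARM10 VFPr1
   workaround applied in both places.  */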
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |=   (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5 && TARGET_ARM)
                {
                  if (TARGET_UNIFIED_ASM)
                    sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
                  else
                    sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
                }
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  if (TARGET_UNIFIED_ASM)
                    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                  else
                    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
                }
            }
          else if (TARGET_UNIFIED_ASM)
            sprintf (instr, "pop%s\t{", conditional);
          else
            sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          /* Use bx if it's available.  */
          if (arm_arch5 || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
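/* Illustrative outputs (operands hypothetical): a leaf ARMv5 function
   gets "bx lr"; a function that pushed {r4, lr} gets "pop {r4, pc}"
   (unified syntax) or "ldmfd sp!, {r4, pc}"; an ISR gets
   "subs pc, lr, #4" so that the SPSR is copied back into the CPSR as
   well.  */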
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is ((pc[-3]) & ~0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* ??? Do we want to print some of the below anyway?  */
  if (TARGET_THUMB1)
    return;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
               crtl->args.size,
               crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                              HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i = 0;
  int regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  /* If there's an odd number of registers to push, start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        int regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          ;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (i == 0)
          {
            rtx insn;

            /* The first store needs writeback, to allocate the whole
               area in one go.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);

        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;
}
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more
   scheduling freedom and can be turned into an STM by peephole
   optimizations.  */
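/* Illustrative example, not part of the original source: for
   saved_regs_mask covering {r4, r5, r6, r7}, the expected output is
   roughly

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]

   i.e. the first STRD allocates the whole 16-byte area with writeback
   and the remaining stores use plain offset addressing.  */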
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
	])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))
			    ),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
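/* Illustrative sketch, not part of the original source: for
   saved_regs_mask covering {r4, r5, pc} the emitted parallel is
   roughly

     (parallel [(return)
		(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
		(set (reg:SI r4) (mem:SI (reg:SI sp)))
		(set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
		(set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])

   which a pop pattern renders as something like "ldmia sp!, {r4, r5, pc}".  */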
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	reg = gen_rtx_REG (SImode, i);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
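/* Illustrative sketch, not part of the original source: popping d8-d9
   from a base of SP produces a parallel of the form

     (parallel [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 16)))
		(set (reg:DF d8) (mem:DF (reg:SI sp)))
		(set (reg:DF d9) (mem:DF (plus:SI (reg:SI sp) (const_int 8))))])

   which is recognized as a single VLDM-style instruction.  */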
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers are being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers are
   popped, the last register is loaded by using an LDR pattern.  */
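/* Illustrative example, not part of the original source: for
   saved_regs_mask covering {r4, r6, r7} (an odd count, no PC) the
   expected sequence is roughly

	ldrd	r4, r6, [sp]
	add	sp, sp, #8
	ldr	r7, [sp], #4

   note that in Thumb-2 the two LDRD targets need not be consecutive.  */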
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
			       plus_constant (Pmode,
					      stack_pointer_rtx, 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
	       [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the
	       registers to be loaded are generated in above given LDRD
	       pattern, and the pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first
     and then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It
   uses offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.
   TODO: Add peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
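/* Illustrative example, not part of the original source: for
   saved_regs_mask covering {r4, r5, r6} (no PC) the expected sequence
   is roughly

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   i.e. all loads use offset addressing and a single stack update
   follows at the end.  */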
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for
	       which LDRD can be generated.  PC is always the last register
	       popped, and we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available when there is an indirect
   tailcall in this particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */
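/* Worked example, not part of the original source and with purely
   illustrative values: for an AAPCS function with no pretend args, two
   call-saved core registers, 8 bytes of locals and no outgoing
   arguments, the computed offsets would be

     saved_args    = 0
     saved_regs    = 8
     soft_frame    = 8    (CALLER_INTERWORKING_SLOT_SIZE assumed 0)
     locals_base   = 16
     outgoing_args = 16   (already doubleword aligned)

   padding registers or the alignment bumps below can perturb these.  */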
static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     To work around this, we cache the computed frame size.  This
     works because we will only be calling RTL expanders that need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_HARD_FLOAT && TARGET_VFP)
	saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This
   register must be released by means of release_scratch_register_on_entry.
   IP is not considered since it is always used as the 1st scratch register if
   available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
			       unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
	if (regno1 != i && (live_regs & (1 << i)) != 0)
	  {
	    regno = i;
	    break;
	  }

      if (regno < 0)
	{
	  /* If IP is used as the 1st scratch register for a nested function,
	     then either r3 wasn't available or is used to preserve IP.  */
	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
	    regno1 = 3;
	  regno = (regno1 == 3 ? 2 : 3);
	  sr->saved
	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			       regno);
	}
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */
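/* Worked example, not part of the original source: in the small case
   below (SIZE <= PROBE_INTERVAL) the scratch register is loaded with
   FIRST + PROBE_INTERVAL, subtracted from SP, and the single probe is
   issued at offset PROBE_INTERVAL - SIZE, i.e. at address
   SP - (FIRST + SIZE), which is exactly the far end of the range.  */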
static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			    unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (reg1);
	}

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
	}
      else
	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	    {
	      emit_set_insn (sr.reg,
			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
	      emit_stack_probe (plus_constant (Pmode, sr.reg,
					       PROBE_INTERVAL - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
	}

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg1;
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  if (TARGET_THUMB2)
    fputs ("\tittt\tne\n", asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("subne\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR and branch.  */
  output_asm_insn ("strne\tr0, [%0, #0]", xops);
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = IS_NESTED (func_type)
	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
		   || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
		       && !df_regs_ever_live_p (LR_REGNUM)
		       && arm_r3_live_at_start_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
	  arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
	  onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
	  push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
	{
	  rtx addr, dwarf;

	  gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	  saved_regs += 4;

	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	  fp_offset = 4;

	  /* Just tell the dwarf backend that we adjusted SP.  */
	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -fp_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	}
      else
	{
	  /* Store the args on the stack.  */
	  if (cfun->machine->uses_anonymous_args)
	    {
	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					  (0xf0 >> (args_to_push / 4)) & 0xf);
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	      saved_pretend_args = 1;
	    }
	  else
	    {
	      rtx addr, dwarf;

	      if (args_to_push == 4)
		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      else
		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  -args_to_push));

	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }

	  RTX_FRAME_RELATED_P (insn) = 1;
	  fp_offset = args_to_push;
	  args_to_push = 0;
	}
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf,
	   (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating extra
     push of IP (needed when frame is needed and frame layout if apcs),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done is a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  insn = GEN_INT (saved_regs - (4 + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
	regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
	regno = LR_REGNUM;
      else
	regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
					size - STACK_CHECK_PROTECT,
					regno, live_regs_mask);
	}
      else if (size > 0)
	arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
				    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
	insn = gen_rtx_REG (SImode, 3);
      else
	{
	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
	  insn = gen_frame_mem (SImode, insn);
	}
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					 hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
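/* Illustrative example, not part of the original source: a hypothetical
   arm.md pattern template such as

	"bic%?\t%0, %1, #%B2"

   would use '%?' to emit the current condition code and '%B' to print
   the bitwise complement of a CONST_INT operand.  */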
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
	{
	  fputc ('s', stream);
	  arm_print_condition (stream);
	}
      else
	{
	  arm_print_condition (stream);
	  fputc ('s', stream);
	}
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc ('s', stream);
      break;

    /* %# is a "break" sequence. It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	r = real_value_negate (&r);
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'B':
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
	HOST_WIDE_INT val;

	if (!CONST_INT_P (x)
	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	else
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   REG_P (XEXP (x, 0))
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

      /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fputs (wc_reg_names [INTVAL (x)], stream);
	}
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	machine_mode mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (!REG_P (x)
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	rtx postinc_reg = NULL;
	unsigned align, memsize, align_bits;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	if (GET_CODE (addr) == POST_MODIFY)
	  {
	    postinc_reg = XEXP (XEXP (addr, 1), 1);
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs ("!", stream);
	if (postinc_reg)
	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    case 'C':
      {
	rtx addr;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	gcc_assert (REG_P (addr));
	asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      gcc_assert (CONST_DOUBLE_P (x));
      int result;
      result = vfp3_const_double_for_fract_bits (x);
      if (result == 0)
	result = vfp3_const_double_for_bits (x);
      fprintf (stream, "#%d", result);
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  {
	    char fpstr[20];
	    real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			     sizeof (fpstr), 0, 1);
	    fprintf (stream, "#%s", fpstr);
	  }
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
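
/* A minimal illustrative sketch (not part of the compiler) of how the
   'Q', 'R' and 'H' operand codes above map the base register of a
   DImode register pair to a register number.  The helper name is
   hypothetical, and the WORDS_BIG_ENDIAN flag is passed in explicitly
   so the sketch is self-contained.  */
static int
example_di_pair_regno (int base_regno, int op_code, int words_big_endian)
{
  switch (op_code)
    {
    case 'Q':	/* Register holding the least significant word.  */
      return base_regno + (words_big_endian ? 1 : 0);
    case 'R':	/* Register holding the most significant word.  */
      return base_regno + (words_big_endian ? 0 : 1);
    case 'H':	/* The higher-numbered register of the pair.  */
      return base_regno + 1;
    default:
      return -1;
    }
}
/* For example, on a little-endian run, example_di_pair_regno (0, 'Q', 0)
   and example_di_pair_regno (0, 'R', 0) give r0 and r1 respectively.  */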
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as an index register.  */
	      std::swap (base, index);
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      asm_fprintf (stream, "[%r, %s%r",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (XEXP (index, 0)));
	      arm_print_operand (stream, index, 'S');
	      fputs ("]", stream);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  extern machine_mode output_memory_reference_mode;

	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	  else
	    asm_fprintf (stream, "[%r], #%s%d",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else
	output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
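
/* An illustrative sketch (not part of the compiler) of the assembly
   syntax produced above for the auto-modify addressing modes, given a
   base register number BASE and an access size SIZE in bytes.  The
   helper name and its buffer convention are hypothetical.  */
static void
example_automod_syntax (char *buf, size_t len, int base, int size,
			int pre, int dec)
{
  if (pre)
    /* e.g. "[r4, #-8]!" for PRE_DEC of a doubleword.  */
    snprintf (buf, len, "[r%d, #%s%d]!", base, dec ? "-" : "", size);
  else
    /* e.g. "[r4], #8" for POST_INC.  */
    snprintf (buf, len, "[r%d], #%s%d", base, dec ? "-" : "", size);
}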
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  if (!arm_pic_data_is_text_relative
	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    REAL_VALUE_TYPE rval;

	    REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

	    assemble_real
	      (rval, GET_MODE_INNER (mode),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
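
/* An illustrative sketch (not part of the compiler) of the relocation
   suffix chosen above when a .word for a symbol is emitted into a PIC
   constant table: local, text-relative data can use (GOTOFF), anything
   else must go through the GOT.  The predicate parameters are
   hypothetical stand-ins for SYMBOL_REF_LOCAL_P and
   arm_pic_data_is_text_relative.  */
static const char *
example_pic_word_suffix (int symbol_is_local, int data_is_text_relative)
{
  if (!data_is_text_relative || !symbol_is_local)
    return "(GOT)";
  return "(GOTOFF)";
}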
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */
static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */
static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
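
/* An illustrative sketch (not part of the compiler) of the
   section-name encoding used by arm_elf_asm_cdtor for a prioritized
   constructor or destructor.  The helper name is hypothetical; BUF is
   assumed to hold at least 18 bytes, as in the code above.  */
static void
example_cdtor_section_name (char *buf, int is_ctor, int priority)
{
  /* e.g. is_ctor = 1, priority = 65 yields ".init_array.00065".  */
  sprintf (buf, "%s.%.5u", is_ctor ? ".init_array" : ".fini_array",
	   priority);
}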
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
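
/* An illustrative enumeration (not part of the compiler) naming the
   ccfsm states described above; arm_ccfsm_state itself is a plain int
   holding these values, and the names here are hypothetical.  */
enum example_ccfsm_state
{
  EXAMPLE_CCFSM_NORMAL = 0,		/* Do nothing special.  */
  EXAMPLE_CCFSM_SKIP_TO_LABEL = 1,	/* Suppress branch; target is a label.  */
  EXAMPLE_CCFSM_SKIP_TO_BRANCH = 2,	/* Suppress branch; target is a branch.  */
  EXAMPLE_CCFSM_COND_FROM_LABEL = 3,	/* Conditionalize insns up to the label.  */
  EXAMPLE_CCFSM_COND_FROM_BRANCH = 4	/* Conditionalize insns up to the branch.  */
};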
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_CZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_NCVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
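
/* An illustrative note (not part of the compiler): ARM condition codes
   come in complementary pairs (eq/ne, cs/cc, mi/pl, vs/vc, hi/ls,
   ge/lt, gt/le), so inverting a condition amounts to flipping the low
   bit of its index -- which, we assume here, is how
   ARM_INVERSE_CONDITION_CODE is defined in arm.h.  */
static int
example_inverse_condition (int cond_index)
{
  return cond_index ^ 1;	/* eq <-> ne, ge <-> lt, and so on.  */
}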
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  while (1)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}

      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
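
/* An illustrative sketch (not part of the compiler) of the IT-block
   mnemonic decoding performed above: bit N of the mask selects 't'
   (same condition) or 'e' (inverse condition) for the Nth insn of the
   block, and the whole string is prefixed with 'i'.  The helper name
   is hypothetical; BUF must hold at least LEN + 2 bytes.  */
static void
example_it_mnemonic (char *buf, unsigned mask, int len)
{
  int n;

  buf[0] = 'i';
  for (n = 0; n < len; n++)
    buf[n + 1] = (mask & (1u << n)) ? 't' : 'e';
  buf[len + 1] = '\0';
}
/* For example, mask 0x5 with length 3 yields "itet", which would be
   emitted as e.g. "itet eq" followed by one eq, one ne and one eq
   conditional instruction.  */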
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT && TARGET_VFP
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
	 putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return FALSE;

      if (TARGET_THUMB2)
	return TRUE;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return FALSE;
}
/* Implement MODES_TIEABLE_P.  */
bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
	  || VALID_NEON_QREG_MODE (mode1)
	  || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
	  || VALID_NEON_QREG_MODE (mode2)
	  || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */
enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

           double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
	  break;
	}
    }

  if (value == 0)
    {
      debug_rtx (addr);
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */
static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */
static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}

/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions between
   __fp16 to or from double to do an intermediate conversion to float.  */
static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
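
/* An illustrative user-level example (not compiler code) of the
   semantics the hook above implements, assuming a target where __fp16
   is available:

       __fp16 h = 1.0;
       double d = h;	   // lowered as (double)(float)h
       __fp16 g = d;	   // lowered as (__fp16)(float)d

   Both conversions between __fp16 and double are routed through an
   intermediate conversion to float, as required by the ARM
   half-precision C semantics.  */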
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */
static bool
arm_scalar_mode_supported_p (machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
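
/* A worked example (not compiler code) of the ordering above: copying
   the pair (d0,d1) from (d1,d2) has the destination below the source,
   so the component moves are emitted forwards (d0 <- d1 first);
   copying (d1,d2) from (d0,d1) is emitted backwards (d2 <- d1 first),
   so that no source register is clobbered before it is read.  */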
/* Split operands into moves from op[1] + op[2] into op[0].  */
void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit() */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
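/* Explanatory note (not from the original): the tracking below records in
   cfun->machine the most recent flag-setting insn (thumb1_cc_insn) together
   with the operands it effectively compared (thumb1_cc_op0/thumb1_cc_op1)
   and the CC mode valid for them, so that a later cbranchsi4_insn can reuse
   the flags instead of emitting an explicit compare.  */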
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
                 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
        {
          if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
              || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
            CC_STATUS_INIT;
        }
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
        {
          rtx set = single_set (insn);
          cfun->machine->thumb1_cc_insn = insn;
          cfun->machine->thumb1_cc_op0 = SET_DEST (set);
          cfun->machine->thumb1_cc_op1 = const0_rtx;
          cfun->machine->thumb1_cc_mode = CC_NOOVmode;
          if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
            {
              rtx src1 = XEXP (SET_SRC (set), 1);
              if (src1 == const0_rtx)
                cfun->machine->thumb1_cc_mode = CCmode;
            }
          else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
            {
              /* Record the src register operand instead of dest because
                 cprop_hardreg pass propagates src.  */
              cfun->machine->thumb1_cc_op0 = SET_SRC (set);
            }
        }
      else if (conds != CONDS_NOCOND)
        cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if an unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;
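  /* Illustrative values (not from the original source): 0x00ff0000 is
     0xff << 16, so the search below succeeds at i == 16 and we return 1;
     0x1ff cannot be written as 0xff << i for any i, so we return 0.  */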
  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
         of the arg pointer register.  If that register is not being used,
         then there are no arguments on the stack, and we do not have to
         worry that a far jump might force the prologue to push the link
         register, changing the stack offsets.  In this case we can just
         return false, since the presence of far jumps in the function will
         not affect stack offsets.

         If the arg pointer is live (or if it was live, but has now been
         eliminated and so set to dead) then we do have to test to see if
         the function might contain a far jump.  This test can lead to some
         false negatives, since before reload is completed, the length of
         branch instructions is not known, so gcc defaults to returning their
         longest length, which in turn sets the far jump attribute to true.

         A false negative will not result in bad code being generated, but it
         will result in a needless push and pop of the link register.  We
         hope that this does not occur too often.

         If we need doubleword stack alignment this could affect the other
         elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
        cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
        return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
        {
          far_jump = true;
        }
      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before the
     shorten_branch pass.  So checking the far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately whether a far
     jump may finally be used.  The heuristic is very conservative as there is
     no chance to roll back the decision of not using a far jump.

     The Thumb1 long branch offset is -2048 to 2046.  The worst case is that
     each 2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
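  /* Worked example (illustrative, not in the original): if func_size, the
     summed insn lengths in bytes, is 700, then 700 * 3 == 2100 >= 2048 and
     we conservatively record that far jumps are needed; at or below 682
     bytes (2048/3) even the worst-case layout of one 4-byte constant-pool
     entry per 2-byte insn keeps every branch within reach.  */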
  if (far_jump && ((func_size * 3) >= 2048))
    {
      /* Record the fact that we have decided that
         the function does use far jumps.  */
      cfun->machine->far_jump_used = 1;
      return 1;
    }

  return 0;
}
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
          || TARGET_BACKTRACE
          || (live_regs_mask & 1 << LR_REGNUM) == 0
          || TARGET_INTERWORK
          || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
          || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
         && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);
  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
24353 thumb1_unexpanded_epilogue (void)
24355 arm_stack_offsets
*offsets
;
24357 unsigned long live_regs_mask
= 0;
24358 int high_regs_pushed
= 0;
24360 int had_to_push_lr
;
24363 if (cfun
->machine
->return_used_this_function
!= 0)
24366 if (IS_NAKED (arm_current_func_type ()))
24369 offsets
= arm_get_frame_offsets ();
24370 live_regs_mask
= offsets
->saved_regs_mask
;
24371 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24373 /* If we can deduce the registers used from the function's return value.
24374 This is more reliable that examining df_regs_ever_live_p () because that
24375 will be set if the register is ever used in the function, not just if
24376 the register is used to hold a return value. */
24377 size
= arm_size_return_regs ();
24379 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24382 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24383 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24386 /* The prolog may have pushed some high registers to use as
24387 work registers. e.g. the testsuite file:
24388 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24389 compiles to produce:
24390 push {r4, r5, r6, r7, lr}
24394 as part of the prolog. We have to undo that pushing here. */
24396 if (high_regs_pushed
)
24398 unsigned long mask
= live_regs_mask
& 0xff;
24401 /* The available low registers depend on the size of the value we are
24409 /* Oh dear! We have no low registers into which we can pop
24412 ("no low registers available for popping high registers");
24414 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24415 if (live_regs_mask
& (1 << next_hi_reg
))
24418 while (high_regs_pushed
)
24420 /* Find lo register(s) into which the high register(s) can
24422 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24424 if (mask
& (1 << regno
))
24425 high_regs_pushed
--;
24426 if (high_regs_pushed
== 0)
24430 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
          /* Pop the values into the low register(s).  */
          thumb_pop (asm_out_file, mask);

          /* Move the value(s) into the high registers.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
            {
              if (mask & (1 << regno))
                {
                  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
                               regno);

                  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
                    if (live_regs_mask & (1 << next_hi_reg))
                      break;
                }
            }
        }
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
        live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
         structure was created which includes an adjusted stack
         pointer, so just pop everything.  */
      if (live_regs_mask)
        thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
         PC or it was kept in LR for the entire function.
         Note that thumb_pop has already called thumb_exit if the
         PC was in the list.  */
      if (!had_to_push_lr)
        thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
        thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
        {
          if (size > 12)
            {
              /* We have no free low regs, so save one.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
                           LAST_ARG_REGNUM);

              /* Get the return address into a temporary register.  */
              thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

              /* Move the return address to lr.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
                           LAST_ARG_REGNUM);

              /* Restore the low register.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
                           IP_REGNUM);
              regno = LR_REGNUM;
            }
          else
            regno = LAST_ARG_REGNUM;
        }
      else
        regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
                   SP_REGNUM, SP_REGNUM,
                   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check whether FUNC is called with a different mode.  */
bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

        default:
          gcc_unreachable ();
        }
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */
void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
        {
          int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
          unsigned long mask;

          mask = 1ul << (LAST_ARG_REGNUM + 1);
          mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

          insn = thumb1_emit_multi_reg_push (mask, 0);
        }
      else
        insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                      stack_pointer_rtx, x));

      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
         The code looks like this:

          0   sub   SP, #16         Reserve space for 4 registers.
          2   push  {R7}            Push low registers.
          4   add   R7, SP, #20     Get the stack pointer before the push.
          6   str   R7, [SP, #8]    Store the stack pointer
                                      (before reserving the space).
          8   mov   R7, PC          Get hold of the start of this code + 12.
         10   str   R7, [SP, #16]   Store it.
         12   mov   R7, FP          Get hold of the current frame pointer.
         14   str   R7, [SP, #4]    Store it.
         16   mov   R7, LR          Get hold of the current return address.
         18   str   R7, [SP, #12]   Store it.
         20   add   R7, SP, #16     Point at the start of the
                                      backtrace structure.
         22   mov   FP, R7          Put this value into the frame pointer.  */
      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
        {
          insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
          RTX_FRAME_RELATED_P (insn) = 1;

          offset = bit_count (l_mask) * UNITS_PER_WORD;
        }

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
         to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
         prevent the scheduler from doing anything weird.  Failing that
         we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
        {
          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }
      else
        {
          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
           || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
                                                 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      /* Here we need to mask out registers used for passing arguments
         even if they can be pushed.  This is to avoid using them to stash
         the high registers.  Such a stash may clobber the use of
         arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask) & 0xff;

      if (pushable_regs == 0)
        pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
        {
          unsigned long real_regs_mask = 0;

          for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
            {
              if (pushable_regs & (1 << regno))
                {
                  emit_move_insn (gen_rtx_REG (SImode, regno),
                                  gen_rtx_REG (SImode, next_hi_reg));

                  high_regs_pushed--;
                  real_regs_mask |= (1 << next_hi_reg);

                  if (high_regs_pushed)
                    {
                      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
                           next_hi_reg--)
                        if (live_regs_mask & (1 << next_hi_reg))
                          break;
                    }
                  else
                    {
                      pushable_regs &= ~((1 << regno) - 1);
                      break;
                    }
                }
            }

          /* If we had to find a work register and we have not yet
             saved the LR then add it to the list of regs to push.  */
          if (l_mask == (1 << LR_REGNUM))
            {
              pushable_regs |= l_mask;
              real_regs_mask |= l_mask;
              l_mask = 0;
            }

          insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
                    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
    sorry ("-fstack-check=specific for THUMB1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
        {
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                        GEN_INT (- amount)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          rtx reg, dwarf;

          /* The stack decrement is too big for an immediate value in a single
             insn.  In theory we could issue multiple subtracts, but after
             three of them it becomes more space efficient to place the full
             value in the constant pool and load into a register.  (Also the
             ARM debugger really likes to see only one stack decrement per
             function.)  So instead we look for a scratch register into which
             we can load the decrement, and then we subtract this from the
             stack pointer.  Unfortunately on the thumb the only available
             scratch registers are the argument registers, and we cannot use
             these as they may hold arguments to the function.  Instead we
             attempt to locate a call preserved register which is used by this
             function.  If we can find one, then we know that it will have
             been pushed at the start of the prologue and so we can corrupt
             it now.  */
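          /* Illustrative expansion (not from the original): for a frame of
             4096 bytes with r4 among the saved registers, this path emits
             roughly
                 ldr  r4, .LCn       @ .LCn holds -4096 in the constant pool
                 add  sp, sp, r4
             and r4 gets its real value back from the normal callee-save
             pop in the epilogue.  */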
          for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
            if (live_regs_mask & (1 << regno))
              break;

          gcc_assert (regno <= LAST_LO_REGNUM);

          reg = gen_rtx_REG (SImode, regno);

          emit_insn (gen_movsi (reg, GEN_INT (- amount)));

          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, reg));

          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -amount));
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Generate pattern *pop_multiple_with_stack_update_and_return if a single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is whether a single register or
   multiple registers are to be returned.  */
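/* Note (illustrative): in the single-register case below, the PARALLEL of
   a return with a post-increment load into PC corresponds to the one-insn
   epilogue "pop {pc}" (a post-indexed load of PC from the stack).  */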
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      if (num_regs == 1)
        {
          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
          rtx addr = gen_rtx_MEM (SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
          RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
          emit_jump_insn (par);
        }
      else
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          arm_emit_multi_reg_pop (saved_regs_mask);
        }
    }
  else
    {
      emit_jump_insn (simple_return_rtx);
    }
}
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (amount)));
      else
        {
          /* r3 is always free in the epilogue.  */
          rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

          emit_insn (gen_movsi (reg, GEN_INT (amount)));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
        }
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers are saved and how far away the floats
     are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
        num_regs++;
        floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
        {
          rtx_insn *insn;
          floats_from_frame += saved_size;
          insn = emit_insn (gen_addsi3 (ip_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (-floats_from_frame)));
          arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
                                       ip_rtx, hard_frame_pointer_rtx);
        }

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
        /* Look for a case where a reg does not need restoring.  */
        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
            && (!df_regs_ever_live_p (i + 1)
                || call_used_regs[i + 1]))
          {
            if (start_reg != i)
              arm_emit_vfp_multi_reg_pop (start_reg,
                                          (i - start_reg) / 2,
                                          gen_rtx_REG (SImode,
                                                       IP_REGNUM));
            start_reg = i + 2;
          }

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (start_reg != i)
        arm_emit_vfp_multi_reg_pop (start_reg,
                                    (i - start_reg) / 2,
                                    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
         it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
        if (df_regs_ever_live_p (i) && !call_used_regs[i])
          {
            rtx addr = gen_frame_mem (V2SImode,
                                      plus_constant (Pmode,
                                                     hard_frame_pointer_rtx,
                                                     - lrm_count * 4));
            insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                               gen_rtx_REG (V2SImode, i),
                                               NULL_RTX);
            lrm_count += 2;
          }
    }

  /* saved_regs_mask should contain IP which contains the old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    hard_frame_pointer_rtx,
                                    GEN_INT (- 4 * num_regs)));

      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
                                   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
         IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
                              gen_rtx_POST_INC (SImode,
                                                stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                         gen_rtx_REG (SImode, IP_REGNUM),
                                         NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent the ARM epilogue.  REALLY_RETURN is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate the return
     pattern, and let output_return_instruction take care of instruction
     emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
        emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
        {
          /* In ARM mode, the frame pointer points to the first saved
             register.  Restore the stack pointer to the last saved
             register.  */
          amount = offsets->frame - offsets->saved_regs;

          /* Force out any pending memory operations that reference stacked
             data before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (insn, amount,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);

          /* Emit USE(stack_pointer_rtx) to ensure that the stack adjustment
             is not deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
      else
        {
          /* In Thumb-2 mode, the frame pointer points to the last saved
             register.  */
          amount = offsets->locals_base - offsets->saved_regs;
          if (amount)
            {
              insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                            hard_frame_pointer_rtx,
                                            GEN_INT (amount)));
              arm_add_cfa_adjust_cfa_note (insn, amount,
                                           hard_frame_pointer_rtx,
                                           hard_frame_pointer_rtx);
            }

          /* Force out any pending memory operations that reference stacked
             data before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_movsi (stack_pointer_rtx,
                                       hard_frame_pointer_rtx));
          arm_add_cfa_adjust_cfa_note (insn, 0,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that the stack adjustment
             is not deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust the stack pointer to
         the last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
        {
          rtx_insn *tmp;
          /* Force out any pending memory operations that reference stacked
             data before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                       stack_pointer_rtx,
                                       GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (tmp, amount,
                                       stack_pointer_rtx, stack_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that the stack adjustment
             is not deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
         any groupings made in the prologue and generate matching
         vldm operations.  The need to match groups is because,
         unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
        /* Look for a case where a reg does not need restoring.  */
        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
            && (!df_regs_ever_live_p (i + 1)
                || call_used_regs[i + 1]))
          {
            /* Restore the regs discovered so far (from reg+2 to
               end_reg).  */
            if (end_reg > i + 2)
              arm_emit_vfp_multi_reg_pop (i + 2,
                                          (end_reg - (i + 2)) / 2,
                                          stack_pointer_rtx);
            end_reg = i;
          }

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (end_reg > i + 2)
        arm_emit_vfp_multi_reg_pop (i + 2,
                                    (end_reg - (i + 2)) / 2,
                                    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
        {
          rtx_insn *insn;
          rtx addr = gen_rtx_MEM (V2SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
          REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                             gen_rtx_REG (V2SImode, i),
                                             NULL_RTX);
          arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
          && !IS_STACKALIGN (func_type)
          && really_return
          && crtl->args.pretend_args_size == 0
          && saved_regs_mask & (1 << LR_REGNUM)
          && !crtl->calls_eh_return)
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          return_in_pc = true;
        }

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
        {
          for (i = 0; i <= LAST_ARM_REGNUM; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx addr = gen_rtx_MEM (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx));
                set_mem_alias_set (addr, get_frame_alias_set ());

                if (i == PC_REGNUM)
                  {
                    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
                    XVECEXP (insn, 0, 0) = ret_rtx;
                    XVECEXP (insn, 0, 1)
                      = gen_rtx_SET (gen_rtx_REG (SImode, i), addr);
                    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
                    insn = emit_jump_insn (insn);
                  }
                else
                  {
                    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
                                                 addr));
                    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                                       gen_rtx_REG (SImode, i),
                                                       NULL_RTX);
                    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                                 stack_pointer_rtx,
                                                 stack_pointer_rtx);
                  }
              }
        }
      else
        {
          if (TARGET_LDRD
              && current_tune->prefer_ldrd_strd
              && !optimize_function_for_size_p (cfun))
            {
              if (TARGET_THUMB2)
                thumb2_emit_ldrd_pop (saved_regs_mask);
              else if (TARGET_ARM && !IS_INTERRUPT (func_type))
                arm_emit_ldrd_pop (saved_regs_mask);
              else
                arm_emit_multi_reg_pop (saved_regs_mask);
            }
          else
            arm_emit_multi_reg_pop (saved_regs_mask);
        }

      if (return_in_pc)
        return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp =
        emit_insn (gen_addsi3 (stack_pointer_rtx,
                               stack_pointer_rtx,
                               GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
        {
          /* Restore pretend args.  Refer to arm_expand_prologue on how to
             save pretend_args in stack.  */
          int num_regs = crtl->args.pretend_args_size / 4;
          saved_regs_mask = (0xf0 >> num_regs) & 0xf;
          for (j = 0, i = 0; j < num_regs; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx reg = gen_rtx_REG (SImode, i);
                dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
                j++;
              }
          REG_NOTES (tmp) = dwarf;
        }
      arm_add_cfa_adjust_cfa_note (tmp, amount,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
              == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate the code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
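/* Illustrative note (not from the original): when the destination pair
   overlaps the address register (e.g. loading r2/r3 from [r2]), the high
   word is loaded first so that the base address survives for the second
   load; otherwise the low word is loaded first.  */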
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
        {
          output_asm_insn ("ldr\t%H0, %2", operands);
          output_asm_insn ("ldr\t%0, %1", operands);
        }
      else
        {
          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%H0, %2", operands);
        }
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
        base = arg2, offset = arg1;
      else
        base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
        {
          int reg_offset = REGNO (offset);
          int reg_base   = REGNO (base);
          int reg_dest   = REGNO (operands[0]);

          /* Add the base and offset registers together into the
             higher destination register.  */
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
                       reg_dest + 1, reg_base, reg_offset);

          /* Load the lower destination register from the address in
             the higher destination register.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
                       reg_dest, reg_dest + 1);

          /* Load the higher destination register from its own address
             plus 4.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
                       reg_dest + 1, reg_dest + 1);
        }
      else
        {
          /* Compute <address> + 4 for the high order load.  */
          operands[2] = adjust_address (operands[1], SImode, 4);

          /* If the computed address is held in the low order register
             then load the high order register first, otherwise always
             load the low order register first.  */
          if (REGNO (operands[0]) == REGNO (base))
            {
              output_asm_insn ("ldr\t%H0, %2", operands);
              output_asm_insn ("ldr\t%0, %1", operands);
            }
          else
            {
              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%H0, %2", operands);
            }
        }
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
         directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;
    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
        std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;
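  /* Worked example (illustrative): len == 23 copies 12 bytes with
     movmem12b and then 8 with movmem8b (both advance OUT and IN), leaving
     len == 3 with offset == 0; the tail code then moves a halfword at
     offset 0 and a final byte at offset 2.  */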
  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
    }
}
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character C.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */
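/* Example output (illustrative), for arm_emit_eabi_attribute
   ("Tag_ABI_enum_size", 26, 1) with -fverbose-asm:

       .eabi_attribute 26, 1   @ Tag_ABI_enum_size  */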
void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
/* This function is used to print CPU tuning information as a comment
   in the assembler file.  Pointers are not printed for now.  */
static void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t@.tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
               current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
               current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
               current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
               current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
               current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
               (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
               current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
               current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
               current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
               current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
               (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
               (int) current_tune->logical_op_non_short_circuit_thumb,
               (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
               (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file,
               "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
               (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
               (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
               current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
               current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
               (int) current_tune->sched_autopref);
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
        {
          /* armv7ve doesn't support any extensions.  */
          if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
            {
              /* Keep backward compatability for assemblers
                 which don't support armv7ve.  */
              asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
              asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
              asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
              asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
              asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
            }
          else
            {
              const char* pos = strchr (arm_selected_arch->name, '+');
              if (pos)
                {
                  char buf[15];
                  gcc_assert (strlen (arm_selected_arch->name)
                              <= sizeof (buf) / sizeof (*pos));
                  strncpy (buf, arm_selected_arch->name,
                           (pos - arm_selected_arch->name) * sizeof (*pos));
                  buf[pos - arm_selected_arch->name] = '\0';
                  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
                  asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
                               pos + 1);
                }
              else
                asm_fprintf (asm_out_file, "\t.arch %s\n",
                             arm_selected_arch->name);
            }
        }
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
        asm_fprintf (asm_out_file, "\t.arch %s\n",
                     arm_selected_cpu->name + 8);
      else
        {
          const char* truncated_name
            = arm_rewrite_selected_cpu (arm_selected_cpu->name);
          asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
        }

      if (print_tune_info)
        arm_print_tune_info ();

      if (TARGET_SOFT_FLOAT)
        {
          fpu_name = "softvfp";
        }
      else
        {
          fpu_name = arm_fpu_desc->name;
          if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
            {
              if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
                arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

              if (TARGET_HARD_FLOAT_ABI)
                arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
            }
        }
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
         are used.  However we don't have any easy way of figuring this out.
         Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
        arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
        {
          arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
          arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
        }
      if (flag_signaling_nans)
        arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
                               flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
                               flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
        val = 4;
      else if (optimize >= 2)
        val = 2;
      else if (optimize)
        val = 1;
      else
        val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
                               unaligned_access);

      if (arm_fp16_format)
        arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
                                 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
        arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
        {
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (label));
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
        }
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
26063 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26065 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26066 && !strcmp (prefix
, "L"))
26068 arm_ccfsm_state
= 0;
26069 arm_target_insn
= NULL
;
26071 default_internal_label (stream
, prefix
, labelno
);
26074 /* Output code to add DELTA to the first argument, and then jump
26075 to FUNCTION. Used for C++ multiple inheritance. */
26077 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
26078 HOST_WIDE_INT delta
,
26079 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
26082 static int thunk_label
= 0;
26085 int mi_delta
= delta
;
26086 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26088 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26091 mi_delta
= - mi_delta
;
26093 final_start_function (emit_barrier (), file
, 1);
26097 int labelno
= thunk_label
++;
26098 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26099 /* Thunks are entered in arm mode when avaiable. */
26100 if (TARGET_THUMB1_ONLY
)
26102 /* push r3 so we can use it as a temporary. */
26103 /* TODO: Omit this save if r3 is not used. */
26104 fputs ("\tpush {r3}\n", file
);
26105 fputs ("\tldr\tr3, ", file
);
26109 fputs ("\tldr\tr12, ", file
);
26111 assemble_name (file
, label
);
26112 fputc ('\n', file
);
26115 /* If we are generating PIC, the ldr instruction below loads
26116 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26117 the address of the add + 8, so we have:
26119 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26122 Note that we have "+ 1" because some versions of GNU ld
26123 don't set the low bit of the result for R_ARM_REL32
26124 relocations against thumb function symbols.
26125 On ARMv6M this is +4, not +8. */
26126 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26127 assemble_name (file
, labelpc
);
26128 fputs (":\n", file
);
26129 if (TARGET_THUMB1_ONLY
)
26131 /* This is 2 insns after the start of the thunk, so we know it
26132 is 4-byte aligned. */
26133 fputs ("\tadd\tr3, pc, r3\n", file
);
26134 fputs ("\tmov r12, r3\n", file
);
26137 fputs ("\tadd\tr12, pc, r12\n", file
);
26139 else if (TARGET_THUMB1_ONLY
)
26140 fputs ("\tmov r12, r3\n", file
);
26142 if (TARGET_THUMB1_ONLY
)
26144 if (mi_delta
> 255)
26146 fputs ("\tldr\tr3, ", file
);
26147 assemble_name (file
, label
);
26148 fputs ("+4\n", file
);
26149 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26150 mi_op
, this_regno
, this_regno
);
26152 else if (mi_delta
!= 0)
26154 /* Thumb1 unified syntax requires s suffix in instruction name when
26155 one of the operands is immediate. */
26156 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26157 mi_op
, this_regno
, this_regno
,
26163 /* TODO: Use movw/movt for large constants when available. */
26164 while (mi_delta
!= 0)
26166 if ((mi_delta
& (3 << shift
)) == 0)
26170 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26171 mi_op
, this_regno
, this_regno
,
26172 mi_delta
& (0xff << shift
));
26173 mi_delta
&= ~(0xff << shift
);
26180 if (TARGET_THUMB1_ONLY
)
26181 fputs ("\tpop\t{r3}\n", file
);
26183 fprintf (file
, "\tbx\tr12\n");
26184 ASM_OUTPUT_ALIGN (file
, 2);
26185 assemble_name (file
, label
);
26186 fputs (":\n", file
);
26189 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26190 rtx tem
= XEXP (DECL_RTL (function
), 0);
26191 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26192 pipeline offset is four rather than eight. Adjust the offset
26194 tem
= plus_constant (GET_MODE (tem
), tem
,
26195 TARGET_THUMB1_ONLY
? -3 : -7);
26196 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26198 gen_rtx_SYMBOL_REF (Pmode
,
26199 ggc_strdup (labelpc
)));
26200 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26203 /* Output ".word .LTHUNKn". */
26204 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26206 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26207 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26211 fputs ("\tb\t", file
);
26212 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26213 if (NEED_PLT_RELOC
)
26214 fputs ("(PLT)", file
);
26215 fputc ('\n', file
);
26218 final_end_function ();
26222 arm_emit_vector_const (FILE *file
, rtx x
)
26225 const char * pattern
;
26227 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26229 switch (GET_MODE (x
))
26231 case V2SImode
: pattern
= "%08x"; break;
26232 case V4HImode
: pattern
= "%04x"; break;
26233 case V8QImode
: pattern
= "%02x"; break;
26234 default: gcc_unreachable ();
26237 fprintf (file
, "0x");
26238 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26242 element
= CONST_VECTOR_ELT (x
, i
);
26243 fprintf (file
, pattern
, INTVAL (element
));
26249 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26250 HFmode constant pool entries are actually loaded with ldr. */
26252 arm_emit_fp16_const (rtx c
)
26257 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
26258 bits
= real_to_target (NULL
, &r
, HFmode
);
26259 if (WORDS_BIG_ENDIAN
)
26260 assemble_zeros (2);
26261 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
26262 if (!WORDS_BIG_ENDIAN
)
26263 assemble_zeros (2);
26267 arm_output_load_gr (rtx
*operands
)
26274 if (!MEM_P (operands
[1])
26275 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26276 || !REG_P (reg
= XEXP (sum
, 0))
26277 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26278 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26279 return "wldrw%?\t%0, %1";
26281 /* Fix up an out-of-range load of a GR register. */
26282 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26283 wcgr
= operands
[0];
26285 output_asm_insn ("ldr%?\t%0, %1", operands
);
26287 operands
[0] = wcgr
;
26289 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26290 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26295 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26297 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26298 named arg and all anonymous args onto the stack.
26299 XXX I know the prologue shouldn't be pushing registers, but it is faster
26303 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26307 int second_time ATTRIBUTE_UNUSED
)
26309 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26312 cfun
->machine
->uses_anonymous_args
= 1;
26313 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26315 nregs
= pcum
->aapcs_ncrn
;
26316 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
26320 nregs
= pcum
->nregs
;
26322 if (nregs
< NUM_ARG_REGS
)
26323 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26326 /* We can't rely on the caller doing the proper promotion when
26327 using APCS or ATPCS. */
26330 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26332 return !TARGET_AAPCS_BASED
;
26335 static machine_mode
26336 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26338 int *punsignedp ATTRIBUTE_UNUSED
,
26339 const_tree fntype ATTRIBUTE_UNUSED
,
26340 int for_return ATTRIBUTE_UNUSED
)
26342 if (GET_MODE_CLASS (mode
) == MODE_INT
26343 && GET_MODE_SIZE (mode
) < 4)
26349 /* AAPCS based ABIs use short enums by default. */
26352 arm_default_short_enums (void)
26354 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
26358 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26361 arm_align_anon_bitfield (void)
26363 return TARGET_AAPCS_BASED
;
26367 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26370 arm_cxx_guard_type (void)
26372 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26376 /* The EABI says test the least significant bit of a guard variable. */
26379 arm_cxx_guard_mask_bit (void)
26381 return TARGET_AAPCS_BASED
;
26385 /* The EABI specifies that all array cookies are 8 bytes long. */
26388 arm_get_cookie_size (tree type
)
26392 if (!TARGET_AAPCS_BASED
)
26393 return default_cxx_get_cookie_size (type
);
26395 size
= build_int_cst (sizetype
, 8);
26400 /* The EABI says that array cookies should also contain the element size. */
26403 arm_cookie_has_size (void)
26405 return TARGET_AAPCS_BASED
;
26409 /* The EABI says constructors and destructors should return a pointer to
26410 the object constructed/destroyed. */
26413 arm_cxx_cdtor_returns_this (void)
26415 return TARGET_AAPCS_BASED
;
26418 /* The EABI says that an inline function may never be the key
26422 arm_cxx_key_method_may_be_inline (void)
26424 return !TARGET_AAPCS_BASED
;
26428 arm_cxx_determine_class_data_visibility (tree decl
)
26430 if (!TARGET_AAPCS_BASED
26431 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26434 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26435 is exported. However, on systems without dynamic vague linkage,
26436 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26437 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26438 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26440 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26441 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26445 arm_cxx_class_data_always_comdat (void)
26447 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26448 vague linkage if the class has no key function. */
26449 return !TARGET_AAPCS_BASED
;
26453 /* The EABI says __aeabi_atexit should be used to register static
26457 arm_cxx_use_aeabi_atexit (void)
26459 return TARGET_AAPCS_BASED
;
26464 arm_set_return_address (rtx source
, rtx scratch
)
26466 arm_stack_offsets
*offsets
;
26467 HOST_WIDE_INT delta
;
26469 unsigned long saved_regs
;
26471 offsets
= arm_get_frame_offsets ();
26472 saved_regs
= offsets
->saved_regs_mask
;
26474 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26475 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26478 if (frame_pointer_needed
)
26479 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26482 /* LR will be the first saved register. */
26483 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26488 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26489 GEN_INT (delta
& ~4095)));
26494 addr
= stack_pointer_rtx
;
26496 addr
= plus_constant (Pmode
, addr
, delta
);
26498 /* The store needs to be marked as frame related in order to prevent
26499 DSE from deleting it as dead if it is based on fp. */
26500 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26501 RTX_FRAME_RELATED_P (insn
) = 1;
26502 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26508 thumb_set_return_address (rtx source
, rtx scratch
)
26510 arm_stack_offsets
*offsets
;
26511 HOST_WIDE_INT delta
;
26512 HOST_WIDE_INT limit
;
26515 unsigned long mask
;
26519 offsets
= arm_get_frame_offsets ();
26520 mask
= offsets
->saved_regs_mask
;
26521 if (mask
& (1 << LR_REGNUM
))
26524 /* Find the saved regs. */
26525 if (frame_pointer_needed
)
26527 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26528 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26534 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26537 /* Allow for the stack frame. */
26538 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26540 /* The link register is always the first saved register. */
26543 /* Construct the address. */
26544 addr
= gen_rtx_REG (SImode
, reg
);
26547 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26548 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26552 addr
= plus_constant (Pmode
, addr
, delta
);
26554 /* The store needs to be marked as frame related in order to prevent
26555 DSE from deleting it as dead if it is based on fp. */
26556 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26557 RTX_FRAME_RELATED_P (insn
) = 1;
26558 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26561 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26564 /* Implements target hook vector_mode_supported_p. */
26566 arm_vector_mode_supported_p (machine_mode mode
)
26568 /* Neon also supports V2SImode, etc. listed in the clause below. */
26569 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26570 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
26571 || mode
== V2DImode
|| mode
== V8HFmode
))
26574 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26575 && ((mode
== V2SImode
)
26576 || (mode
== V4HImode
)
26577 || (mode
== V8QImode
)))
26580 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26581 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26582 || mode
== V2HAmode
))
26588 /* Implements target hook array_mode_supported_p. */
26591 arm_array_mode_supported_p (machine_mode mode
,
26592 unsigned HOST_WIDE_INT nelems
)
26595 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26596 && (nelems
>= 2 && nelems
<= 4))
26602 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26603 registers when autovectorizing for Neon, at least until multiple vector
26604 widths are supported properly by the middle-end. */
26606 static machine_mode
26607 arm_preferred_simd_mode (machine_mode mode
)
26613 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26615 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26617 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26619 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26621 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26628 if (TARGET_REALLY_IWMMXT
)
26644 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26646 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26647 using r0-r4 for function arguments, r7 for the stack frame and don't have
26648 enough left over to do doubleword arithmetic. For Thumb-2 all the
26649 potentially problematic instructions accept high registers so this is not
26650 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26651 that require many low registers. */
26653 arm_class_likely_spilled_p (reg_class_t rclass
)
26655 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
26656 || rclass
== CC_REG
)
26662 /* Implements target hook small_register_classes_for_mode_p. */
26664 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
26666 return TARGET_THUMB1
;
26669 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26670 ARM insns and therefore guarantee that the shift count is modulo 256.
26671 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26672 guarantee no particular behavior for out-of-range counts. */
26674 static unsigned HOST_WIDE_INT
26675 arm_shift_truncation_mask (machine_mode mode
)
26677 return mode
== SImode
? 255 : 0;
26681 /* Map internal gcc register numbers to DWARF2 register numbers. */
26684 arm_dbx_register_number (unsigned int regno
)
26689 if (IS_VFP_REGNUM (regno
))
26691 /* See comment in arm_dwarf_register_span. */
26692 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26693 return 64 + regno
- FIRST_VFP_REGNUM
;
26695 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
26698 if (IS_IWMMXT_GR_REGNUM (regno
))
26699 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
26701 if (IS_IWMMXT_REGNUM (regno
))
26702 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
26704 return DWARF_FRAME_REGISTERS
;
26707 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26708 GCC models tham as 64 32-bit registers, so we need to describe this to
26709 the DWARF generation code. Other registers can use the default. */
26711 arm_dwarf_register_span (rtx rtl
)
26719 regno
= REGNO (rtl
);
26720 if (!IS_VFP_REGNUM (regno
))
26723 /* XXX FIXME: The EABI defines two VFP register ranges:
26724 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26726 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26727 corresponding D register. Until GDB supports this, we shall use the
26728 legacy encodings. We also use these encodings for D0-D15 for
26729 compatibility with older debuggers. */
26730 mode
= GET_MODE (rtl
);
26731 if (GET_MODE_SIZE (mode
) < 8)
26734 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26736 nregs
= GET_MODE_SIZE (mode
) / 4;
26737 for (i
= 0; i
< nregs
; i
+= 2)
26738 if (TARGET_BIG_END
)
26740 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26741 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
26745 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
26746 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26751 nregs
= GET_MODE_SIZE (mode
) / 8;
26752 for (i
= 0; i
< nregs
; i
++)
26753 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
26756 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
26759 #if ARM_UNWIND_INFO
26760 /* Emit unwind directives for a store-multiple instruction or stack pointer
26761 push during alignment.
26762 These should only ever be generated by the function prologue code, so
26763 expect them to have a particular form.
26764 The store-multiple instruction sometimes pushes pc as the last register,
26765 although it should not be tracked into unwind information, or for -Os
26766 sometimes pushes some dummy registers before first register that needs
26767 to be tracked in unwind information; such dummy registers are there just
26768 to avoid separate stack adjustment, and will not be restored in the
26772 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
26775 HOST_WIDE_INT offset
;
26776 HOST_WIDE_INT nregs
;
26780 unsigned padfirst
= 0, padlast
= 0;
26783 e
= XVECEXP (p
, 0, 0);
26784 gcc_assert (GET_CODE (e
) == SET
);
26786 /* First insn will adjust the stack pointer. */
26787 gcc_assert (GET_CODE (e
) == SET
26788 && REG_P (SET_DEST (e
))
26789 && REGNO (SET_DEST (e
)) == SP_REGNUM
26790 && GET_CODE (SET_SRC (e
)) == PLUS
);
26792 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
26793 nregs
= XVECLEN (p
, 0) - 1;
26794 gcc_assert (nregs
);
26796 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
26799 /* For -Os dummy registers can be pushed at the beginning to
26800 avoid separate stack pointer adjustment. */
26801 e
= XVECEXP (p
, 0, 1);
26802 e
= XEXP (SET_DEST (e
), 0);
26803 if (GET_CODE (e
) == PLUS
)
26804 padfirst
= INTVAL (XEXP (e
, 1));
26805 gcc_assert (padfirst
== 0 || optimize_size
);
26806 /* The function prologue may also push pc, but not annotate it as it is
26807 never restored. We turn this into a stack pointer adjustment. */
26808 e
= XVECEXP (p
, 0, nregs
);
26809 e
= XEXP (SET_DEST (e
), 0);
26810 if (GET_CODE (e
) == PLUS
)
26811 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
26813 padlast
= offset
- 4;
26814 gcc_assert (padlast
== 0 || padlast
== 4);
26816 fprintf (asm_out_file
, "\t.pad #4\n");
26818 fprintf (asm_out_file
, "\t.save {");
26820 else if (IS_VFP_REGNUM (reg
))
26823 fprintf (asm_out_file
, "\t.vsave {");
26826 /* Unknown register type. */
26827 gcc_unreachable ();
26829 /* If the stack increment doesn't match the size of the saved registers,
26830 something has gone horribly wrong. */
26831 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
26835 /* The remaining insns will describe the stores. */
26836 for (i
= 1; i
<= nregs
; i
++)
26838 /* Expect (set (mem <addr>) (reg)).
26839 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26840 e
= XVECEXP (p
, 0, i
);
26841 gcc_assert (GET_CODE (e
) == SET
26842 && MEM_P (SET_DEST (e
))
26843 && REG_P (SET_SRC (e
)));
26845 reg
= REGNO (SET_SRC (e
));
26846 gcc_assert (reg
>= lastreg
);
26849 fprintf (asm_out_file
, ", ");
26850 /* We can't use %r for vfp because we need to use the
26851 double precision register names. */
26852 if (IS_VFP_REGNUM (reg
))
26853 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
26855 asm_fprintf (asm_out_file
, "%r", reg
);
26857 #ifdef ENABLE_CHECKING
26858 /* Check that the addresses are consecutive. */
26859 e
= XEXP (SET_DEST (e
), 0);
26860 if (GET_CODE (e
) == PLUS
)
26861 gcc_assert (REG_P (XEXP (e
, 0))
26862 && REGNO (XEXP (e
, 0)) == SP_REGNUM
26863 && CONST_INT_P (XEXP (e
, 1))
26864 && offset
== INTVAL (XEXP (e
, 1)));
26868 && REGNO (e
) == SP_REGNUM
);
26869 offset
+= reg_size
;
26872 fprintf (asm_out_file
, "}\n");
26874 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
26877 /* Emit unwind directives for a SET. */
26880 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
26888 switch (GET_CODE (e0
))
26891 /* Pushing a single register. */
26892 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
26893 || !REG_P (XEXP (XEXP (e0
, 0), 0))
26894 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
26897 asm_fprintf (asm_out_file
, "\t.save ");
26898 if (IS_VFP_REGNUM (REGNO (e1
)))
26899 asm_fprintf(asm_out_file
, "{d%d}\n",
26900 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
26902 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
26906 if (REGNO (e0
) == SP_REGNUM
)
26908 /* A stack increment. */
26909 if (GET_CODE (e1
) != PLUS
26910 || !REG_P (XEXP (e1
, 0))
26911 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
26912 || !CONST_INT_P (XEXP (e1
, 1)))
26915 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
26916 -INTVAL (XEXP (e1
, 1)));
26918 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
26920 HOST_WIDE_INT offset
;
26922 if (GET_CODE (e1
) == PLUS
)
26924 if (!REG_P (XEXP (e1
, 0))
26925 || !CONST_INT_P (XEXP (e1
, 1)))
26927 reg
= REGNO (XEXP (e1
, 0));
26928 offset
= INTVAL (XEXP (e1
, 1));
26929 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
26930 HARD_FRAME_POINTER_REGNUM
, reg
,
26933 else if (REG_P (e1
))
26936 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
26937 HARD_FRAME_POINTER_REGNUM
, reg
);
26942 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
26944 /* Move from sp to reg. */
26945 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
26947 else if (GET_CODE (e1
) == PLUS
26948 && REG_P (XEXP (e1
, 0))
26949 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
26950 && CONST_INT_P (XEXP (e1
, 1)))
26952 /* Set reg to offset from sp. */
26953 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
26954 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
26966 /* Emit unwind directives for the given insn. */
26969 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
26972 bool handled_one
= false;
26974 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
26977 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
26978 && (TREE_NOTHROW (current_function_decl
)
26979 || crtl
->all_throwers_are_sibcalls
))
26982 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
26985 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
26987 switch (REG_NOTE_KIND (note
))
26989 case REG_FRAME_RELATED_EXPR
:
26990 pat
= XEXP (note
, 0);
26993 case REG_CFA_REGISTER
:
26994 pat
= XEXP (note
, 0);
26997 pat
= PATTERN (insn
);
26998 if (GET_CODE (pat
) == PARALLEL
)
26999 pat
= XVECEXP (pat
, 0, 0);
27002 /* Only emitted for IS_STACKALIGN re-alignment. */
27007 src
= SET_SRC (pat
);
27008 dest
= SET_DEST (pat
);
27010 gcc_assert (src
== stack_pointer_rtx
);
27011 reg
= REGNO (dest
);
27012 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27015 handled_one
= true;
27018 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27019 to get correct dwarf information for shrink-wrap. We should not
27020 emit unwind information for it because these are used either for
27021 pretend arguments or notes to adjust sp and restore registers from
27023 case REG_CFA_DEF_CFA
:
27024 case REG_CFA_ADJUST_CFA
:
27025 case REG_CFA_RESTORE
:
27028 case REG_CFA_EXPRESSION
:
27029 case REG_CFA_OFFSET
:
27030 /* ??? Only handling here what we actually emit. */
27031 gcc_unreachable ();
27039 pat
= PATTERN (insn
);
27042 switch (GET_CODE (pat
))
27045 arm_unwind_emit_set (asm_out_file
, pat
);
27049 /* Store multiple. */
27050 arm_unwind_emit_sequence (asm_out_file
, pat
);
27059 /* Output a reference from a function exception table to the type_info
27060 object X. The EABI specifies that the symbol should be relocated by
27061 an R_ARM_TARGET2 relocation. */
27064 arm_output_ttype (rtx x
)
27066 fputs ("\t.word\t", asm_out_file
);
27067 output_addr_const (asm_out_file
, x
);
27068 /* Use special relocations for symbol references. */
27069 if (!CONST_INT_P (x
))
27070 fputs ("(TARGET2)", asm_out_file
);
27071 fputc ('\n', asm_out_file
);
27076 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27079 arm_asm_emit_except_personality (rtx personality
)
27081 fputs ("\t.personality\t", asm_out_file
);
27082 output_addr_const (asm_out_file
, personality
);
27083 fputc ('\n', asm_out_file
);
27086 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27089 arm_asm_init_sections (void)
27091 exception_section
= get_unnamed_section (0, output_section_asm_op
,
27094 #endif /* ARM_UNWIND_INFO */
27096 /* Output unwind directives for the start/end of a function. */
27099 arm_output_fn_unwind (FILE * f
, bool prologue
)
27101 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27105 fputs ("\t.fnstart\n", f
);
27108 /* If this function will never be unwound, then mark it as such.
27109 The came condition is used in arm_unwind_emit to suppress
27110 the frame annotations. */
27111 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27112 && (TREE_NOTHROW (current_function_decl
)
27113 || crtl
->all_throwers_are_sibcalls
))
27114 fputs("\t.cantunwind\n", f
);
27116 fputs ("\t.fnend\n", f
);
27121 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27123 enum tls_reloc reloc
;
27126 val
= XVECEXP (x
, 0, 0);
27127 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27129 output_addr_const (fp
, val
);
27134 fputs ("(tlsgd)", fp
);
27137 fputs ("(tlsldm)", fp
);
27140 fputs ("(tlsldo)", fp
);
27143 fputs ("(gottpoff)", fp
);
27146 fputs ("(tpoff)", fp
);
27149 fputs ("(tlsdesc)", fp
);
27152 gcc_unreachable ();
27161 fputs (" + (. - ", fp
);
27162 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27163 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27164 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27165 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27175 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27178 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27180 gcc_assert (size
== 4);
27181 fputs ("\t.word\t", file
);
27182 output_addr_const (file
, x
);
27183 fputs ("(tlsldo)", file
);
27186 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27189 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27191 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27192 return arm_emit_tls_decoration (fp
, x
);
27193 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27196 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27198 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27199 assemble_name_raw (fp
, label
);
27203 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27205 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27209 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27213 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27215 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27219 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27223 else if (GET_CODE (x
) == CONST_VECTOR
)
27224 return arm_emit_vector_const (fp
, x
);
27229 /* Output assembly for a shift instruction.
27230 SET_FLAGS determines how the instruction modifies the condition codes.
27231 0 - Do not set condition codes.
27232 1 - Set condition codes.
27233 2 - Use smallest instruction. */
27235 arm_output_shift(rtx
* operands
, int set_flags
)
27238 static const char flag_chars
[3] = {'?', '.', '!'};
27243 c
= flag_chars
[set_flags
];
27244 if (TARGET_UNIFIED_ASM
)
27246 shift
= shift_op(operands
[3], &val
);
27250 operands
[2] = GEN_INT(val
);
27251 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27254 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27257 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
27258 output_asm_insn (pattern
, operands
);
27262 /* Output assembly for a WMMX immediate shift instruction. */
27264 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27266 int shift
= INTVAL (operands
[2]);
27268 machine_mode opmode
= GET_MODE (operands
[0]);
27270 gcc_assert (shift
>= 0);
27272 /* If the shift value in the register versions is > 63 (for D qualifier),
27273 31 (for W qualifier) or 15 (for H qualifier). */
27274 if (((opmode
== V4HImode
) && (shift
> 15))
27275 || ((opmode
== V2SImode
) && (shift
> 31))
27276 || ((opmode
== DImode
) && (shift
> 63)))
27280 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27281 output_asm_insn (templ
, operands
);
27282 if (opmode
== DImode
)
27284 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27285 output_asm_insn (templ
, operands
);
27290 /* The destination register will contain all zeros. */
27291 sprintf (templ
, "wzero\t%%0");
27292 output_asm_insn (templ
, operands
);
27297 if ((opmode
== DImode
) && (shift
> 32))
27299 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27300 output_asm_insn (templ
, operands
);
27301 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27302 output_asm_insn (templ
, operands
);
27306 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27307 output_asm_insn (templ
, operands
);
27312 /* Output assembly for a WMMX tinsr instruction. */
27314 arm_output_iwmmxt_tinsr (rtx
*operands
)
27316 int mask
= INTVAL (operands
[3]);
27319 int units
= mode_nunits
[GET_MODE (operands
[0])];
27320 gcc_assert ((mask
& (mask
- 1)) == 0);
27321 for (i
= 0; i
< units
; ++i
)
27323 if ((mask
& 0x01) == 1)
27329 gcc_assert (i
< units
);
27331 switch (GET_MODE (operands
[0]))
27334 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27337 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27340 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27343 gcc_unreachable ();
27346 output_asm_insn (templ
, operands
);
27351 /* Output a Thumb-1 casesi dispatch sequence. */
27353 thumb1_output_casesi (rtx
*operands
)
27355 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27357 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27359 switch (GET_MODE(diff_vec
))
27362 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27363 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27365 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27366 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27368 return "bl\t%___gnu_thumb1_case_si";
27370 gcc_unreachable ();
27374 /* Output a Thumb-2 casesi instruction. */
27376 thumb2_output_casesi (rtx
*operands
)
27378 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27380 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27382 output_asm_insn ("cmp\t%0, %1", operands
);
27383 output_asm_insn ("bhi\t%l3", operands
);
27384 switch (GET_MODE(diff_vec
))
27387 return "tbb\t[%|pc, %0]";
27389 return "tbh\t[%|pc, %0, lsl #1]";
27393 output_asm_insn ("adr\t%4, %l2", operands
);
27394 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27395 output_asm_insn ("add\t%4, %4, %5", operands
);
27400 output_asm_insn ("adr\t%4, %l2", operands
);
27401 return "ldr\t%|pc, [%4, %0, lsl #2]";
27404 gcc_unreachable ();
27408 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27409 per-core tuning structs. */
27411 arm_issue_rate (void)
27413 return current_tune
->issue_rate
;
27416 /* Return how many instructions should scheduler lookahead to choose the
27419 arm_first_cycle_multipass_dfa_lookahead (void)
27421 int issue_rate
= arm_issue_rate ();
27423 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27426 /* Enable modeling of L2 auto-prefetcher. */
27428 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27430 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27434 arm_mangle_type (const_tree type
)
27436 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27437 has to be managled as if it is in the "std" namespace. */
27438 if (TARGET_AAPCS_BASED
27439 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27440 return "St9__va_list";
27442 /* Half-precision float. */
27443 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27446 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27448 if (TYPE_NAME (type
) != NULL
)
27449 return arm_mangle_builtin_type (type
);
27451 /* Use the default mangling. */
27455 /* Order of allocation of core registers for Thumb: this allocation is
27456 written over the corresponding initial entries of the array
27457 initialized with REG_ALLOC_ORDER. We allocate all low registers
27458 first. Saving and restoring a low register is usually cheaper than
27459 using a call-clobbered high register. */
27461 static const int thumb_core_reg_alloc_order
[] =
27463 3, 2, 1, 0, 4, 5, 6, 7,
27464 14, 12, 8, 9, 10, 11
27467 /* Adjust register allocation order when compiling for Thumb. */
27470 arm_order_regs_for_local_alloc (void)
27472 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27473 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27475 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27476 sizeof (thumb_core_reg_alloc_order
));
27479 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27482 arm_frame_pointer_required (void)
27484 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27487 /* If the function receives nonlocal gotos, it needs to save the frame
27488 pointer in the nonlocal_goto_save_area object. */
27489 if (cfun
->has_nonlocal_label
)
27492 /* The frame pointer is required for non-leaf APCS frames. */
27493 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !leaf_function_p ())
27496 /* If we are probing the stack in the prologue, we will have a faulting
27497 instruction prior to the stack adjustment and this requires a frame
27498 pointer if we want to catch the exception using the EABI unwinder. */
27499 if (!IS_INTERRUPT (arm_current_func_type ())
27500 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27501 && arm_except_unwind_info (&global_options
) == UI_TARGET
27502 && cfun
->can_throw_non_call_exceptions
)
27504 HOST_WIDE_INT size
= get_frame_size ();
27506 /* That's irrelevant if there is no stack adjustment. */
27510 /* That's relevant only if there is a stack probe. */
27511 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27513 /* We don't have the final size of the frame so adjust. */
27514 size
+= 32 * UNITS_PER_WORD
;
27515 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
27525 /* Only thumb1 can't support conditional execution, so return true if
27526 the target is not thumb1. */
27528 arm_have_conditional_execution (void)
27530 return !TARGET_THUMB1
;
27533 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27534 static HOST_WIDE_INT
27535 arm_vector_alignment (const_tree type
)
27537 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27539 if (TARGET_AAPCS_BASED
)
27540 align
= MIN (align
, 64);
27545 static unsigned int
27546 arm_autovectorize_vector_sizes (void)
27548 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27552 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27554 /* Vectors which aren't in packed structures will not be less aligned than
27555 the natural alignment of their element type, so this is safe. */
27556 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27559 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27563 arm_builtin_support_vector_misalignment (machine_mode mode
,
27564 const_tree type
, int misalignment
,
27567 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27569 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27574 /* If the misalignment is unknown, we should be able to handle the access
27575 so long as it is not to a member of a packed data structure. */
27576 if (misalignment
== -1)
27579 /* Return true if the misalignment is a multiple of the natural alignment
27580 of the vector's element type. This is probably always going to be
27581 true in practice, since we've already established that this isn't a
27583 return ((misalignment
% align
) == 0);
27586 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27591 arm_conditional_register_usage (void)
27595 if (TARGET_THUMB1
&& optimize_size
)
27597 /* When optimizing for size on Thumb-1, it's better not
27598 to use the HI regs, because of the overhead of
27600 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27601 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27604 /* The link register can be clobbered by any branch insn,
27605 but we have no way to track that at present, so mark
27606 it as unavailable. */
27608 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27610 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
27612 /* VFPv3 registers are disabled when earlier VFP
27613 versions are selected due to the definition of
27614 LAST_VFP_REGNUM. */
27615 for (regno
= FIRST_VFP_REGNUM
;
27616 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27618 fixed_regs
[regno
] = 0;
27619 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27620 || regno
>= FIRST_VFP_REGNUM
+ 32;
27624 if (TARGET_REALLY_IWMMXT
)
27626 regno
= FIRST_IWMMXT_GR_REGNUM
;
27627 /* The 2002/10/09 revision of the XScale ABI has wCG0
27628 and wCG1 as call-preserved registers. The 2002/11/21
27629 revision changed this so that all wCG registers are
27630 scratch registers. */
27631 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27632 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27633 fixed_regs
[regno
] = 0;
27634 /* The XScale ABI has wR0 - wR9 as scratch registers,
27635 the rest as call-preserved registers. */
27636 for (regno
= FIRST_IWMMXT_REGNUM
;
27637 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27639 fixed_regs
[regno
] = 0;
27640 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27644 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27646 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27647 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27649 else if (TARGET_APCS_STACK
)
27651 fixed_regs
[10] = 1;
27652 call_used_regs
[10] = 1;
27654 /* -mcaller-super-interworking reserves r11 for calls to
27655 _interwork_r11_call_via_rN(). Making the register global
27656 is an easy way of ensuring that it remains valid for all
27658 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27659 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27661 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27662 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27663 if (TARGET_CALLER_INTERWORKING
)
27664 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27666 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27670 arm_preferred_rename_class (reg_class_t rclass
)
27672 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27673 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27674 and code size can be reduced. */
27675 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27681 /* Compute the atrribute "length" of insn "*push_multi".
27682 So this function MUST be kept in sync with that insn pattern. */
27684 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
27686 int i
, regno
, hi_reg
;
27687 int num_saves
= XVECLEN (parallel_op
, 0);
27697 regno
= REGNO (first_op
);
27698 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27699 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
27701 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
27702 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27710 /* Compute the number of instructions emitted by output_move_double. */
27712 arm_count_output_move_double_insns (rtx
*operands
)
27716 /* output_move_double may modify the operands array, so call it
27717 here on a copy of the array. */
27718 ops
[0] = operands
[0];
27719 ops
[1] = operands
[1];
27720 output_move_double (ops
, false, &count
);
27725 vfp3_const_double_for_fract_bits (rtx operand
)
27727 REAL_VALUE_TYPE r0
;
27729 if (!CONST_DOUBLE_P (operand
))
27732 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
27733 if (exact_real_inverse (DFmode
, &r0
)
27734 && !REAL_VALUE_NEGATIVE (r0
))
27736 if (exact_real_truncate (DFmode
, &r0
))
27738 HOST_WIDE_INT value
= real_to_integer (&r0
);
27739 value
= value
& 0xffffffff;
27740 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
27741 return int_log2 (value
);
27748 vfp3_const_double_for_bits (rtx operand
)
27750 REAL_VALUE_TYPE r0
;
27752 if (!CONST_DOUBLE_P (operand
))
27755 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
27756 if (exact_real_truncate (DFmode
, &r0
))
27758 HOST_WIDE_INT value
= real_to_integer (&r0
);
27759 value
= value
& 0xffffffff;
27760 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
27761 return int_log2 (value
);
27767 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27770 arm_pre_atomic_barrier (enum memmodel model
)
27772 if (need_atomic_barrier_p (model
, true))
27773 emit_insn (gen_memory_barrier ());
27777 arm_post_atomic_barrier (enum memmodel model
)
27779 if (need_atomic_barrier_p (model
, false))
27780 emit_insn (gen_memory_barrier ());
27783 /* Emit the load-exclusive and store-exclusive instructions.
27784 Use acquire and release versions if necessary. */
27787 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
27789 rtx (*gen
) (rtx
, rtx
);
27795 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
27796 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
27797 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
27798 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
27800 gcc_unreachable ();
27807 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
27808 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
27809 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
27810 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
27812 gcc_unreachable ();
27816 emit_insn (gen (rval
, mem
));
27820 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
27823 rtx (*gen
) (rtx
, rtx
, rtx
);
27829 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
27830 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
27831 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
27832 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
27834 gcc_unreachable ();
27841 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
27842 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
27843 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
27844 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
27846 gcc_unreachable ();
27850 emit_insn (gen (bval
, rval
, mem
));
27853 /* Mark the previous jump instruction as unlikely. */
27856 emit_unlikely_jump (rtx insn
)
27858 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
27860 insn
= emit_jump_insn (insn
);
27861 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
27864 /* Expand a compare and swap pattern. */
27867 arm_expand_compare_and_swap (rtx operands
[])
27869 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
27871 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
27873 bval
= operands
[0];
27874 rval
= operands
[1];
27876 oldval
= operands
[3];
27877 newval
= operands
[4];
27878 is_weak
= operands
[5];
27879 mod_s
= operands
[6];
27880 mod_f
= operands
[7];
27881 mode
= GET_MODE (mem
);
27883 /* Normally the succ memory model must be stronger than fail, but in the
27884 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27885 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27887 if (TARGET_HAVE_LDACQ
27888 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
27889 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
27890 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
27896 /* For narrow modes, we're going to perform the comparison in SImode,
27897 so do the zero-extension now. */
27898 rval
= gen_reg_rtx (SImode
);
27899 oldval
= convert_modes (SImode
, mode
, oldval
, true);
27903 /* Force the value into a register if needed. We waited until after
27904 the zero-extension above to do this properly. */
27905 if (!arm_add_operand (oldval
, SImode
))
27906 oldval
= force_reg (SImode
, oldval
);
27910 if (!cmpdi_operand (oldval
, mode
))
27911 oldval
= force_reg (mode
, oldval
);
27915 gcc_unreachable ();
27920 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
27921 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
27922 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
27923 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
27925 gcc_unreachable ();
27928 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
27930 if (mode
== QImode
|| mode
== HImode
)
27931 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
27933 /* In all cases, we arrange for success to be signaled by Z set.
27934 This arrangement allows for the boolean result to be used directly
27935 in a subsequent branch, post optimization. */
27936 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
27937 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
27938 emit_insn (gen_rtx_SET (bval
, x
));
27941 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27942 another memory store between the load-exclusive and store-exclusive can
27943 reset the monitor from Exclusive to Open state. This means we must wait
27944 until after reload to split the pattern, lest we get a register spill in
27945 the middle of the atomic sequence. */
27948 arm_split_compare_and_swap (rtx operands
[])
27950 rtx rval
, mem
, oldval
, newval
, scratch
;
27952 enum memmodel mod_s
, mod_f
;
27954 rtx_code_label
*label1
, *label2
;
27957 rval
= operands
[0];
27959 oldval
= operands
[2];
27960 newval
= operands
[3];
27961 is_weak
= (operands
[4] != const0_rtx
);
27962 mod_s
= memmodel_from_int (INTVAL (operands
[5]));
27963 mod_f
= memmodel_from_int (INTVAL (operands
[6]));
27964 scratch
= operands
[7];
27965 mode
= GET_MODE (mem
);
27967 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
27969 bool use_acquire
= TARGET_HAVE_LDACQ
27970 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
27971 || is_mm_release (mod_s
));
27973 bool use_release
= TARGET_HAVE_LDACQ
27974 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
27975 || is_mm_acquire (mod_s
));
27977 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27978 a full barrier is emitted after the store-release. */
27980 use_acquire
= false;
27982 /* Checks whether a barrier is needed and emits one accordingly. */
27983 if (!(use_acquire
|| use_release
))
27984 arm_pre_atomic_barrier (mod_s
);
27989 label1
= gen_label_rtx ();
27990 emit_label (label1
);
27992 label2
= gen_label_rtx ();
27994 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
27996 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
27997 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27998 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
27999 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28000 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28002 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
28004 /* Weak or strong, we want EQ to be true for success, so that we
28005 match the flags that we got from the compare above. */
28006 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28007 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
28008 emit_insn (gen_rtx_SET (cond
, x
));
28012 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28013 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28014 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
28015 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28018 if (!is_mm_relaxed (mod_f
))
28019 emit_label (label2
);
28021 /* Checks whether a barrier is needed and emits one accordingly. */
28023 || !(use_acquire
|| use_release
))
28024 arm_post_atomic_barrier (mod_s
);
28026 if (is_mm_relaxed (mod_f
))
28027 emit_label (label2
);
28031 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28032 rtx value
, rtx model_rtx
, rtx cond
)
28034 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28035 machine_mode mode
= GET_MODE (mem
);
28036 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28037 rtx_code_label
*label
;
28040 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28042 bool use_acquire
= TARGET_HAVE_LDACQ
28043 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28044 || is_mm_release (model
));
28046 bool use_release
= TARGET_HAVE_LDACQ
28047 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28048 || is_mm_acquire (model
));
28050 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28051 a full barrier is emitted after the store-release. */
28053 use_acquire
= false;
28055 /* Checks whether a barrier is needed and emits one accordingly. */
28056 if (!(use_acquire
|| use_release
))
28057 arm_pre_atomic_barrier (model
);
28059 label
= gen_label_rtx ();
28060 emit_label (label
);
28063 new_out
= gen_lowpart (wmode
, new_out
);
28065 old_out
= gen_lowpart (wmode
, old_out
);
28068 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28070 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28079 x
= gen_rtx_AND (wmode
, old_out
, value
);
28080 emit_insn (gen_rtx_SET (new_out
, x
));
28081 x
= gen_rtx_NOT (wmode
, new_out
);
28082 emit_insn (gen_rtx_SET (new_out
, x
));
28086 if (CONST_INT_P (value
))
28088 value
= GEN_INT (-INTVAL (value
));
28094 if (mode
== DImode
)
28096 /* DImode plus/minus need to clobber flags. */
28097 /* The adddi3 and subdi3 patterns are incorrectly written so that
28098 they require matching operands, even when we could easily support
28099 three operands. Thankfully, this can be fixed up post-splitting,
28100 as the individual add+adc patterns do accept three operands and
28101 post-reload cprop can make these moves go away. */
28102 emit_move_insn (new_out
, old_out
);
28104 x
= gen_adddi3 (new_out
, new_out
, value
);
28106 x
= gen_subdi3 (new_out
, new_out
, value
);
28113 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28114 emit_insn (gen_rtx_SET (new_out
, x
));
28118 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28121 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28122 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28124 /* Checks whether a barrier is needed and emits one accordingly. */
28126 || !(use_acquire
|| use_release
))
28127 arm_post_atomic_barrier (model
);
28130 #define MAX_VECT_LEN 16
28132 struct expand_vec_perm_d
28134 rtx target
, op0
, op1
;
28135 unsigned char perm
[MAX_VECT_LEN
];
28136 machine_mode vmode
;
28137 unsigned char nelt
;
28142 /* Generate a variable permutation. */
28145 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28147 machine_mode vmode
= GET_MODE (target
);
28148 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28150 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28151 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28152 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28153 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28154 gcc_checking_assert (TARGET_NEON
);
28158 if (vmode
== V8QImode
)
28159 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28161 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28167 if (vmode
== V8QImode
)
28169 pair
= gen_reg_rtx (V16QImode
);
28170 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28171 pair
= gen_lowpart (TImode
, pair
);
28172 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28176 pair
= gen_reg_rtx (OImode
);
28177 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28178 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28184 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28186 machine_mode vmode
= GET_MODE (target
);
28187 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
28188 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28189 rtx rmask
[MAX_VECT_LEN
], mask
;
28191 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28192 numbering of elements for big-endian, we must reverse the order. */
28193 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28195 /* The VTBL instruction does not use a modulo index, so we must take care
28196 of that ourselves. */
28197 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28198 for (i
= 0; i
< nelt
; ++i
)
28200 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
28201 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28203 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28206 /* Generate or test for an insn that supports a constant permutation. */
28208 /* Recognize patterns for the VUZP insns. */
28211 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28213 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28214 rtx out0
, out1
, in0
, in1
;
28215 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28217 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28220 /* Note that these are little-endian tests. Adjust for big-endian later. */
28221 if (d
->perm
[0] == 0)
28223 else if (d
->perm
[0] == 1)
28227 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28229 for (i
= 0; i
< nelt
; i
++)
28231 unsigned elt
= (i
* 2 + odd
) & mask
;
28232 if (d
->perm
[i
] != elt
)
28242 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28243 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28244 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28245 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28246 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28247 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28248 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28249 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28251 gcc_unreachable ();
28256 if (BYTES_BIG_ENDIAN
)
28258 std::swap (in0
, in1
);
28263 out1
= gen_reg_rtx (d
->vmode
);
28265 std::swap (out0
, out1
);
28267 emit_insn (gen (out0
, in0
, in1
, out1
));
28271 /* Recognize patterns for the VZIP insns. */
28274 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28276 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
28277 rtx out0
, out1
, in0
, in1
;
28278 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28280 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28283 /* Note that these are little-endian tests. Adjust for big-endian later. */
28285 if (d
->perm
[0] == high
)
28287 else if (d
->perm
[0] == 0)
28291 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28293 for (i
= 0; i
< nelt
/ 2; i
++)
28295 unsigned elt
= (i
+ high
) & mask
;
28296 if (d
->perm
[i
* 2] != elt
)
28298 elt
= (elt
+ nelt
) & mask
;
28299 if (d
->perm
[i
* 2 + 1] != elt
)
28309 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28310 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28311 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28312 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28313 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28314 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28315 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28316 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28318 gcc_unreachable ();
28323 if (BYTES_BIG_ENDIAN
)
28325 std::swap (in0
, in1
);
28330 out1
= gen_reg_rtx (d
->vmode
);
28332 std::swap (out0
, out1
);
28334 emit_insn (gen (out0
, in0
, in1
, out1
));
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev64v16qi; break;
        case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev32v16qi; break;
        case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
        case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
        case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev16v16qi; break;
        case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
        case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
        case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
        case V4SImode:  gen = gen_neon_vrev64v4si;  break;
        case V2SImode:  gen = gen_neon_vrev64v2si;  break;
        case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
        case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
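
/* For example (illustrative): diff == 1 on V8QImode matches the selector
   { 1, 0, 3, 2, 5, 4, 7, 6 }, a byte swap within each halfword, emitted as
   vrev16.8; diff == 3 on V8QImode matches { 3, 2, 1, 0, 7, 6, 5, 4 } and is
   emitted as vrev32.8.  */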
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
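
/* For example (illustrative): with d->nelt == 8, the VTRN matcher above
   accepts { 0, 8, 2, 10, 4, 12, 6, 14 } for odd == 0 and
   { 1, 9, 3, 11, 5, 13, 7, 15 } for odd == 1, i.e. the pairwise
   transposition of even (resp. odd) lanes of the two inputs.  */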
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
         the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
        return false;

      /* If we are operating on only one vector: it could be a
         rotation.  If there are only two elements of size < 64, let
         arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
        {
          if ((nelt == 2) && (d->vmode != V2DImode))
            return false;

          next = 0;
        }

      if (d->perm[i] != next)
        return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode:  gen = gen_neon_vextv8qi;  break;
    case V4HImode:  gen = gen_neon_vextv4hi;  break;
    case V8HImode:  gen = gen_neon_vextv8hi;  break;
    case V2SImode:  gen = gen_neon_vextv2si;  break;
    case V4SImode:  gen = gen_neon_vextv4si;  break;
    case V2SFmode:  gen = gen_neon_vextv2sf;  break;
    case V4SFmode:  gen = gen_neon_vextv4sf;  break;
    case V2DImode:  gen = gen_neon_vextv2di;  break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
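
/* For example (illustrative): on V4SImode the two-vector selector
   { 2, 3, 4, 5 } gives location == 2 and is emitted as vext.32 with
   immediate #2; the single-vector rotation { 1, 2, 3, 0 } is also accepted
   and becomes a vext with both inputs the same register.  */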
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
        return true;
      if (arm_evpc_neon_vzip (d))
        return true;
      if (arm_evpc_neon_vrev (d))
        return true;
      if (arm_evpc_neon_vtrn (d))
        return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
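
/* For example (illustrative): with nelt == 4 the selector { 4, 5, 6, 7 }
   starts in the second operand; the folding above rewrites it to
   { 0, 1, 2, 3 } and swaps op0 with op1, so the matchers still see a
   sequence beginning with a small index.  */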
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
         instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
        {
          if (code != ARM_PRE_DEC)
            return true;
          else
            return false;
        }
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
         word size, there is no point in auto-incrementing
         because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
        return false;

      /* Vector and floating point modes do not support
         these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
        return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up, GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 arithmetic-left:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 arithmetic-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount >= 32)
	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 logical-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount >= 32)
	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
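
/* For example (illustrative): a 64-bit logical right shift by the constant
   40 falls into the "greater than 31" case above and needs only two SImode
   operations:

     out_low  = in_high >> 8;   (40 - 32 == 8)
     out_high = 0;

   while a shift by 12 uses the three-instruction "less than 32" form:

     out_low  = in_low >> 12;
     out_low |= in_high << 20;  (32 - 12 == 20)
     out_high = in_high >> 12;  */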
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
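
/* For example (illustrative): (symbol_ref "x") and
   (const (plus (symbol_ref "x") (const_int 4))) are both accepted, while
   (const (plus (symbol_ref "x") (const_int 0x8000))) is rejected because
   the addend does not fit the signed 16-bit REL range.  */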
/* Returns true if this is a valid comparison operation, and puts
   the operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static inline int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
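
/* For example (illustrative): length == 15 with word alignment and
   use_strd_p false costs arm_const_inline_cost (SET, val) plus
   (15 >> 2) == 3 word stores plus leftover[3] == 2 trailing stores; when
   unaligned access is available the final STRH/STRB pair counts as a
   single overlapping STR, saving one instruction.  */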
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
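
/* For example (illustrative): length == 20 with word alignment and
   mode == V16QImode costs 1 (vector constant load) + 2 (stores) - 1
   (the first 16 bytes go out as a single vst1), i.e. 2 instructions.  */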
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, 0);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
    }

  /* If there are at least nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	{
	  reg = gen_lowpart (V8QImode, reg);
	  mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
	}
      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  mem = adjust_automodify_address (dstbase, mode, dst, 0);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      mem = adjust_automodify_address (dstbase, mode,
				       addr, i - UNITS_PER_WORD);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i = 0;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  emit_move_insn (mem, reg);
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
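
/* For example (illustrative): a 7-byte memset of a word-aligned
   destination is typically expanded by arm_block_set_aligned_non_vect as
   one STR of the replicated byte, then either an STRH/STRB pair or, when
   unaligned access is permitted, a single STR overlapping the last three
   bytes.  */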
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
    {
      /* We are trying to fuse
	 movw imm / movt imm
	 instructions as a group that gets scheduled together.  */

      set_dest = SET_DEST (curr_set);

      if (GET_MODE (set_dest) != SImode)
	return false;

      /* We are trying to match:
	 prev (movw)  == (set (reg r0) (const_int imm16))
	 curr (movt) == (set (zero_extract (reg r0)
					   (const_int 16)
					   (const_int 16))
			     (const_int imm16_1))
	 or
	 prev (movw) == (set (reg r1)
			     (high (symbol_ref ("SYM"))))
	 curr (movt) == (set (reg r0)
			     (lo_sum (reg r1)
				     (symbol_ref ("SYM"))))  */
      if (GET_CODE (set_dest) == ZERO_EXTRACT)
	{
	  if (CONST_INT_P (SET_SRC (curr_set))
	      && CONST_INT_P (SET_SRC (prev_set))
	      && REG_P (XEXP (set_dest, 0))
	      && REG_P (SET_DEST (prev_set))
	      && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	    return true;
	}
      else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	       && REG_P (SET_DEST (curr_set))
	       && REG_P (SET_DEST (prev_set))
	       && GET_CODE (SET_SRC (prev_set)) == HIGH
	       && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  return false;
}
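
/* For example (illustrative): the pair

     movw  r0, #:lower16:SYM
     movt  r0, #:upper16:SYM

   matches the HIGH/LO_SUM shape above, so tunings that include
   FUSE_MOVW_MOVT keep the two instructions together during scheduling.  */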
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((TREE_CODE (decl_op1) == VAR_DECL
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (TREE_CODE (decl_op0) == VAR_DECL
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
static bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Invalidate arm_previous_fndecl.  */
void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  arm_previous_fndecl = fndecl;
  if (old_tree == new_tree)
    return;

  if (new_tree && new_tree != target_option_default_node)
    {
      cl_target_option_restore (&global_options,
				TREE_TARGET_OPTION (new_tree));

      if (TREE_TARGET_GLOBALS (new_tree))
	restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
      else
	TREE_TARGET_GLOBALS (new_tree)
	  = save_target_globals_default_opts ();
    }

  else if (old_tree && old_tree != target_option_default_node)
    {
      new_tree = target_option_current_node;

      cl_target_option_restore (&global_options,
				TREE_TARGET_OPTION (new_tree));
      if (TREE_TARGET_GLOBALS (new_tree))
	restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
      else if (new_tree == target_option_default_node)
	restore_target_globals (&default_target_globals);
      else
	TREE_TARGET_GLOBALS (new_tree)
	  = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;

  fprintf (file, "%*sselected arch %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
{
  /* Override default hook: it is always OK to inline between different modes.
     A function with mode-specific instructions, e.g. using asm, must be
     explicitly protected with noinline.  */
  return true;
}
/* Inner function to process the attribute((target(...))): take an argument
   and set the current options from that argument.  If we have a list,
   recursively go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;
      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  while (argstr && *argstr != '\0')
    {
      while (ISSPACE (*argstr))
	argstr++;

      if (!strcmp (argstr, "thumb"))
	{
	  opts->x_target_flags |= MASK_THUMB;
	  arm_option_check_internal (opts);
	  return true;
	}

      if (!strcmp (argstr, "arm"))
	{
	  opts->x_target_flags &= ~MASK_THUMB;
	  arm_option_check_internal (opts);
	  return true;
	}

      warning (0, "attribute(target(\"%s\")) is unknown", argstr);
      return false;
    }

  return false;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  /* Do any overrides, such as global options arch=xxx.  */
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
					build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
			   build_tree_list (NULL_TREE, value),
			   *attributes);
}
/* For testing.  Insert thumb or arm modes alternately on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
      || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  return ret;
}
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  if (TARGET_UNIFIED_ASM)
    fprintf (stream, "\t.syntax unified\n");
  else
    fprintf (stream, "\t.syntax divided\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
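
/* For example (illustrative): (mem (reg r1)) yields BASE == r1 and
   OFFSET == 0; (mem (plus (reg r1) (const_int 8))) yields BASE == r1 and
   OFFSET == 8; auto-increment addresses and register+register sums clear
   both outputs and return false.  */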
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing LDR and STR instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
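
/* For example (illustrative): ldr r0, [r3, #4] and ldr r1, [r3, #8]
   receive FUSION_PRI and PRI values that differ only in the offset term,
   so the scheduler treats them as neighbours and can keep them adjacent
   for a possible LDRD.  */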
#include "gt-arm.h"