/* Output routines for GCC for ARM.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hash-table.h"
#include "double-int.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-codes.h"
#include "diagnostic-core.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "plugin-api.h"
#include "sched-int.h"
#include "target-def.h"
#include "langhooks.h"
#include "gimple-expr.h"
#include "tm-constrs.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
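/* This hook is optional: a language front end may install a callback here
   to emit extra, language-specific EABI object attributes alongside the
   target's own; it remains NULL otherwise.  */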
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
                               machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
                                               machine_mode, int *,
                                               const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
                                      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);

static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
                                        unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
                                             const unsigned char *sel);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                           tree vectype,
                                           int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
                                   enum vect_cost_for_stmt kind,
                                   struct _stmt_vec_info *stmt_info,
                                   int misalign,
                                   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                                         bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int *);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
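/* Spelled out: anchored offsets span -4088 .. +4095, i.e.
   4088 + 1 + 4095 = 8184 bytes per block, and 8184 = 8 * 1023, so the
   block size is indeed a multiple of eight.  */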
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
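/* ARMv8-A deprecates IT blocks covering more than a single instruction,
   so when -mrestrict-it is in effect (arm_restrict_it) conditionalisation
   is capped at one insn instead of the usual four.  */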
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;
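/* TARGET_INITIALIZER expands to a full initializer for the target hook
   vector, picking up each TARGET_* macro (re)defined above and the
   documented default hooks for everything left untouched.  */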
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;
/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
unsigned long insn_flags = 0;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;
/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;
/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;
/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;
/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;
/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;
/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;
/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;
/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;
/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;
/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;
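/* INVALID_REGNUM here means "not yet chosen": the value is overridden if
   the user fixes a register with -mpic-register=, and a suitable default
   is selected later when PIC code actually needs one.  */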
enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
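/* In unified assembler syntax the left-shift mnemonic is spelled "lsl";
   the older divided ARM syntax used the synonym "asl", which the macro
   below picks when unified syntax is not in use.  */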
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
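/* I.e. start from the eight low registers r0-r7 and knock out any that
   have a fixed role (frame pointer, stack pointer, program counter, PIC
   register), leaving the candidates for Thumb-2 work registers.  */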
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
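/* Both macros expand to the three consecutive prefetch-related fields of a
   tune_params initializer (number of prefetch slots, L1 cache size, L1
   cache line size), so one macro fills all three slots at once.  */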
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  1,  /* vec_unalign_load_cost.  */
  1,  /* vec_unalign_store_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
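/* Units below: COSTS_N_INSNS (N) scales N to the cost of N simple
   instructions; bare integers (the ldm/stm regs-per-insn entries) are
   plain counts rather than costs.  */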
const struct cpu_cost_table cortexa9_extra_costs =

  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (2),  /* arith_shift_reg.  */

  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */

  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (3),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A9.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (4),  /* extend.  */

  COSTS_N_INSNS (4),  /* extend_add.  */

  COSTS_N_INSNS (2),  /* load.  */
  COSTS_N_INSNS (2),  /* load_sign_extend.  */
  COSTS_N_INSNS (2),  /* ldrd.  */
  COSTS_N_INSNS (2),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),  /* loadf.  */
  COSTS_N_INSNS (5),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (2),  /* store.  */
  COSTS_N_INSNS (2),  /* strd.  */
  COSTS_N_INSNS (2),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* storef.  */
  COSTS_N_INSNS (1),  /* stored.  */
  COSTS_N_INSNS (1)   /* store_unaligned.  */

  COSTS_N_INSNS (14), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (1),  /* fpconst.  */
  COSTS_N_INSNS (1),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (24), /* div.  */
  COSTS_N_INSNS (5),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (1),  /* fpconst.  */
  COSTS_N_INSNS (1),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table cortexa8_extra_costs =

  COSTS_N_INSNS (1),  /* shift.  */

  COSTS_N_INSNS (1),  /* arith_shift.  */
  0,                  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  0,                  /* log_shift_reg.  */

  0,                  /* extend_arith.  */

  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),  /* simple.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A8.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (2),  /* extend.  */

  COSTS_N_INSNS (2),  /* extend_add.  */

  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (1),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* loadf.  */
  COSTS_N_INSNS (1),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (1),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* storef.  */
  COSTS_N_INSNS (1),  /* stored.  */
  COSTS_N_INSNS (1)   /* store_unaligned.  */

  COSTS_N_INSNS (36), /* div.  */
  COSTS_N_INSNS (11), /* mult.  */
  COSTS_N_INSNS (20), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (9),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (6),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (8),  /* toint.  */
  COSTS_N_INSNS (8),  /* fromint.  */
  COSTS_N_INSNS (8)   /* roundint.  */

  COSTS_N_INSNS (64), /* div.  */
  COSTS_N_INSNS (16), /* mult.  */
  COSTS_N_INSNS (25), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (9),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (6),  /* compare.  */
  COSTS_N_INSNS (6),  /* widen.  */
  COSTS_N_INSNS (6),  /* narrow.  */
  COSTS_N_INSNS (8),  /* toint.  */
  COSTS_N_INSNS (8),  /* fromint.  */
  COSTS_N_INSNS (8)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table cortexa5_extra_costs =

  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */

  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (7)   /* idiv.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (1),  /* extend.  */

  COSTS_N_INSNS (2),  /* extend_add.  */

  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (6),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (4),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  COSTS_N_INSNS (1)   /* store_unaligned.  */

  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (3),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (30), /* div.  */
  COSTS_N_INSNS (6),  /* mult.  */
  COSTS_N_INSNS (10), /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table cortexa7_extra_costs =

  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */

  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (7)   /* idiv.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (1),  /* extend.  */

  COSTS_N_INSNS (2),  /* extend_add.  */

  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (2),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  COSTS_N_INSNS (1)   /* store_unaligned.  */

  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (3),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (30), /* div.  */
  COSTS_N_INSNS (6),  /* mult.  */
  COSTS_N_INSNS (10), /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table cortexa12_extra_costs =

  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */

  COSTS_N_INSNS (1),  /* extend_arith.  */

  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */

  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (2),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (18)  /* idiv.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (3),  /* extend.  */

  COSTS_N_INSNS (3),  /* extend_add.  */

  COSTS_N_INSNS (3),  /* load.  */
  COSTS_N_INSNS (3),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (3),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),  /* loadf.  */
  COSTS_N_INSNS (3),  /* loadd.  */
  0,                  /* load_unaligned.  */

  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  0                   /* store_unaligned.  */

  COSTS_N_INSNS (17), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */

  COSTS_N_INSNS (31), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table cortexa15_extra_costs =

  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */

  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */

  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (2),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (2),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (18)  /* idiv.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (3),  /* extend.  */

  COSTS_N_INSNS (3),  /* extend_add.  */

  COSTS_N_INSNS (3),  /* load.  */
  COSTS_N_INSNS (3),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (4),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),  /* loadf.  */
  COSTS_N_INSNS (4),  /* loadd.  */
  0,                  /* load_unaligned.  */

  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */

  0                   /* store_unaligned.  */

  COSTS_N_INSNS (17), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (5),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */

  COSTS_N_INSNS (31), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table v7m_extra_costs =

  0,                  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */

  COSTS_N_INSNS (1),  /* log_shift_reg.  */

  COSTS_N_INSNS (1),  /* extend_arith.  */

  COSTS_N_INSNS (1),  /* non_exec.  */
  false               /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),  /* simple.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (3),  /* extend_add.  */
  COSTS_N_INSNS (8)   /* idiv.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (2),  /* extend.  */

  COSTS_N_INSNS (3),  /* extend_add.  */

  COSTS_N_INSNS (2),  /* load.  */
  0,                  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (2),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  1,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (3),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (2),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (2),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  1,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (3),  /* stored.  */
  COSTS_N_INSNS (1)   /* store_unaligned.  */

  COSTS_N_INSNS (7),  /* div.  */
  COSTS_N_INSNS (2),  /* mult.  */
  COSTS_N_INSNS (5),  /* mult_addsub.  */
  COSTS_N_INSNS (3),  /* fma.  */
  COSTS_N_INSNS (1),  /* addsub.  */

  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (5),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */

  COSTS_N_INSNS (1)   /* alu.  */
#define ARM_FUSE_NOTHING   (0)
#define ARM_FUSE_MOVW_MOVT (1 << 0)
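/* These bits populate the "fuseable pairs of instructions" field in the
   tune_params tables below; a core that sets ARM_FUSE_MOVW_MOVT asks the
   scheduler to keep a movw/movt pair adjacent so the hardware can fuse
   them into a single operation.  */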
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  NULL,                     /* Sched adj cost.  */
  3,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                    /* Prefer LDRD/STRD.  */
  {true, true},             /* Prefer non short circuit.  */
  &arm_default_vec_cost,    /* Vectorizer costs.  */
  false,                    /* Prefer Neon for 64-bits bitops.  */
  false, false,             /* Prefer 32-bit encodings.  */
  false,                    /* Prefer Neon for stringops.  */
  8,                        /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,         /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF    /* Sched L2 autopref.  */
};
const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  NULL,                     /* Sched adj cost.  */
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                    /* Prefer LDRD/STRD.  */
  {true, true},             /* Prefer non short circuit.  */
  &arm_default_vec_cost,    /* Vectorizer costs.  */
  false,                    /* Prefer Neon for 64-bits bitops.  */
  false, false,             /* Prefer 32-bit encodings.  */
  false,                    /* Prefer Neon for stringops.  */
  8,                        /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,         /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF    /* Sched L2 autopref.  */
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  NULL,                     /* Sched adj cost.  */
  1,                        /* Constant limit.  */
  3,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                    /* Prefer LDRD/STRD.  */
  {true, true},             /* Prefer non short circuit.  */
  &arm_default_vec_cost,    /* Vectorizer costs.  */
  false,                    /* Prefer Neon for 64-bits bitops.  */
  false, false,             /* Prefer 32-bit encodings.  */
  false,                    /* Prefer Neon for stringops.  */
  8,                        /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,         /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF    /* Sched L2 autopref.  */
};
const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  NULL,
  xscale_sched_adjust_cost,
  2,                        /* Constant limit.  */
  3,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                    /* Prefer LDRD/STRD.  */
  {true, true},             /* Prefer non short circuit.  */
  &arm_default_vec_cost,    /* Vectorizer costs.  */
  false,                    /* Prefer Neon for 64-bits bitops.  */
  false, false,             /* Prefer 32-bit encodings.  */
  false,                    /* Prefer Neon for stringops.  */
  8,                        /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,         /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF    /* Sched L2 autopref.  */
};
const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  NULL,                     /* Sched adj cost.  */
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                    /* Prefer LDRD/STRD.  */
  {true, true},             /* Prefer non short circuit.  */
  &arm_default_vec_cost,    /* Vectorizer costs.  */
  false,                    /* Prefer Neon for 64-bits bitops.  */
  false, false,             /* Prefer 32-bit encodings.  */
  false,                    /* Prefer Neon for stringops.  */
  8,                        /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,         /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF    /* Sched L2 autopref.  */
};
const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  NULL,                     /* Sched adj cost.  */
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                    /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                    /* Prefer LDRD/STRD.  */
  {true, true},             /* Prefer non short circuit.  */
  &arm_default_vec_cost,    /* Vectorizer costs.  */
  false,                    /* Prefer Neon for 64-bits bitops.  */
  false, false,             /* Prefer 32-bit encodings.  */
  false,                    /* Prefer Neon for stringops.  */
  8,                        /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,         /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF    /* Sched L2 autopref.  */
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  &generic_extra_costs,
  NULL,                     /* Sched adj cost.  */
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                    /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                    /* Prefer LDRD/STRD.  */
  {true, true},             /* Prefer non short circuit.  */
  &arm_default_vec_cost,    /* Vectorizer costs.  */
  false,                    /* Prefer Neon for 64-bits bitops.  */
  false, false,             /* Prefer 32-bit encodings.  */
  false,                    /* Prefer Neon for stringops.  */
  8,                        /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,         /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF    /* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a8_tune =
{
  arm_9e_rtx_costs,
  &cortexa8_extra_costs,
  NULL,                     /* Sched adj cost.  */
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                    /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                    /* Prefer LDRD/STRD.  */
  {true, true},             /* Prefer non short circuit.  */
  &arm_default_vec_cost,    /* Vectorizer costs.  */
  false,                    /* Prefer Neon for 64-bits bitops.  */
  false, false,             /* Prefer 32-bit encodings.  */
  true,                     /* Prefer Neon for stringops.  */
  8,                        /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,         /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF    /* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a7_tune =
{
  arm_9e_rtx_costs,
  &cortexa7_extra_costs,
  NULL,                     /* Sched adj cost.  */
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                    /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                    /* Prefer LDRD/STRD.  */
  {true, true},             /* Prefer non short circuit.  */
  &arm_default_vec_cost,    /* Vectorizer costs.  */
  false,                    /* Prefer Neon for 64-bits bitops.  */
  false, false,             /* Prefer 32-bit encodings.  */
  true,                     /* Prefer Neon for stringops.  */
  8,                        /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,         /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF    /* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  NULL,  /* Sched adj cost.  */
  1,  /* Constant limit.  */
  2,  /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,  /* Prefer constant pool.  */
  arm_default_branch_cost,
  true,  /* Prefer LDRD/STRD.  */
  {true, true},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  true, true,  /* Prefer 32-bit encodings.  */
  true,  /* Prefer Neon for stringops.  */
  8,  /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_FULL  /* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  NULL,  /* Scheduler cost adjustment.  */
  1,  /* Constant limit.  */
  5,  /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,  /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,  /* Prefer LDRD/STRD.  */
  {true, true},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  false, false,  /* Prefer 32-bit encodings.  */
  true,  /* Prefer Neon for stringops.  */
  8,  /* Maximum insns to inline memset.  */
  ARM_FUSE_MOVW_MOVT,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF  /* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  NULL,  /* Scheduler cost adjustment.  */
  1,  /* Constant limit.  */
  2,  /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,  /* Prefer constant pool.  */
  arm_default_branch_cost,
  true,  /* Prefer LDRD/STRD.  */
  {true, true},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  true, true,  /* Prefer 32-bit encodings.  */
  true,  /* Prefer Neon for stringops.  */
  8,  /* Maximum insns to inline memset.  */
  ARM_FUSE_MOVW_MOVT,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_FULL  /* Sched L2 autopref.  */
};
const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  NULL,  /* Scheduler cost adjustment.  */
  1,  /* Constant limit.  */
  2,  /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,  /* Prefer constant pool.  */
  arm_default_branch_cost,
  true,  /* Prefer LDRD/STRD.  */
  {true, true},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  true, true,  /* Prefer 32-bit encodings.  */
  false,  /* Prefer Neon for stringops.  */
  32,  /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF  /* Sched L2 autopref.  */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  NULL,  /* Sched adj cost.  */
  1,  /* Constant limit.  */
  1,  /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,  /* Prefer constant pool.  */
  arm_cortex_a5_branch_cost,
  false,  /* Prefer LDRD/STRD.  */
  {false, false},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  false, false,  /* Prefer 32-bit encodings.  */
  true,  /* Prefer Neon for stringops.  */
  8,  /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF  /* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  1,  /* Constant limit.  */
  5,  /* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,  /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,  /* Prefer LDRD/STRD.  */
  {true, true},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  false, false,  /* Prefer 32-bit encodings.  */
  false,  /* Prefer Neon for stringops.  */
  8,  /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF  /* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  NULL,  /* Sched adj cost.  */
  1,  /* Constant limit.  */
  2,  /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,  /* Prefer constant pool.  */
  arm_default_branch_cost,
  true,  /* Prefer LDRD/STRD.  */
  {true, true},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  true, true,  /* Prefer 32-bit encodings.  */
  true,  /* Prefer Neon for stringops.  */
  8,  /* Maximum insns to inline memset.  */
  ARM_FUSE_MOVW_MOVT,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF  /* Sched L2 autopref.  */
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  NULL,  /* Sched adj cost.  */
  1,  /* Constant limit.  */
  2,  /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,  /* Prefer constant pool.  */
  arm_cortex_m_branch_cost,
  false,  /* Prefer LDRD/STRD.  */
  {false, false},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  false, false,  /* Prefer 32-bit encodings.  */
  false,  /* Prefer Neon for stringops.  */
  8,  /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF  /* Sched L2 autopref.  */
};
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  NULL,  /* Sched adj cost.  */
  0,  /* Constant limit.  */
  1,  /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,  /* Prefer constant pool.  */
  arm_cortex_m7_branch_cost,
  false,  /* Prefer LDRD/STRD.  */
  {true, true},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  false, false,  /* Prefer 32-bit encodings.  */
  false,  /* Prefer Neon for stringops.  */
  8,  /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF  /* Sched L2 autopref.  */
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  NULL,  /* Sched adj cost.  */
  1,  /* Constant limit.  */
  5,  /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,  /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,  /* Prefer LDRD/STRD.  */
  {false, false},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  false, false,  /* Prefer 32-bit encodings.  */
  false,  /* Prefer Neon for stringops.  */
  8,  /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF  /* Sched L2 autopref.  */
};
const struct tune_params arm_fa726te_tune =
{
  fa726te_sched_adjust_cost,
  1,  /* Constant limit.  */
  5,  /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,  /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,  /* Prefer LDRD/STRD.  */
  {true, true},  /* Prefer non short circuit.  */
  &arm_default_vec_cost,  /* Vectorizer costs.  */
  false,  /* Prefer Neon for 64-bits bitops.  */
  false, false,  /* Prefer 32-bit encodings.  */
  false,  /* Prefer Neon for stringops.  */
  8,  /* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,  /* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF  /* Sched L2 autopref.  */
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
   FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
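
/* For illustration only (not part of the table): a hypothetical
   arm-cores.def line such as

     ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8)

   would expand under the ARM_CORE #define above into an initializer
   roughly like

     {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a8_tune},

   The exact flag set comes from arm-cores.def; the values shown here are
   assumptions made for the example.  */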
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ  /* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static HOST_WIDE_INT
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
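
/* A standalone sketch of the same counting trick, for illustration only
   (not compiler code): each "v &= v - 1" step clears the lowest set bit,
   so the loop iterates once per set bit.  */
#if 0  /* Example, not compiled.  */
#include <assert.h>

static unsigned
example_popcount (unsigned long v)
{
  unsigned n = 0;
  while (v)
    {
      v &= v - 1;  /* Clear the least-significant set bit.  */
      n++;
    }
  return n;
}

static void
example_popcount_test (void)
{
  assert (example_popcount (0) == 0);
  assert (example_popcount (0xf0ul) == 4);  /* Four bits set.  */
}
#endif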
typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
                             const char *funcname, const char *modename,
                             int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
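
/* Worked example of the name construction above, assuming a
   signed-saturating add on SAmode (mode name "sa"): the call

     arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3);

   formats "__gnu_" + "ssadd" + "sa" + "3" into "__gnu_ssaddsa3", i.e. the
   __gnu_-prefixed spelling of the generic fixed-point helper name.  */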
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
                            machine_mode from, const char *funcname,
                            const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
           maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
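
  /* Illustrative sketch of the AEABI divmod contract described above; the
     struct-return declaration follows the Run-Time ABI document and is an
     assumption here, not compiler code.  The quotient comes back in r0 and
     the remainder in r1, so one call can serve both "/" and "%".  */
#if 0  /* Example, not compiled.  */
  typedef struct { int quot; int rem; } example_idiv_return;
  extern example_idiv_return __aeabi_idivmod (int num, int denom);

  static int
  example_mod (int n, int d)
  {
    return __aeabi_idivmod (n, d).rem;  /* Quotient is in .quot.  */
  }
#endif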
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
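
  /* Illustration of the fallback described above: because the HFmode
     optabs are left NULL, a half-precision addition such as

       __fp16 a, b;
       ... a + b ...

     is compiled as an SFmode addition on __gnu_h2f_ieee (a) and
     __gnu_h2f_ieee (b), with __gnu_f2h_ieee applied if the result is
     stored back to __fp16; the _alternative entry points serve the
     non-IEEE format.  */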
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  if (TARGET_AAPCS_BASED)
    {
      const arm_fixed_mode_set fixed_arith_modes[] =
        {
          { QQmode, "qq" },
          { UQQmode, "uqq" },
          { HQmode, "hq" },
          { UHQmode, "uhq" },
          { SQmode, "sq" },
          { USQmode, "usq" },
          { DQmode, "dq" },
          { UDQmode, "udq" },
          { TQmode, "tq" },
          { UTQmode, "utq" },
          { HAmode, "ha" },
          { UHAmode, "uha" },
          { SAmode, "sa" },
          { USAmode, "usa" },
          { DAmode, "da" },
          { UDAmode, "uda" },
          { TAmode, "ta" },
          { UTAmode, "uta" }
        };
      const arm_fixed_mode_set fixed_conv_modes[] =
        {
          { QQmode, "qq" },
          { UQQmode, "uqq" },
          { HQmode, "hq" },
          { UHQmode, "uhq" },
          { SQmode, "sq" },
          { USQmode, "usq" },
          { DQmode, "dq" },
          { UDQmode, "udq" },
          { TQmode, "tq" },
          { UTQmode, "utq" },
          { HAmode, "ha" },
          { UHAmode, "uha" },
          { SAmode, "sa" },
          { USAmode, "usa" },
          { DAmode, "da" },
          { UDAmode, "uda" },
          { TAmode, "ta" },
          { UTAmode, "uta" },
          { QImode, "qi" },
          { HImode, "hi" },
          { SImode, "si" },
          { DImode, "di" },
          { TImode, "ti" },
          { SFmode, "sf" },
          { DFmode, "df" }
        };
      unsigned int i, j;
      for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
        {
          arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
                                       "add", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
                                       "ssadd", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
                                       "usadd", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
                                       "sub", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
                                       "sssub", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
                                       "ussub", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
                                       "mul", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
                                       "ssmul", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
                                       "usmul", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
                                       "div", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
                                       "udiv", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
                                       "ssdiv", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
                                       "usdiv", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
                                       "neg", fixed_arith_modes[i].name, 2);
          arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
                                       "ssneg", fixed_arith_modes[i].name, 2);
          arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
                                       "usneg", fixed_arith_modes[i].name, 2);
          arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
                                       "ashl", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
                                       "ashr", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
                                       "lshr", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
                                       "ssashl", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
                                       "usashl", fixed_arith_modes[i].name, 3);
          arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
                                       "cmp", fixed_arith_modes[i].name, 2);
        }
      for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
        for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
          {
            if (i == j
                || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
                    && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
              continue;

            arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
                                        fixed_conv_modes[j].mode, "fract",
                                        fixed_conv_modes[i].name,
                                        fixed_conv_modes[j].name);
            arm_set_fixed_conv_libfunc (satfract_optab,
                                        fixed_conv_modes[i].mode,
                                        fixed_conv_modes[j].mode, "satfract",
                                        fixed_conv_modes[i].name,
                                        fixed_conv_modes[j].name);
            arm_set_fixed_conv_libfunc (fractuns_optab,
                                        fixed_conv_modes[i].mode,
                                        fixed_conv_modes[j].mode, "fractuns",
                                        fixed_conv_modes[i].name,
                                        fixed_conv_modes[j].name);
            arm_set_fixed_conv_libfunc (satfractuns_optab,
                                        fixed_conv_modes[i].mode,
                                        fixed_conv_modes[j].mode, "satfractuns",
                                        fixed_conv_modes[i].name,
                                        fixed_conv_modes[j].name);
          }
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
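
/* Illustration of the effect of the tables above on a soft-float BPABI
   target: a plain C double addition

     double f (double x, double y) { return x + y; }

   is emitted as a call to __aeabi_dadd rather than the generic __adddf3,
   because add_optab for DFmode was redirected in arm_init_libfuncs.  */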
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
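
/* For illustration: the code above behaves as if AAPCS targets had the
   following definition visible through <stdarg.h>.  This is a hypothetical
   C rendering; the real type is built in tree form above.  */
#if 0  /* Example, not compiled.  */
struct __va_list
{
  void *__ap;
};
#endif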
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  arm_selected_arch = NULL;
  arm_selected_cpu = NULL;
  arm_selected_tune = NULL;

  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
      arm_selected_tune = &all_cores[(int) arm_cpu_option];
    }

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switch that requires certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }
  gcc_assert (arm_selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch8 = (insn_flags & FL_ARCH8) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  arm_arch_crc = (insn_flags & FL_CRC32) != 0;
  arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
  if (arm_restrict_it == 2)
    arm_restrict_it = arm_arch8 && TARGET_THUMB2;

  if (!TARGET_THUMB2)
    arm_restrict_it = 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
                                  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT && TARGET_NEON)
    error ("iWMMXt and NEON are incompatible");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
               && TARGET_HARD_FLOAT
               && TARGET_VFP)
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
        arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
          && arm_structure_size_boundary != 32
          && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
        {
          if (ARM_DOUBLEWORD_ALIGN)
            warning (0,
                     "structure size boundary can only be set to 8, 32 or 64");
          else
            warning (0, "structure size boundary can only be set to 8 or 32");
          arm_structure_size_boundary
            = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
        }
    }
  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }

  if (TARGET_VXWORKS_RTP
      && !global_options_set.x_arm_pic_data_is_text_relative)
    arm_pic_data_is_text_relative = 0;
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
        fix_cm3_ldrd = 1;
      else
        fix_cm3_ldrd = 0;
    }
  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */
  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
        unaligned_access = 1;
      else
        unaligned_access = 0;
    }
  else if (unaligned_access == 1
           && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }
  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2)
        max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
    }
  else
    max_insns_skipped = current_tune->max_insns_skipped;
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  /* Hoisting PIC address calculations more aggressively provides a small,
     but measurable, size reduction for PIC code.  Therefore, we decrease
     the bar for unrestricted expression hoisting to the cost of PIC address
     calculation, which is 2 instructions.  */
  maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have
     deemed it beneficial (signified by setting num_prefetch_slots to 1 or
     more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->num_prefetch_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
                           current_tune->num_prefetch_slots,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  if (current_tune->l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
                           current_tune->l1_cache_line_size,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  if (current_tune->l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
                           current_tune->l1_cache_size,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;
  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;
  if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
    param_sched_autopref_queue_depth = -1;
  else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
    param_sched_autopref_queue_depth = 0;
  else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
    param_sched_autopref_queue_depth = max_insn_queue_index + 1;
  else
    gcc_unreachable ();
  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
                         param_sched_autopref_queue_depth,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
    flag_shrink_wrap = false;
  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;
  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
          || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");

  /* Currently, for slow flash data, we just disable literal pools.  */
  if (target_slow_flash_data)
    arm_disable_literal_pool = true;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2)
    inline_asm_unified = 1;
  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1)
    flag_ipa_ra = 0;

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
} isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
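
/* Usage sketch: the strings matched against isr_attribute_args above come
   from user code like the following hypothetical declarations, via the
   "isr" or "interrupt" function attribute.  */
#if 0  /* Example, not compiled.  */
void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
void fast_handler (void) __attribute__ ((interrupt ("FIQ")));
#endif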
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions
                   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
           ldr          r8, [pc, #0]
           ldr          pc, [pc]
           .word        static chain value
           .word        function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
         Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
                   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
                     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
                                NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
         is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
                                 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
         the default abi) ... */
      if (!call_used_regs[3])
        return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
        return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
        {
          gcc_assert (CALL_P (sibling));

          if (find_regno_fusage (sibling, USE, 3))
            return 0;
        }

      /* ... and that there are no call-saved registers in r0-r2
         (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
        return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;

      if (flag_pic
          && arm_pic_register != INVALID_REGNUM
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
3600 const_ok_for_arm (HOST_WIDE_INT i
)
3604 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3605 be all zero, or all one. */
3606 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3607 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3608 != ((~(unsigned HOST_WIDE_INT
) 0)
3609 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3612 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3614 /* Fast return for 0 and small values. We must do this for zero, since
3615 the code below can't handle that one case. */
3616 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3619 /* Get the number of trailing zeros. */
3620 lowbit
= ffs((int) i
) - 1;
3622 /* Only even shifts are allowed in ARM mode so round down to the
3623 nearest even number. */
3627 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3632 /* Allow rotated constants in ARM mode. */
3634 && ((i
& ~0xc000003f) == 0
3635 || (i
& ~0xf000000f) == 0
3636 || (i
& ~0xfc000003) == 0))
3643 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3646 if (i
== v
|| i
== (v
| (v
<< 8)))
3649 /* Allow repeated pattern 0xXY00XY00. */
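#if 0
/* Illustration only, kept out of the build: a minimal standalone sketch
   of the core ARM-mode rule the function above implements.  An immediate
   is valid if it is an 8-bit value rotated right by an even amount; the
   name `is_arm_immediate' is hypothetical and exists only here.  */
static int
is_arm_immediate (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT and see if the result fits in 8 bits.  */
      unsigned int v = (x << rot) | (x >> ((32 - rot) & 31));
      if ((v & ~0xffu) == 0)
        return 1;
    }
  return 0;
}
/* Examples: is_arm_immediate (0x000000ff) == 1 (rotation 0),
   is_arm_immediate (0xff000000) == 1 (rotation 8),
   is_arm_immediate (0x00000101) == 0 (needs two instructions).  */
#endif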
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
        return 1;
      else
        /* Otherwise, try mvn.  */
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
          && ((i & 0xfffff000) == 0
              || ((-i) & 0xfffff000) == 0))
        return 1;
      /* else fall through.  */

    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:         /* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
              && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
                    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
          && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
         constants by pushing them into memory so we must synthesize
         them in-line, regardless of the cost.  This is only likely to
         be more costly on chips that have load delay slots and we are
         compiling without running the scheduler (so no splitting
         occurred before the final instruction emission).

         Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c  */
      if (!cfun->machine->after_arm_reorg
          && !cond
          && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
                                1, 0)
              > (arm_constant_limit (optimize_function_for_size_p (cfun))
                 + (code != SET))))
        {
          if (code == SET)
            {
              /* Currently SET is the only monadic value for CODE, all
                 the rest are diadic.  */
              if (TARGET_USE_MOVT)
                arm_emit_movpair (target, GEN_INT (val));
              else
                emit_set_insn (target, GEN_INT (val));

              return 1;
            }
          else
            {
              rtx temp = subtargets ? gen_reg_rtx (mode) : target;

              if (TARGET_USE_MOVT)
                arm_emit_movpair (temp, GEN_INT (val));
              else
                emit_set_insn (temp, GEN_INT (val));

              /* For MINUS, the value is subtracted from, since we never
                 have subtraction of a constant.  */
              if (code == MINUS)
                emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
              else
                emit_set_insn (target,
                               gen_rtx_fmt_ee (code, mode, source, temp));

              return 2;
            }
        }
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
                           1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
                            struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
        {
          int consecutive_zeros = 0;

          if (!(val & (3 << i)))
            {
              while ((i < 32) && !(val & (3 << i)))
                {
                  consecutive_zeros += 2;
                  i += 2;
                }
              if (consecutive_zeros > best_consecutive_zeros)
                {
                  best_consecutive_zeros = consecutive_zeros;
                  best_start = i - consecutive_zeros;
                }
              i -= 2;
            }
        }
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

           *((volatile int *)0xe0000100) = 1;
           *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

            mov rA, #0xe0000000
            mov rB, #1
            str rB, [rA, #0x100]
            mov rB, #2
            str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
        {
          *return_sequence = tmp_sequence;
          insns1 = insns2;
        }
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
                              struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
        i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
        {
          loc = i;
          if (i <= 12 && TARGET_THUMB2 && code == PLUS)
            /* We can use addw/subw for the last 12 bits.  */
            result = remainder;
          else
            {
              /* Use an 8-bit shifted/rotated immediate.  */
              end = i - 8;
              if (end < 0)
                end += 32;
              result = remainder & ((0x0ff << end)
                                    | ((i < end) ? (0xff >> (32 - end))
                                                 : 0));
              i -= 8;
            }
        }
      else
        {
          /* Arm allows rotates by a multiple of two.  Thumb-2 allows
             arbitrary shifts.  */
          i -= TARGET_ARM ? 2 : 1;
          continue;
        }

      /* Next, see if we can do a better job with a thumb2 replicated
         constant.

         We do it this way around to catch the cases like 0x01F001E0 where
         two 8-bit immediates would work, but a replicated constant would
         make it worse.

         TODO: 16-bit constants that don't clear all the bits, but still win.
         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
        {
          b1 = (remainder & 0xff000000) >> 24;
          b2 = (remainder & 0x00ff0000) >> 16;
          b3 = (remainder & 0x0000ff00) >> 8;
          b4 = remainder & 0xff;

          if (loc > 24)
            {
              /* The 8-bit immediate already found clears b1 (and maybe b2),
                 but must leave b3 and b4 alone.  */

              /* First try to find a 32-bit replicated constant that clears
                 almost everything.  We can assume that we can't do it in one,
                 or else we wouldn't be here.  */
              unsigned int tmp = b1 & b2 & b3 & b4;
              unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
                                  + (tmp << 24);
              unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
                                            + (tmp == b3) + (tmp == b4);
              if (tmp
                  && (matching_bytes >= 3
                      || (matching_bytes == 2
                          && const_ok_for_op (remainder & ~tmp2, code))))
                {
                  /* At least 3 of the bytes match, and the fourth has at
                     least as many bits set, or two of the bytes match
                     and it will only require one more insn to finish.  */
                  result = tmp2;
                  i = tmp != b1 ? 32
                      : tmp != b2 ? 24
                      : tmp != b3 ? 16
                      : 8;
                }

              /* Second, try to find a 16-bit replicated constant that can
                 leave three of the bytes clear.  If b2 or b4 is already
                 zero, then we can.  If the 8-bit from above would not
                 clear b2 anyway, then we still win.  */
              else if (b1 == b3 && (!b2 || !b4
                               || (remainder & 0x00ff0000 & ~result)))
                {
                  result = remainder & 0xff00ff00;
                  i = 24;
                }
            }
          else if (loc > 16)
            {
              /* The 8-bit immediate already found clears b2 (and maybe b3)
                 and we don't get here unless b1 is already clear, but it will
                 leave b4 unchanged.  */

              /* If we can clear b2 and b4 at once, then we win, since the
                 8-bits couldn't possibly reach that far.  */
              if (b2 == b4)
                {
                  result = remainder & 0x00ff00ff;
                  i = 16;
                }
            }
        }

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
        code = PLUS;
    }
  while (remainder);

  return insns;
}
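#if 0
/* Illustration only: the kind of decomposition computed above for a SET
   of 0x00ffff00 in ARM mode.  The value has 16 contiguous set bits, too
   many for one immediate, but each half is an 8-bit value rotated by an
   even amount, so two instructions suffice:

       mov r0, #0x00ff0000    @ 0xff rotated right by 16
       orr r0, r0, #0x0000ff00   @ 0xff rotated right by 24
*/
#endif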
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */
static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
                  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
                  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             GEN_INT (ARM_SIGN_EXTEND (val))));
          return 1;
        }

      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;

          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }
      break;

    case AND:
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, const0_rtx));
          return 1;
        }
      if (remainder == 0xffffffff)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }

      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_NOT (mode, source)));
          return 1;
        }
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
         passed as (source + (-val)).  */
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_NEG (mode, source)));
          return 1;
        }
      if (const_ok_for_arm (val))
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_MINUS (mode, GEN_INT (val),
                                                            source)));
          return 1;
        }

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
        emit_constant_insn (cond,
                            gen_rtx_SET (VOIDmode, target,
                                         (source
                                          ? gen_rtx_fmt_ee (code, mode, source,
                                                            GEN_INT (val))
                                          : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
        {
          if (mode == SImode && i == 16)
            /* Use UXTH in preference to UBFX, since on Thumb2 it's a
               smaller insn.  */
            emit_constant_insn (cond,
                                gen_zero_extendhisi2
                                (target, gen_lowpart (HImode, source)));
          else
            /* Extz only supports SImode, but we can coerce the operands
               to that mode.  */
            emit_constant_insn (cond,
                                gen_extzv_t2 (gen_lowpart (SImode, target),
                                              gen_lowpart (SImode, source),
                                              GEN_INT (i), const0_rtx));
        }

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
        clear_sign_bit_copies++;
      else
        break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
        set_sign_bit_copies++;
      else
        break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
        clear_zero_bit_copies++;
      else
        break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
        set_zero_bit_copies++;
      else
        break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
         to be negative.  This is a good way of doing it, since the shift
         may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
        {
          if (const_ok_for_arm
              (temp1 = ARM_SIGN_EXTEND (remainder
                                        << (set_sign_bit_copies - 1))))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }

          /* For an inverted constant, we will need to set the low bits,
             these will be shifted out of harm's way.  */
          temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
          if (const_ok_for_arm (~temp1))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
        }

      /* See if we can calculate the value as the difference between two
         valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
        {
          int topshift = clear_sign_bit_copies & ~1;

          temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
                                   & (0xff000000 >> topshift));

          /* If temp1 is zero, then that means the 9 most significant
             bits of remainder were 1 and we've caused it to overflow.
             When topshift is 0 we don't need to do anything since we
             can borrow from 'bit 32'.  */
          if (temp1 == 0 && topshift != 0)
            temp1 = 0x80000000 >> (topshift - 1);

          temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

          if (const_ok_for_arm (temp2))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_addsi3 (target, new_src,
                                                  GEN_INT (-temp2)));
                }

              return 2;
            }
        }

      /* See if we can generate this by setting the bottom (or the top)
         16 bits, and then shifting these into the other half of the
         word.  We only look for the simplest cases, to do more would cost
         too much.  Be careful, however, not to generate this when the
         alternative would take fewer insns.  */
      if (val & 0xffff0000)
        {
          temp1 = remainder & 0xffff0000;
          temp2 = remainder & 0x0000ffff;

          /* Overlaps outside this range are best done using other methods.  */
          for (i = 9; i < 24; i++)
            {
              if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
                  && !const_ok_for_arm (temp2))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET
                       (VOIDmode, target,
                        gen_rtx_IOR (mode,
                                     gen_rtx_ASHIFT (mode, source,
                                                     GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }

          /* Don't duplicate cases already considered.  */
          for (i = 17; i < 24; i++)
            {
              if (((temp1 | (temp1 >> i)) == remainder)
                  && !const_ok_for_arm (temp1))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET (VOIDmode, target,
                                    gen_rtx_IOR
                                    (mode,
                                     gen_rtx_LSHIFTRT (mode, source,
                                                       GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }
        }
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
         single instruction, and we can find a temporary to put it in,
         then this can be done in two instructions instead of 3-4.  */
      if (subtargets
          /* TARGET can't be NULL if SUBTARGETS is 0 */
          || (reload_completed && !reg_mentioned_p (target, source)))
        {
          if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
            {
              if (generate)
                {
                  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, sub,
                                                   GEN_INT (val)));
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, target,
                                                   gen_rtx_fmt_ee (code, mode,
                                                                   source, sub)));
                }
              return 2;
            }
        }

      if (code == XOR)
        break;

      /* Convert
         x = y | constant (which is composed of set_sign_bit_copies of leading 1s
                           and the remainder 0s, e.g. 0xfff00000) to
         x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

         This can be done in 2 instructions by using shifts with mov or mvn.
         E.g. for
           x = x | 0xfff00000;
         we generate
           mvn  r0, r0, asl #12
           mvn  r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
          && (val & (-1 << (32 - set_sign_bit_copies))) == val)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_sign_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode,
                                                           source,
                                                           shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode, sub,
                                                             shift))));
            }
          return 2;
        }

      /* Convert
         x = y | constant (which has set_zero_bit_copies number of trailing ones)
         to
         x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

         For example, r0 = r0 | 0xfff becomes
           mvn  r0, r0, lsr #12
           mvn  r0, r0, asl #12  */
      if (set_zero_bit_copies > 8
          && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_zero_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode,
                                                             source,
                                                             shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode, sub,
                                                           shift))));
            }
          return 2;
        }

      /* This will never be reached for Thumb2 because orn is a valid
         instruction.  This is for Thumb1 and the ARM 32 bit cases.

         x = y | constant (such that ~constant is a valid constant)
         Transform this to
         x = ~(~y & ~constant).  */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, sub,
                                               gen_rtx_NOT (mode, source)));
              source = sub;
              if (subtargets)
                sub = gen_reg_rtx (mode);
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, sub,
                                               gen_rtx_AND (mode, source,
                                                            GEN_INT (temp1))));
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, target,
                                               gen_rtx_NOT (mode, sub)));
            }
          return 3;
        }
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = ((0xffffffff
                                       << (32 - clear_sign_bit_copies))
                                      & 0xffffffff);

          if ((remainder | shift_mask) != 0xffffffff)
            {
              HOST_WIDE_INT new_val
                = ARM_SIGN_EXTEND (remainder | shift_mask);

              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  insns = arm_gen_constant (AND, SImode, cond, new_val,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;
                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_sign_bit_copies);

              emit_insn (gen_ashlsi3 (new_src, source, shift));
              emit_insn (gen_lshrsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

          if ((remainder | shift_mask) != 0xffffffff)
            {
              HOST_WIDE_INT new_val
                = ARM_SIGN_EXTEND (remainder | shift_mask);

              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;

                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_zero_bit_copies);

              emit_insn (gen_lshrsi3 (new_src, source, shift));
              emit_insn (gen_ashlsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
                                            &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
                                            &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
        {
          rtx new_src, temp1_rtx;

          temp1 = immediates->i[i];

          if (code == SET || code == MINUS)
            new_src = (subtargets ? gen_reg_rtx (mode) : target);
          else if ((final_invert || i < (insns - 1)) && subtargets)
            new_src = gen_reg_rtx (mode);
          else
            new_src = target;

          if (can_invert)
            temp1 = ~temp1;
          else if (can_negate)
            temp1 = -temp1;

          temp1 = trunc_int_for_mode (temp1, mode);
          temp1_rtx = GEN_INT (temp1);

          if (code == SET)
            ;
          else if (code == MINUS)
            temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
          else
            temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, new_src,
                                           temp1_rtx));
          source = new_src;

          if (code == SET)
            {
              can_negate = can_invert;
              can_invert = 0;
              code = PLUS;
            }
          else if (code == MINUS)
            code = PLUS;
        }
    }

  if (final_invert)
    {
      if (generate)
        emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
                                               gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
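#if 0
/* Illustration only: the "two shifts" AND trick emitted above when
   clear_sign_bit_copies is between 16 and 23.  Clearing the top 16 bits
   of a register needs no materialized mask; the gen_ashlsi3/gen_lshrsi3
   pair corresponds to this C expression.  */
static unsigned int
and_via_shifts (unsigned int x)
{
  /* Equivalent to x & 0x0000ffff; compiles to two register shifts.  */
  return (x << 16) >> 16;
}
#endif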
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                             bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
          || (!TARGET_ARM && (*code == GTU || *code == LEU)))
        {
          /* Missing comparison.  First try to use an available
             comparison.  */
          if (CONST_INT_P (*op1))
            {
              i = INTVAL (*op1);
              switch (*code)
                {
                case GT:
                case LE:
                  if (i != maxval
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      *code = *code == GT ? GE : LT;
                      return;
                    }
                  break;

                case GTU:
                case LEU:
                  if (i != ~((unsigned HOST_WIDE_INT) 0)
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      *code = *code == GTU ? GEU : LTU;
                      return;
                    }
                  break;

                default:
                  gcc_unreachable ();
                }
            }

          /* If that did not work, reverse the condition.  */
          if (!op0_preserve_value)
            {
              std::swap (*op0, *op1);
              *code = (int)swap_condition ((enum rtx_code)*code);
            }
        }
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
                        GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
          *code = *code == GT ? GE : LT;
          return;
        }
      break;

    case GE:
    case LT:
      if (i != ~maxval
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          *code = *code == GE ? GT : LE;
          return;
        }
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
          *code = *code == GTU ? GEU : LTU;
          return;
        }
      break;

    case GEU:
    case LTU:
      if (i != 0
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          *code = *code == GEU ? GTU : LEU;
          return;
        }
      break;

    default:
      gcc_unreachable ();
    }
}
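#if 0
/* Illustration only: the rewrite performed above.  0xfffff (20 set bits)
   is not a valid ARM immediate, but 0x100000 (a single bit) is, so a GT
   comparison is adjusted to GE with the constant bumped by one.  Both
   functions compute the same result; the second materializes its
   constant in one instruction.  */
static int cmp_gt (int x) { return x > 0xfffff;   }
static int cmp_ge (int x) { return x >= 0x100000; }
#endif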
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : typed_noop_remove <rtx_def>
{
  typedef const rtx_def *value_type;
  typedef const rtx_def *compare_type;
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table <libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
         registers if the selected core only supports single-precision
         arithmetic, even if we are using the hard-float ABI.  The same is
         true for single-precision helpers, but we will never be using the
         hard-float ABI on a CPU which doesn't support single-precision
         operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
                                                        SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
                                                        DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
           && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
         even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
        return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
          && TARGET_AAPCS_BASED
          && TARGET_VFP
          && TARGET_HARD_FLOAT
          && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
          && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
        size += 32;
      if (TARGET_IWMMXT_ABI)
        size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (i.e. not including vectors and
         complex) are always returned in a register (or registers).
         We don't care about which register here, so we can short-cut
         some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
          && TREE_CODE (type) != VECTOR_TYPE
          && TREE_CODE (type) != COMPLEX_TYPE)
        return false;

      /* Any return value that is no larger than one word can be
         returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
        return false;

      /* Check any available co-processors to see if they accept the
         type as a register candidate (VFP, for example, can return
         some aggregates in consecutive registers).  These aren't
         available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
        return false;

      /* Vector values should be returned using ARM registers, not
         memory (unless they're over 16 bytes, which will break since
         we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
        return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
         larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
         if the type is 'integer like' and every addressable element
         has an offset of zero.  For practical purposes this means
         that the structure can have at most one non bit-field element
         and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
         have been created by C++.  */
      for (field = TYPE_FIELDS (type);
           field && TREE_CODE (field) != FIELD_DECL;
           field = DECL_CHAIN (field))
        continue;

      if (field == NULL)
        return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
        return true;

      /* ... Aggregates that are not themselves valid for returning in
         a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
        return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
         since they are not addressable.  */
      for (field = DECL_CHAIN (field);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (!DECL_BIT_FIELD_TYPE (field))
            return true;
        }

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
         integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (FLOAT_TYPE_P (TREE_TYPE (field)))
            return true;

          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
            return true;
        }

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
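#if 0
/* Illustration only: under the APCS rules checked above, the first
   struct is "integer like" (one non-bit-field member at offset zero,
   everything else a bit-field) and is returned in r0; the second
   contains a float as its first field and so goes back in memory.  */
struct in_reg    { int value; unsigned flag : 1; };  /* register */
struct in_memory { float value; };                   /* memory   */
#endif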
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized interrupt type.  */
  return ARM_PCS_UNKNOWN;
}
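#if 0
/* Illustration only: source-level use of the "pcs" attribute whose
   string argument is parsed above.  This forces the VFP variant of the
   AAPCS for one function, so its double arguments arrive in d0/d1
   rather than in core registers.  */
double dot (double a, double b) __attribute__((pcs("aapcs-vfp")));
#endif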
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  if (type)
    {
      attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
      if (attr)
        {
          user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
          user_convention = true;
        }
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
         (no argument is ever a candidate for a co-processor
         register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
        {
          if (user_pcs > ARM_PCS_AAPCS_LOCAL)
            sorry ("non-AAPCS derived PCS variant");
          else if (base_rules && user_pcs != ARM_PCS_AAPCS)
            error ("variadic functions must use the base AAPCS variant");
        }

      if (base_rules)
        return ARM_PCS_AAPCS;
      else if (user_convention)
        return user_pcs;
      else if (decl && flag_unit_at_a_time)
        {
          /* Local functions never leak outside this compilation unit,
             so we are free to use whatever conventions are
             appropriate.  */
          /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
          cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
          if (i && i->local)
            return ARM_PCS_AAPCS_LOCAL;
        }
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                    const_tree fntype ATTRIBUTE_UNUSED,
                    rtx libcall ATTRIBUTE_UNUSED,
                    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
         and 128-bit vector types, whether or not those modes are
         supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
        {
        case 8:
          mode = V2SImode;
          break;
        case 16:
          mode = V4SImode;
          break;
        default:
          return -1;
        }

      if (*modep == VOIDmode)
        *modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
         equivalent for the purposes of being homogeneous aggregates
         if they are the same size.  */
      if (*modep == mode)
        return 1;

      break;

    case ARRAY_TYPE:
      {
        int count;
        tree index = TYPE_DOMAIN (type);

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
        if (count == -1
            || !index
            || !TYPE_MAX_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
            || !TYPE_MIN_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
            || count < 0)
          return -1;

        count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
                  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case RECORD_TYPE:
      {
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count += sub_count;
          }

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
        /* These aren't very interesting except in a degenerate case.  */
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count = count > sub_count ? count : sub_count;
          }

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    default:
      break;
    }

  return -1;
}
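#if 0
/* Illustration only: a homogeneous floating-point aggregate as counted
   by the walk above.  For this struct the walk sets *MODEP to SFmode
   and returns 3, so under the VFP PCS it travels in s0-s2.  Mixing in
   an int member, or mixing float and double, makes the walk return -1
   and the aggregate falls back to the base (core-register) rules.  */
struct hfa { float x, y, z; };
#endif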
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
        {
          sorry ("Thumb-1 hard-float VFP ABI");
          /* sorry() is not immediately fatal, so only display this once.  */
          seen_thumb1_vfp = true;
        }

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
                                       machine_mode mode, const_tree type,
                                       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
        *count = ag_count;
      else
        return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
                               machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                                &ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                             const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
                                                &pcum->aapcs_vfp_rmode,
                                                &pcum->aapcs_vfp_rcount);
}
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                    const_tree type ATTRIBUTE_UNUSED)
{
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
        pcum->aapcs_vfp_reg_alloc = mask << regno;
        if (mode == BLKmode
            || (mode == TImode && ! TARGET_NEON)
            || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
          {
            int i;
            int rcount = pcum->aapcs_vfp_rcount;
            int rshift = shift;
            machine_mode rmode = pcum->aapcs_vfp_rmode;
            rtx par;
            if (!TARGET_NEON)
              {
                /* Avoid using unsupported vector modes.  */
                if (rmode == V2SImode)
                  rmode = DImode;
                else if (rmode == V4SImode)
                  {
                    rmode = DImode;
                    rcount *= 2;
                    rshift /= 2;
                  }
              }
            par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
            for (i = 0; i < rcount; i++)
              {
                rtx tmp = gen_rtx_REG (rmode,
                                       FIRST_VFP_REGNUM + regno + i * rshift);
                tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                         GEN_INT (i * GET_MODE_SIZE (rmode)));
                XVECEXP (par, 0, i) = tmp;
              }

            pcum->aapcs_reg = par;
          }
        else
          pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
        return true;
      }
  return false;
}
static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
                               machine_mode mode,
                               const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                             &ag_mode, &count);

      if (!TARGET_NEON)
        {
          if (ag_mode == V2SImode)
            ag_mode = DImode;
          else if (ag_mode == V4SImode)
            {
              ag_mode = DImode;
              count *= 2;
            }
        }
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
        {
          rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
          tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
          XVECEXP (par, 0, i) = tmp;
        }

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call, this routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
                          const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
                                                        TYPE_MODE (type),
                                                        type))
          return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
                           const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
                                                        type))
          return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
                                                             mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
                  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
         preparation phase, these are handled elsewhere in the
         compiler.  */

      if (slot >= 0)
        {
          /* A Co-processor register candidate goes either in its own
             class of registers or on the stack.  */
          if (!pcum->aapcs_cprc_failed[slot])
            {
              /* C1.cp - Try to allocate the argument to co-processor
                 registers.  */
              if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
                return;

              /* C2.cp - Put the argument on the stack and note that we
                 can't assign any more candidates in this slot.  We also
                 need to note that we have allocated stack space, so that
                 we won't later try to split a non-cprc candidate between
                 core registers and the stack.  */
              pcum->aapcs_cprc_failed[slot] = true;
              pcum->can_split = false;
            }

          /* We didn't get a register, so this argument goes on the
             stack.  */
          gcc_assert (pcum->can_split == false);
          return;
        }
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - arguments go on the stack.  We have nothing to do here.  */
  return;
}
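#if 0
/* Illustration only: how the rules above lay out a call such as

       void f (int a, long long b, int c);

   a lands in r0 (C4, NCRN becomes 1); b needs doubleword alignment, so
   NCRN is rounded up to 2 (C3) and b occupies r2/r3 (C4, NCRN becomes 4);
   c no longer fits in core registers and goes on the stack (C6-C8).  */
#endif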
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname,
                          tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
        pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
        {
          int i;

          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
            {
              pcum->aapcs_cprc_failed[i] = false;
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
            }
        }
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg;
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
    }
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
          || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}
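#if 0
/* Illustration only: types whose alignment exceeds PARM_BOUNDARY
   (32 bits on ARM) answer true above, so a long long or double
   argument is placed in an even/odd core register pair (r0/r1 or
   r2/r3) and never straddles r1/r2.  */
#endif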
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
                  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
        {
          pcum->can_split = false;
          return NULL_RTX;
        }
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
          ? DOUBLEWORD_ALIGNMENT
          : PARM_BOUNDARY);
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
                       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
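/* Worked example (illustrative): with NUM_ARG_REGS == 4 and
   UNITS_PER_WORD == 4, a DImode argument (ARM_NUM_REGS2 == 2) arriving
   when nregs == 3 gives 4 > 3 and 4 < 3 + 2, so the function returns
   (4 - 3) * 4 = 4: one word of the argument travels in r3 and the
   remaining word is passed on the stack.  */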
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
                          const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
                                                              type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
                       machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
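/* Illustration: any type whose TYPE_SIZE is not an INTEGER_CST, for
   example a GNU C variable-length array type, is passed by invisible
   reference here; the caller hands over a pointer to the object
   instead of copying it into registers or stack slots.  */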
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
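/* These handlers back the pragmas registered via REGISTER_TARGET_PRAGMAS;
   a usage sketch (illustrative):

       #pragma long_calls         // following declarations get long calls
       extern void far_away (void);
       #pragma long_calls_off     // restore the default behaviour  */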
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
                          bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
        {
          warning (OPT_Wattributes, "%qE attribute only applies to functions",
                   name);
          *no_add_attrs = true;
        }
      /* FIXME: the argument if any is checked for type attributes;
         should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
          || TREE_CODE (*node) == METHOD_TYPE)
        {
          if (arm_isr_value (args) == ARM_FT_UNKNOWN)
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
              *no_add_attrs = true;
            }
        }
      else if (TREE_CODE (*node) == POINTER_TYPE
               && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
                   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
               && arm_isr_value (args) != ARM_FT_UNKNOWN)
        {
          *node = build_variant_type_copy (*node);
          TREE_TYPE (*node) = build_type_attribute_variant
            (TREE_TYPE (*node),
             tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
          *no_add_attrs = true;
        }
      else
        {
          /* Possibly pass this attribute on from the type to a decl.  */
          if (flags & ((int) ATTR_FLAG_DECL_NEXT
                       | (int) ATTR_FLAG_FUNCTION_NEXT
                       | (int) ATTR_FLAG_ARRAY_NEXT))
            {
              *no_add_attrs = true;
              return tree_cons (name, args, NULL_TREE);
            }
          else
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
            }
        }
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
                          int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
        return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
        return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
        attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
        attr_name = get_identifier ("short_call");
      else
        return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
        return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
         example that we aren't returning a value from the sibling in
         a VFP register but then need to transfer it to a core
         register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                              cfun->decl, false);
      if (!rtx_equal_p (a, b))
        return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
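/* Illustration: both (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 8))) are rejected here,
   since symbolic addresses must be legitimized through the GOT under
   PIC, whereas a plain (const_int 42) remains a legitimate immediate.  */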
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
          && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx_insn *seq, *insn;

          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
                  && arm_pic_register > LAST_LO_REGNUM)
                emit_move_insn (cfun->machine->pic_reg,
                                gen_rtx_REG (Pmode, arm_pic_register));
              else
                arm_load_pic_register (0UL);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATION (insn) = prologue_location;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              insert_insn_on_edge (seq,
                                   single_succ_edge
                                   (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
            }
        }
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF &&
               SYMBOL_REF_LOCAL_P (orig)))
          && NEED_GOT_RELOC
          && arm_pic_data_is_text_relative)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          rtx pat;
          rtx mem;

          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register ();

          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

          /* Make the MEM as close to a constant as possible.  */
          mem = SET_SRC (pat);
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
          MEM_READONLY_P (mem) = 1;
          MEM_NOTRAP_P (mem) = 1;

          insn = emit_insn (pat);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (CONST_INT_P (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
          ? crtl->args.info.aapcs_ncrn < 4
          : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else if (arm_pic_register != INVALID_REGNUM
                   && arm_pic_register > LAST_LO_REGNUM
                   && REGNO (pic_reg) <= LAST_LO_REGNUM)
            {
              emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
              emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
              emit_use (gen_rtx_REG (Pmode, arm_pic_register));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
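/* In ARM state the generic (non-VxWorks) sequence above comes out roughly
   as follows (an illustrative sketch; .LPIC0 stands for the
   UNSPEC_PIC_LABEL and the pc read yields 'dot + 8'):

	ldr	rN, .Loff	@ .Loff: .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+8)
   .LPIC0:
	add	rN, pc, rN	@ rN now addresses the GOT  */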
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx, insn;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                               UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
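/* Illustration: (minus (symbol_ref "sym") (label_ref L)) satisfies
   pcrel_constant_p, because the assembler and linker can resolve the
   difference to a PC-relative value, so such constants need not be
   routed through the GOT even when generating PIC.  */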
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && REG_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static bool
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
        return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32 bits and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && CONST_INT_P (op)
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
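/* Illustration: the accepted scales 1, 2, 4 and 8 correspond to the
   Thumb-2 register-offset forms ldr r0, [r1, r2] and
   ldr r0, [r1, r2, lsl #1/#2/#3]; any other multiplier has to be
   computed into a register before the access.  */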
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and -1024~1024
               (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* ??? Can we assume ldrd for thumb2?  */
          /* Thumb-2 ldrd only has reg+const addressing modes.  */
          /* ldrd supports offsets of +-1020.
             However the ldr fallback does not.  */
          return val > -256 && val < 256 && (val & 3) == 0;
        }
      else
        return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && CONST_INT_P (op)
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}

/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (GET_CODE (x) == LABEL_REF
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && CONST_INT_P (XEXP (x, 1))
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      else if (REG_P (XEXP (x, 0))
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
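/* The resulting legal ranges are therefore: bytes 0..31, halfwords
   0..62 in steps of 2, and words or larger 0 up to 128 minus the access
   size in steps of 4, matching the scaled 5-bit immediate of the 16-bit
   Thumb load/store encodings.  */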
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
        return LO_REGS;
      else
        return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme */
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
                                                     GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
        {
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
          emit_move_insn (reg, gen_const_mem (SImode, reg));
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
        {
          addend = XEXP (XEXP (x, 0), 1);
          x = XEXP (XEXP (x, 0), 0);
        }

      if (GET_CODE (x) != SYMBOL_REF)
        return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
        {
          x = gen_rtx_PLUS (SImode, x, addend);
          orig_x = x;
        }
      else
        return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
        return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
          && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && CONST_INT_P (xop1))
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode
              || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (Pmode, base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only and
         hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
        {
          /* It'll most probably be more efficient to generate the base
             with more bits set and use a negative index instead.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
bool
arm_legitimize_reload_address (rtx *p,
                               machine_mode mode,
                               int opnum, int type,
                               int ind_levels ATTRIBUTE_UNUSED)
{
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (*p) == PLUS
      && GET_CODE (XEXP (*p, 0)) == PLUS
      && REG_P (XEXP (XEXP (*p, 0), 0))
      && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
      && CONST_INT_P (XEXP (*p, 1)))
    {
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  if (GET_CODE (*p) == PLUS
      && REG_P (XEXP (*p, 0))
      && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
      /* If the base register is equivalent to a constant, let the generic
         code handle it.  Otherwise we will run into problems if a future
         reload pass decides to rematerialize the constant.  */
      && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
      && CONST_INT_P (XEXP (*p, 1)))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
      HOST_WIDE_INT low, high;

      /* Detect coprocessor load/stores.  */
      bool coproc_p = ((TARGET_HARD_FLOAT
                        && TARGET_VFP
                        && (mode == SFmode || mode == DFmode))
                       || (TARGET_REALLY_IWMMXT
                           && VALID_IWMMXT_REG_MODE (mode))
                       || (TARGET_NEON
                           && (VALID_NEON_DREG_MODE (mode)
                               || VALID_NEON_QREG_MODE (mode))));

      /* For some conditions, bail out when lower two bits are unaligned.  */
      if ((val & 0x3) != 0
          /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
          && (coproc_p
              /* For DI, and DF under soft-float: */
              || ((mode == DImode || mode == DFmode)
                  /* Without ldrd, we use stm/ldm, which does not
                     fare well with unaligned bits.  */
                  && (! TARGET_LDRD
                      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4.  */
                      || TARGET_THUMB2))))
        return false;

      /* When breaking down a [reg+index] reload address into [(reg+high)+low],
         of which the (reg+high) gets turned into a reload add insn,
         we try to decompose the index into high/low values that can often
         also lead to better reload CSE.
         For example:
                 ldr r0, [r2, #4100]  // Offset too large
                 ldr r1, [r2, #4104]  // Offset too large

         is best reloaded as:
                 add t1, r2, #4096
                 ldr r0, [t1, #4]
                 add t2, r2, #4096
                 ldr r1, [t2, #8]

         which post-reload CSE can simplify in most cases to eliminate the
         second add instruction:
                 add t1, r2, #4096
                 ldr r0, [t1, #4]
                 ldr r1, [t1, #8]

         The idea here is that we want to split out the bits of the constant
         as a mask, rather than as subtracting the maximum offset that the
         respective type of load/store used can handle.

         When encountering negative offsets, we can still utilize it even if
         the overall offset is positive; sometimes this may lead to an immediate
         that can be constructed with fewer instructions.
         For example:
                 ldr r0, [r2, #0x3FFFFC]

         This is best reloaded as:
                 add t1, r2, #0x400000
                 ldr r0, [t1, #-4]

         The trick for spotting this for a load insn with N bits of offset
         (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
         negative offset that is going to make bit N and all the bits below
         it become zero in the remainder part.

         The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
         to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
         used in most cases of ARM load/store instructions.  */

#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)					\
      (((VAL) & ((1 << (N)) - 1))					\
       ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))	\
       : 0)

      if (coproc_p)
        {
          low = SIGN_MAG_LOW_ADDR_BITS (val, 10);

          /* NEON quad-word load/stores are made of two double-word accesses,
             so the valid index range is reduced by 8.  Treat as 9-bit range if
             we go over it.  */
          if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
            low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
        }
      else if (GET_MODE_SIZE (mode) == 8)
        {
          if (TARGET_LDRD)
            low = (TARGET_THUMB2
                   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
                   : SIGN_MAG_LOW_ADDR_BITS (val, 8));
          else
            /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
               to access doublewords.  The supported load/store offsets are
               -8, -4, and 4, which we try to produce here.  */
            low = ((val & 0xf) ^ 0x8) - 0x8;
        }
      else if (GET_MODE_SIZE (mode) < 8)
        {
          /* NEON element load/stores do not have an offset.  */
          if (TARGET_NEON_FP16 && mode == HFmode)
            return false;

          if (TARGET_THUMB2)
            {
              /* Thumb-2 has an asymmetrical index range of (-256,4096).
                 Try the wider 12-bit range first, and re-try if the result
                 is out of range.  */
              low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
              if (low < -255)
                low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
            }
          else
            {
              if (mode == HImode || mode == HFmode)
                {
                  if (arm_arch4)
                    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
                  else
                    {
                      /* The storehi/movhi_bytes fallbacks can use only
                         [-4094,+4094] of the full ldrb/strb index range.  */
                      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
                      if (low == 4095 || low == -4095)
                        return false;
                    }
                }
              else
                low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
            }
        }
      else
        return false;

      high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
               ^ (unsigned HOST_WIDE_INT) 0x80000000)
              - (unsigned HOST_WIDE_INT) 0x80000000);
      /* Check for overflow or zero */
      if (low == 0 || high == 0 || (high + low != val))
        return false;

      /* Reload the high part into a base reg; leave the low part
         in the mem.
         Note that replacing this gen_rtx_PLUS with plus_constant is
         wrong in this case because we rely on the
         (plus (plus reg c1) c2) structure being preserved so that
         XEXP (*p, 0) in push_reload below uses the correct term.  */
      *p = gen_rtx_PLUS (GET_MODE (*p),
                         gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
                                       GEN_INT (high)),
                         GEN_INT (low));
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  return false;
}
bool
thumb_legitimize_reload_address (rtx *x_p,
                                 machine_mode mode,
                                 int opnum, int type,
                                 int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  if (GET_CODE (x) == PLUS
      && GET_MODE_SIZE (mode) < 4
      && REG_P (XEXP (x, 0))
      && XEXP (x, 0) == stack_pointer_rtx
      && CONST_INT_P (XEXP (x, 1))
      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  /* If both registers are hi-regs, then it's better to reload the
     entire expression rather than each register individually.  That
     only requires one reload register rather than two.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  return false;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	return true;

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC
	  && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
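/* For example (illustrative): when not generating PIC, a constant that
   mentions a (label_ref ...) fails arm_legitimate_constant_p_1 and so
   must be loaded from memory rather than used as an immediate.  */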
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
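/* For instance (illustrative): for X = (subreg:HI (reg:SI 123) 0),
   REG_OR_SUBREG_REG (X) is true and REG_OR_SUBREG_RTX (X) yields
   (reg:SI 123); for a plain (reg:SI 123) both simply see the register
   itself.  */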
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;
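      /* Illustrative walk-through of the constant-multiply estimate above:
	 for a multiply by 100, i runs 100 -> 25 -> 6 -> 1 -> 0, so cycles
	 is 4 and the result is COSTS_N_INSNS (2) + 4, i.e. roughly one
	 cycle per two bits of the constant on top of the load + mul.  */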
    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
static bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  rtx operand;
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case MEM:
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
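      /* Illustrative numbers: a DFmode access occupies two core registers,
	 so ARM_NUM_REGS (DFmode) is 2 and the estimate comes to
	 COSTS_N_INSNS (2 + 2) = four insns' worth.  */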
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      if (TARGET_HARD_FLOAT && mode == SFmode)
	*total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (20);
      return false;

    case ROTATE:
      if (REG_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
      else if (!CONST_INT_P (XEXP (x, 1)))
	*total = rtx_cost (XEXP (x, 1), code, 1, speed);

      /* Fall through */
    case ROTATERT:
      if (mode != SImode)
	{
	  *total += COSTS_N_INSNS (4);
	  return true;
	}

      /* Fall through */
    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      if (mode == DImode)
	{
	  *total += COSTS_N_INSNS (3);
	  return true;
	}

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
	 and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
	  && outer != SET && !CONST_INT_P (XEXP (x, 1)))
	++*total;

      return true;

    case MINUS:
      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
	  if (CONST_INT_P (XEXP (x, 0))
	      && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	    {
	      *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_arm (INTVAL (XEXP (x, 1))))
	    {
	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	      return true;
	    }

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 0))
		  && arm_const_double_rtx (XEXP (x, 0)))
		{
		  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
		  return true;
		}

	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		{
		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
		  return true;
		}

	      return false;
	    }
	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      *total = COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 0))
	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
	  return true;
	}

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
	  return true;
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  if (REG_P (XEXP (XEXP (x, 1), 0))
	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);

	  return true;
	}

      /* Fall through */

    case PLUS:
      if (code == PLUS && arm_arch6 && mode == SImode
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
			      0, speed);
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      /* MLA: All arguments must be registers.  We filter out
	 multiplication by a power of two, so that we fall down into
	 the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  /* The cost comes from the cost of the multiply.  */
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		{
		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
		  return true;
		}

	      return false;
	    }

	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
	  if (REG_P (XEXP (XEXP (x, 0), 0))
	      && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);

	  return true;
	}

      /* Fall through */

    case AND: case XOR: case IOR:

      /* Normally the frame registers will be split into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
	  && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
	  && !CONST_INT_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1);

      if (mode == DImode)
	{
	  *total += COSTS_N_INSNS (2);
	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	    {
	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	      return true;
	    }

	  return false;
	}

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      if (subcode == UMIN || subcode == UMAX
	  || subcode == SMIN || subcode == SMAX)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      return false;

    case MULT:
      /* This should have been handled by the CPU specific routines.  */
      gcc_unreachable ();

    case TRUNCATE:
      if (arm_arch3m && mode == SImode
	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
	{
	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
	  return true;
	}
      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
      return false;

    case NEG:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      if (mode == SImode && code == NOT)
	{
	  subcode = GET_CODE (XEXP (x, 0));
	  if (subcode == ASHIFT || subcode == ASHIFTRT
	      || subcode == LSHIFTRT
	      || subcode == ROTATE || subcode == ROTATERT
	      || (subcode == MULT
		  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
	    {
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	      /* Register shifts cost an extra cycle.  */
	      if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
							subcode, 1, speed);
	      return true;
	    }
	}

      return false;

    case IF_THEN_ELSE:
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	{
	  *total = COSTS_N_INSNS (4);
	  return true;
	}

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
	    && REG_P (XEXP (operand, 0))
	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
	*total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
		 + rtx_cost (XEXP (x, 2), code, 2, speed));
      return true;

    case NE:
      if (mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case GE:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case LT:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case EQ:
    case GT:
    case LE:
    case GEU:
    case GTU:
    case LEU:
    case LTU:
    scc_insn:
      /* SCC insns.  In the case where the comparison has already been
	 performed, then they cost 2 instructions.  Otherwise they need
	 an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
	{
	  return true;
	}

      /* Fall through */
    case COMPARE:
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
	{
	  *total = 0;
	  return true;
	}

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      return false;

    case UMIN:
    case UMAX:
    case SMIN:
    case SMAX:
      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
      if (!CONST_INT_P (XEXP (x, 1))
	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
	*total += rtx_cost (XEXP (x, 1), code, 1, speed);
      return true;

    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	  *total = COSTS_N_INSNS (20);
	  return false;
	}
      *total = COSTS_N_INSNS (1);
      if (mode == DImode)
	*total += COSTS_N_INSNS (3);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      *total = 0;
      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  rtx op = XEXP (x, 0);
	  machine_mode opmode = GET_MODE (op);

	  if (mode == DImode)
	    *total += COSTS_N_INSNS (1);

	  if (opmode != SImode)
	    {
	      if (MEM_P (op))
		{
		  /* If !arm_arch4, we use one of the extendhisi2_mem
		     or movhi_bytes patterns for HImode.  For a QImode
		     sign extension, we first zero-extend from memory
		     and then perform a shift sequence.  */
		  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
		    *total += COSTS_N_INSNS (2);
		}
	      else if (arm_arch6)
		*total += COSTS_N_INSNS (1);

	      /* We don't have the necessary insn, so we need to perform some
		 other operation.  */
	      else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
		/* An and with constant 255.  */
		*total += COSTS_N_INSNS (1);
	      else
		/* A shift sequence.  Increase costs slightly to avoid
		   combining two shifts into an extend operation.  */
		*total += COSTS_N_INSNS (2) + 1;
	    }

	  return false;
	}

      switch (GET_MODE (XEXP (x, 0)))
	{
	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V4QImode:
	case V2HImode:
	  *total = COSTS_N_INSNS (1);
	  return false;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x))
	  || const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
						  INTVAL (x), NULL_RTX,
						  NULL_RTX, 0, 0));
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case HIGH:
      *total = COSTS_N_INSNS (1);
      return true;

    case LO_SUM:
      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    case SET:
      /* The vec_extract patterns accept memory operands that require an
	 address reload.  Account for the cost of that reload to give the
	 auto-inc-dec pass an incentive to try to replace them.  */
      if (TARGET_NEON && MEM_P (SET_DEST (x))
	  && GET_CODE (SET_SRC (x)) == VEC_SELECT)
	{
	  *total = rtx_cost (SET_DEST (x), code, 0, speed);
	  if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
	    *total += COSTS_N_INSNS (1);
	  return true;
	}

      /* Likewise for the vec_set patterns.  */
      if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
	  && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
	  && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
	{
	  rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
	  *total = rtx_cost (mem, code, 0, speed);
	  if (!neon_vector_mem_operand (mem, 2, true))
	    *total += COSTS_N_INSNS (1);
	  return true;
	}

      return false;

    case UNSPEC:
      /* We cost this as high as our memory costs to allow this to
	 be hoisted from loops.  */
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
	{
	  *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
	}
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    default:
      *total = COSTS_N_INSNS (4);
      return false;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* On purpose fall through for normal RTX.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return COSTS_N_INSNS (words)
	     + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
				    || satisfies_constraint_K (SET_SRC (x))
				       /* thumb1_movdi_insn.  */
				    || ((words > 1) && MEM_P (SET_SRC (x))));
    case CONST_INT:
      if (outer == SET)
	{
	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);
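      /* Illustrative sizes under the splits referenced above: setting a
	 register to -200 matches constraint J ([-255, -1]) and splits into
	 mov #200 + neg, i.e. 2 insns; 0x400 (1 << 10) is a
	 thumb_shiftable_const and splits into mov #1 + lsl #10, also
	 2 insns.  */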
    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		    int *total)
{
  machine_mode mode = GET_MODE (x);
  if (TARGET_THUMB1)
    {
      *total = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
  switch (code)
    {
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*total = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*total = COSTS_N_INSNS (2);
      else
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
	  return true;
	}
      else if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
	  /* Slightly disparage register shifts, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
	  return true;
	}

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      if (mode == SImode)
	{
	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
	      || subcode1 == ROTATE || subcode1 == ROTATERT
	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
	      || subcode1 == ASHIFTRT)
	    {
	      /* It's just the cost of the two operands.  */
	      *total = 0;
	      return false;
	    }

	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
	  *total += rtx_cost (XEXP (x, 1), code, 1, false);
	  return true;
	}

      /* Fall through */
    case AND: case XOR: case IOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));

	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
	      || subcode == LSHIFTRT || subcode == ASHIFTRT
	      || (code == AND && subcode == NOT))
	    {
	      /* It's just the cost of the two operands.  */
	      *total = 0;
	      return false;
	    }
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case MULT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case IF_THEN_ELSE:
      *total = 0;
      return false;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode))
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      return arm_rtx_costs_1 (x, outer_code, total, 0);

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x)))
	/* A multiplication by a constant requires another instruction
	   to load the constant to a register.  */
	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
				? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
	{
	  if (outer_code == COMPARE || outer_code == PLUS
	      || outer_code == MINUS)
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	}
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer_code == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
	 cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;
      return true;

    case SET:
      return false;

    default:
      if (mode != VOIDmode)
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*total = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
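/* For instance (illustrative): for (mult (reg r1) (const_int 8)),
   exact_log2 (8) is 3, so shifter_op_p returns (reg r1), representing r1
   shifted left by a constant; for (ashift (reg r1) (reg r2)) it returns
   (reg r1) and sets *SHIFT_REG to (reg r2).  */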
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p,
		 int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  gcc_assert (GET_CODE (x) == UNSPEC);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      *cost = COSTS_N_INSNS (1);
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return false;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);	\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)	\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
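/* Illustrative expansion: HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) checks
   whether operand 0 of X is a left shift (e.g. (ashift (reg)
   (const_int 2))); if so it prices the operation as a single arith+shift
   insn, adds the costs of the shifted operand and of operand 1 - 0 = 1,
   and returns from the enclosing cost function.  */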
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		   const struct cpu_cost_table *extra_cost,
		   int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  || mode == DImode)
				 ? 2 : 1);
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;

	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;

	  goto const_int_cost;
	}

      return false;

    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*cost = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*cost = COSTS_N_INSNS (2);
      else
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* For speed optimizations, add the costs of the address and
	 accessing memory.  */
      if (speed_p)
#ifdef NOT_YET
	*cost += (extra_cost->ldst.load
		  + arm_address_cost (XEXP (x, 0), mode,
				      ADDR_SPACE_GENERIC, speed_p));
#else
	*cost += extra_cost->ldst.load;
#endif
      return true;
    case PARALLEL:
      {
	/* Calculations of LDM costs are complex.  We assume an initial cost
	   (ldm_1st) which will load the number of registers mentioned in
	   ldm_regs_per_insn_1st registers; then each additional
	   ldm_regs_per_insn_subsequent registers cost one more insn.  The
	   formula for N regs is thus:

	   ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				     + ldm_regs_per_insn_subsequent - 1)
				    / ldm_regs_per_insn_subsequent).

	   Additional costs may also be added for addressing.  A similar
	   formula is used for STM.  */

	bool is_ldm = load_multiple_operation (x, SImode);
	bool is_stm = store_multiple_operation (x, SImode);

	*cost = COSTS_N_INSNS (1);

	if (is_ldm || is_stm)
	  {
	    if (speed_p)
	      {
		HOST_WIDE_INT nregs = XVECLEN (x, 0);
		HOST_WIDE_INT regs_per_insn_1st = is_ldm
		  ? extra_cost->ldst.ldm_regs_per_insn_1st
		  : extra_cost->ldst.stm_regs_per_insn_1st;
		HOST_WIDE_INT regs_per_insn_sub = is_ldm
		  ? extra_cost->ldst.ldm_regs_per_insn_subsequent
		  : extra_cost->ldst.stm_regs_per_insn_subsequent;

		*cost += regs_per_insn_1st
			 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					   + regs_per_insn_sub - 1)
					  / regs_per_insn_sub);
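		/* Worked example (illustrative numbers): loading 6 registers
		   with regs_per_insn_1st = 1 and regs_per_insn_sub = 2 adds
		   the first-insn amount plus COSTS_N_INSNS ((5 + 1) / 2),
		   i.e. three further insns' worth for the remaining five
		   registers.  */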
	      }
	  }
	return true;
      }

    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost = COSTS_N_INSNS (speed_p
			       ? extra_cost->fp[mode != SFmode].div : 1);
      else if (mode == SImode && TARGET_IDIV)
	*cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
      else
	*cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */

    case MOD:
    case UMOD:
      *cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost = (COSTS_N_INSNS (2)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost = (COSTS_N_INSNS (3)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost = (COSTS_N_INSNS (1)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost = (COSTS_N_INSNS (1)
		       + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  *cost = COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		}
	      else
		{
		  *cost = COSTS_N_INSNS (2);
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;

    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost = COSTS_N_INSNS (10);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost = COSTS_N_INSNS (5);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, code, 0, speed_p)
			+ rtx_cost (mul_op1, code, 0, speed_p)
			+ rtx_cost (sub_op, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;

	  *cost = COSTS_N_INSNS (1);

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
	      non_shift_op = XEXP (x, 0);
	    }
	  else
	    non_shift_op = XEXP (x, 1);

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, code, 0, speed_p)
			+ rtx_cost (non_shift_op, code, 0, speed_p));
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 0)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
	      else
		*cost += rtx_cost (op1, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0),
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, code, 0, speed_p)
			+ rtx_cost (mul_op1, code, 0, speed_p)
			+ rtx_cost (add_op, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  *cost = COSTS_N_INSNS (1);
	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
				  speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op = XEXP (x, 0);

	      *cost = COSTS_N_INSNS (1);

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == DImode)
	{
	  if (arm_arch3m
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      /* UMLAL/SMLAL.  */
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  *cost = COSTS_N_INSNS (2);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
				  speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  *cost = COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost = COSTS_N_INSNS (2);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  *cost = COSTS_N_INSNS (1);

	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      if (mode == SImode)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
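	  /* Illustrative RTL for the SMUL[TB][TB] test above: an expression
	     such as (mult (sign_extend:SI (reg:HI r0))
			   (ashiftrt:SI (reg:SI r1) (const_int 16)))
	     multiplies two 16-bit halves and maps onto a single
	     smulbt-style instruction, hence the cheaper mult[0].extend
	     cost.  */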
	  return false;
	}

      if (mode == DImode)
	{
	  if (arm_arch3m
	      && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0),
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost = COSTS_N_INSNS (2);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost = COSTS_N_INSNS (2);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
					speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
					speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  *cost = COSTS_N_INSNS (1);
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;

    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost = COSTS_N_INSNS (4);
	    return true;
	  }

	int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost = COSTS_N_INSNS (2);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  *cost = COSTS_N_INSNS (1);
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
		  return true;
		}

	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;

    case EQ:
    case NE:
    case LT:
    case LE:
    case GT:
    case GE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  *cost = COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		  /* RSBS T1, Rn, #0
		     ADC  Rd, Rn, T1.  */

		case NE:
		  /* SUBS T1, Rn, #1
		     SBC  Rd, Rn, T1.  */
		  *cost = COSTS_N_INSNS (2);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  /* ASR  Rd, Rn, #31
		     ADD  Rd, Rn, #1.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost = COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;
    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost = COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}
      else  /* GET_MODE (XEXP (x, 0)) == SImode.  */
	*cost = COSTS_N_INSNS (1);

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
	}

      return true;
    case CONST_INT:
      /* CONST_INT has no mode, so we cannot tell for sure how many
         insns are really going to be needed.  The best we can do is
         look at the value passed.  If it fits in SImode, then assume
         that's the mode it will be used for.  Otherwise assume it
         will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
        mode = SImode;
      else
        mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
            || outer_code == AND
            || outer_code == IOR
            || outer_code == XOR
            || outer_code == MINUS))
        outer_code = SET;

      if (mode == SImode)
        {
          *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
                                                    INTVAL (x), NULL, NULL,
                                                    0, 0));
          /* Extra costs?  */
        }
      else
        {
          *cost += COSTS_N_INSNS (arm_gen_constant
                                  (outer_code, SImode, NULL,
                                   trunc_int_for_mode (INTVAL (x), SImode),
                                   NULL, NULL, 0, 0)
                                  + arm_gen_constant (outer_code, SImode, NULL,
                                                      INTVAL (x) >> 32, NULL,
                                                      NULL, 0, 0));
          /* Extra costs?  */
        }

      return true;
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
        {
          if (arm_arch_thumb2 && !flag_pic)
            *cost = COSTS_N_INSNS (2);
          else
            *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
        }
      else
        *cost = COSTS_N_INSNS (2);

      if (flag_pic)
        {
          *cost += COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;
        }

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      /* Fixme.  */
      return true;
    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (vfp3_const_double_rtx (x))
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].fpconst;
              return true;
            }

          if (speed_p)
            {
              *cost = COSTS_N_INSNS (1);
              if (mode == DFmode)
                *cost += extra_cost->ldst.loadd;
              else
                *cost += extra_cost->ldst.loadf;
            }
          else
            *cost = COSTS_N_INSNS (2 + (mode == DFmode));

          return true;
        }
      *cost = COSTS_N_INSNS (4);
      return true;
    case CONST_VECTOR:
      /* Fixme.  */
      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && neon_immediate_valid_for_move (x, mode, NULL, NULL))
        *cost = COSTS_N_INSNS (1);
      else
        *cost = COSTS_N_INSNS (4);
      return true;
    case HIGH:
    case LO_SUM:
      *cost = COSTS_N_INSNS (1);
      /* When optimizing for size, we prefer constant pool entries to
         MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
        *cost += 1;
      return true;

    case CLZ:
      *cost = COSTS_N_INSNS (1);
      if (speed_p)
        *cost += extra_cost->alu.clz;
      return false;
    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.log_shift;
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          return true;
        }
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost = COSTS_N_INSNS (2);
      return false;
    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
                  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
                      == ZERO_EXTEND))))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->mult[1].extend;
          *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
                              speed_p)
                    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
                                0, speed_p));
          return true;
        }
      *cost = LIBCALL_COST (1);
      return false;

    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);
    case PC:
      /* Reading the PC is like reading any other register.  Writing it
         is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
          && mode == SImode
          && CONST_INT_P (XEXP (x, 1))
          && CONST_INT_P (XEXP (x, 2)))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.bfx;
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          return true;
        }

      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost = COSTS_N_INSNS (2);
      if (speed_p)
        *cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].widen;
          if (!TARGET_FPU_ARMV8
              && GET_MODE (XEXP (x, 0)) == HFmode)
            {
              /* Pre v8, widening HF->DF is a two-step process, first
                 widening to SFmode.  */
              *cost += COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[0].widen;
            }
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          return true;
        }

      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].narrow;
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          return true;
          /* Vector modes?  */
        }
      *cost = LIBCALL_COST (1);
      return false;
    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          rtx op2 = XEXP (x, 2);

          *cost = COSTS_N_INSNS (1);

          /* vfms or vfnma.  */
          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);

          /* vfnms or vfnma.  */
          if (GET_CODE (op2) == NEG)
            op2 = XEXP (op2, 0);

          *cost += rtx_cost (op0, FMA, 0, speed_p);
          *cost += rtx_cost (op1, FMA, 1, speed_p);
          *cost += rtx_cost (op2, FMA, 2, speed_p);

          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].fma;

          return true;
        }

      *cost = LIBCALL_COST (3);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
        {
          if (GET_MODE_CLASS (mode) == MODE_INT)
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
              /* Strip off the 'cost' of rounding towards zero.  */
              if (GET_CODE (XEXP (x, 0)) == FIX)
                *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
              else
                *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);

              /* ??? Increase the cost to deal with transferring from
                 FP -> CORE registers?  */
              return true;
            }
          else if (GET_MODE_CLASS (mode) == MODE_FLOAT
                   && TARGET_FPU_ARMV8)
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].roundint;
              return false;
            }
          /* Vector costs?  */
        }
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
        {
          /* ??? Increase the cost to deal with transferring from CORE
             -> FP registers?  */
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].fromint;
          return false;
        }
      *cost = LIBCALL_COST (1);
      return false;
    case CALL:
      *cost = COSTS_N_INSNS (1);
      return true;

    case ASM_OPERANDS:
      {
        /* Just a guess.  Guess number of instructions in the asm
           plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
           though (see PR60663).  */
        int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
        int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

        *cost = COSTS_N_INSNS (asm_length + num_operands);
        return true;
      }

    default:
      if (mode != VOIDmode)
        *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
        *cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

#undef HANDLE_NARROW_SHIFT_ARITH
/* RTX costs entry point; dispatches to the size or speed cost tables.  */

static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
               int *total, bool speed)
{
  bool result;

  if (TARGET_OLD_RTX_COSTS
      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
    {
      /* Old way.  (Deprecated.)  */
      if (!speed)
        result = arm_size_rtx_costs (x, (enum rtx_code) code,
                                     (enum rtx_code) outer_code, total);
      else
        result = current_tune->rtx_costs (x, (enum rtx_code) code,
                                          (enum rtx_code) outer_code, total,
                                          speed);
    }
  else
    {
      /* New way.  */
      if (current_tune->insn_extra_cost)
        result = arm_new_rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    current_tune->insn_extra_cost,
                                    total, speed);
      /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
         && current_tune->insn_extra_cost != NULL  */
      else
        result = arm_new_rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    &generic_extra_costs, total, speed);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
               *total, result ? "final" : "partial");
    }

  return result;
}
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
          || mode == DImode)
        {
          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 2;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
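
/* A minimal standalone sketch of the Booth-style estimate used above: a
   base cost of 4 for an ARM-encodable constant (8 otherwise), plus one
   insn-equivalent per BOOTH_UNIT_SIZE-bit step while multiplier bits
   remain.  The helper name is illustrative, not part of GCC, and the
   block is kept out of the build.  */
#if 0
#include <stdio.h>

static int
estimate_slowmul_cost (unsigned long long multiplier, int const_ok)
{
  unsigned long long i = multiplier & 0xffffffffULL;
  int cost = const_ok ? 4 : 8;
  int j;

  for (j = 0; i && j < 32; j += 2)
    {
      i >>= 2;      /* Retire one 2-bit Booth group per step.  */
      cost++;
    }

  return cost;
}

int
main (void)
{
  /* 0xff needs four 2-bit steps: 4 + 4 = 8 (assuming const_ok).  */
  printf ("%d\n", estimate_slowmul_cost (0xff, 1));
  /* A full 32-bit pattern walks all 16 steps: 4 + 16 = 20.  */
  printf ("%d\n", estimate_slowmul_cost (0xffffffffULL, 1));
  return 0;
}
#endif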
/* RTX cost for cores with a fast multiply unit (M variants).  */

static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  /* ??? should thumb2 use different costs?  */
  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 8;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (4);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      /* Requires a lib call.  */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                      int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) != MULT)
        return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
         will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);
      return false;

    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* If operand 1 is a constant we can more accurately
             calculate the cost of the multiply.  The multiplier can
             retire 15 bits on the first cycle and a further 12 on the
             second.  We do, of course, have to load the constant into
             a register first.  */
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
          /* There's a general overhead of one cycle.  */
          int cost = 1;
          unsigned HOST_WIDE_INT masked_const;

          if (i & 0x80000000)
            i = ~i;

          i &= (unsigned HOST_WIDE_INT) 0xffffffff;

          masked_const = i & 0xffff8000;
          if (masked_const != 0)
            {
              cost++;
              masked_const = i & 0xf8000000;
              if (masked_const != 0)
                cost++;
            }
          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (3);
          return false;
        }

      /* Requires a lib call.  */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
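
/* A standalone sketch of the XScale multiplier estimate above: one cycle
   of general overhead, one more if any multiplier bits above bit 14
   remain, and another if any remain above bit 26 (negative multipliers
   are complemented first).  Illustrative only; kept out of the build.  */
#if 0
#include <stdio.h>

static int
xscale_mult_cycles (unsigned int multiplier)
{
  unsigned int i = multiplier;
  int cost = 1;

  if (i & 0x80000000u)
    i = ~i;

  if ((i & 0xffff8000u) != 0)
    {
      cost++;
      if ((i & 0xf8000000u) != 0)
        cost++;
    }

  return cost;
}

int
main (void)
{
  printf ("%d\n", xscale_mult_cycles (100u));         /* fits in 15 bits: 1 */
  printf ("%d\n", xscale_mult_cycles (1u << 20));     /* needs 2nd cycle: 2 */
  printf ("%d\n", xscale_mult_cycles (0x40000000u));  /* bits above 26: 3 */
  return 0;
}
#endif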
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                  int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      switch (code)
        {
        case MULT:
          /* Small multiply: 32 cycles for an integer multiply inst.  */
          if (arm_arch6m && arm_m_profile_small_mul)
            *total = COSTS_N_INSNS (32);
          else
            *total = COSTS_N_INSNS (3);
          return true;

        default:
          *total = thumb1_rtx_costs (x, code, outer_code);
          return true;
        }
    }

  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */

static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
        return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
        return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
                  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
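
/* A sketch of the weighting above with RTX codes replaced by a plain
   enum; lower weight means a more preferred address form.  The enum and
   helper are hypothetical, and the specific weights follow the values as
   reconstructed above.  Kept out of the build.  */
#if 0
#include <stdio.h>

enum addr_kind
{
  AK_AUTOINC,       /* PRE/POST_INC/DEC.  */
  AK_SUM_INT,       /* reg + const_int.  */
  AK_SUM_SHIFT,     /* reg + shifted reg (non-INT sum).  */
  AK_SUM_OTHER,     /* other sums.  */
  AK_REG,           /* bare register.  */
  AK_MEM_OR_LABEL   /* mem/label/symbol.  */
};

static int
arm_style_address_weight (enum addr_kind k)
{
  switch (k)
    {
    case AK_AUTOINC:   return 0;
    case AK_SUM_INT:   return 2;
    case AK_SUM_SHIFT: return 3;
    case AK_SUM_OTHER: return 4;
    case AK_REG:       return 6;
    default:           return 10;  /* AK_MEM_OR_LABEL.  */
    }
}

int
main (void)
{
  printf ("autoinc=%d reg+int=%d reg=%d label=%d\n",
          arm_style_address_weight (AK_AUTOINC),
          arm_style_address_weight (AK_SUM_INT),
          arm_style_address_weight (AK_REG),
          arm_style_address_weight (AK_MEM_OR_LABEL));
  return 0;
}
#endif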
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
         operand for INSN.  If we have a shifted input operand and the
         instruction we depend on is another ALU instruction, then we may
         have to account for an additional stall.  */
      if (shift_opnum != 0
          && (attr_type == TYPE_ALU_SHIFT_IMM
              || attr_type == TYPE_ALUS_SHIFT_IMM
              || attr_type == TYPE_LOGIC_SHIFT_IMM
              || attr_type == TYPE_LOGICS_SHIFT_IMM
              || attr_type == TYPE_ALU_SHIFT_REG
              || attr_type == TYPE_ALUS_SHIFT_REG
              || attr_type == TYPE_LOGIC_SHIFT_REG
              || attr_type == TYPE_LOGICS_SHIFT_REG
              || attr_type == TYPE_MOV_SHIFT
              || attr_type == TYPE_MVN_SHIFT
              || attr_type == TYPE_MOV_SHIFT_REG
              || attr_type == TYPE_MVN_SHIFT_REG))
        {
          rtx shifted_operand;
          int opno;

          /* Get the shifted operand.  */
          extract_insn (insn);
          shifted_operand = recog_data.operand[shift_opnum];

          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we have to increase
             the cost of this dependency.  */
          extract_insn (dep);
          preprocess_constraints (dep);
          for (opno = 0; opno < recog_data.n_operands; opno++)
            {
              /* We can ignore strict inputs.  */
              if (recog_data.operand_type[opno] == OP_IN)
                continue;

              if (reg_overlap_mentioned_p (recog_data.operand[opno],
                                           shifted_operand))
                {
                  *cost = 2;
                  return false;
                }
            }
        }
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
          && recog_memoized (dep) >= 0)
        {
          if (GET_CODE (PATTERN (insn)) == SET)
            {
              if (GET_MODE_CLASS
                  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
                  || GET_MODE_CLASS
                  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
                {
                  enum attr_type attr_type_insn = get_attr_type (insn);
                  enum attr_type attr_type_dep = get_attr_type (dep);

                  /* By default all dependencies of the form
                     s0 = s0 <op> s1
                     s0 = s0 <op> s2
                     have an extra latency of 1 cycle because
                     of the input and output dependency in this
                     case.  However this gets modeled as a true
                     dependency and hence all these checks.  */
                  if (REG_P (SET_DEST (PATTERN (insn)))
                      && REG_P (SET_DEST (PATTERN (dep)))
                      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
                                                  SET_DEST (PATTERN (dep))))
                    {
                      /* FMACS is a special case where the dependent
                         instruction can be issued 3 cycles before
                         the normal latency in case of an output
                         dependency.  */
                      if ((attr_type_insn == TYPE_FMACS
                           || attr_type_insn == TYPE_FMACD)
                          && (attr_type_dep == TYPE_FMACS
                              || attr_type_dep == TYPE_FMACD))
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) - 3;
                          else
                            *cost = insn_default_latency (dep);
                          return false;
                        }
                      else
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) + 1;
                          else
                            *cost = insn_default_latency (dep);
                        }
                      return false;
                    }
                }
            }
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
        {
          *cost = 3;
          return false;
        }

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
        {
          *cost = 3;
          return false;
        }
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but such a
   move is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                        reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
          || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
        return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
               || (from != IWMMXT_REGS && to == IWMMXT_REGS))
        return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
        return 20;
      else
        return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
        return 4;
      else
        return 2;
    }
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
                      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
        return 8;
      else
        return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                tree vectype,
                                int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
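
/* A tiny sketch of the vec_construct estimate above: building a vector
   from scalars is charged roughly one insn per pair of elements, plus
   one.  Illustrative only; kept out of the build.  */
#if 0
#include <stdio.h>

static int
vec_construct_cost (unsigned elements)
{
  return elements / 2 + 1;
}

int
main (void)
{
  printf ("4 elements:  %d\n", vec_construct_cost (4));   /* 3 */
  printf ("16 elements: %d\n", vec_construct_cost (16));  /* 9 */
  return 0;
}
#endif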
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                   struct _stmt_vec_info *stmt_info, int misalign,
                   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADC_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
        fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_IMM:
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
                        int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
             ";; sched_reorder for cycle %d with %d insns in ready list\n",
             clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
        {
          first_older_only = i;
          if (verbose > 5)
            fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
          break;
        }
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
        first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
             INSN_UID (ready[first_older_only]),
             INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = first_older_only_insn;
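}

/* A standalone sketch of the reorder step above, on plain ints: save the
   element at FIRST_OLDER_ONLY, slide everything between it and
   FIRST_YOUNGER down by one, then drop the saved element at
   FIRST_YOUNGER; the relative order of all other elements is preserved.
   Illustrative only; kept out of the build.  */
#if 0
#include <stdio.h>

static void
move_before (int *ready, int first_older_only, int first_younger)
{
  int saved = ready[first_older_only];
  int i;

  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = saved;
}

int
main (void)
{
  int ready[5] = { 10, 20, 30, 40, 50 };
  int i;

  move_before (ready, 1, 3);  /* 20 moves to where 40 was.  */
  for (i = 0; i < 5; i++)
    printf ("%d ", ready[i]);  /* 10 30 40 20 50 */
  printf ("\n");
  return 0;
}
#endif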
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
                   int clock)
{
  switch (arm_tune)
    {
    case cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
        return cost;
    }

  /* XXX Is this strictly true?  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
         from a cached area.  Assume that loads from the stack, and from the
         constant pool are cached, and that others will miss.  This is a
         hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (src_mem))
          || reg_mentioned_p (stack_pointer_rtx, src_mem)
          || reg_mentioned_p (frame_pointer_rtx, src_mem)
          || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
        return 1;
    }

  return cost;
}
static int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;

  return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
         : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  if (REAL_VALUES_EQUAL (r, value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     valid = (-1)^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */
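
/* A sketch of the mapping above: value = (-1)^s * n * 2^-r with
   16 <= n <= 31 and 0 <= r <= 7, encoded as
   (s << 7) | ((r ^ 3) << 4) | (n - 16).  The formula line is as
   reconstructed above; see vfp3_const_double_index below for the real
   encoder.  Illustrative only; kept out of the build.  */
#if 0
#include <stdio.h>

static int
quarter_precision_encode (int s, int n, int r)
{
  if (n < 16 || n > 31 || r < 0 || r > 7 || (s & ~1))
    return -1;
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}

int
main (void)
{
  /* 1.0 = 16 * 2^-4 -> 0x70, the fconst encoding of 1.0.  */
  printf ("1.0  -> 0x%02x\n", quarter_precision_encode (0, 16, 4));
  /* 31.0 = 31 * 2^0 -> 0x3f.  */
  printf ("31.0 -> 0x%02x\n", quarter_precision_encode (0, 31, 0));
  return 0;
}
#endif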
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT) 1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}

/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than integers.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
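
/* A sketch of the i32 vmov variants 0-3 from the table above: splat a
   32-bit element into its little-endian bytes and test which single byte
   carries the payload.  The real recognizer (neon_valid_immediate below)
   checks the whole byte image of the vector; the helper here is
   illustrative only and kept out of the build.  */
#if 0
#include <stdio.h>

static int
vmov_i32_variant (unsigned int elt)
{
  unsigned char b[4];
  int v;

  for (v = 0; v < 4; v++)
    {
      b[v] = elt & 0xff;
      elt >>= 8;
    }

  for (v = 0; v < 4; v++)
    {
      int ok = 1, i;
      for (i = 0; i < 4; i++)
        if (i != v && b[i] != 0)
          ok = 0;
      if (ok)
        return v;  /* 00..ab..00 with the payload in byte V.  */
    }

  return -1;
}

int
main (void)
{
  printf ("%d\n", vmov_i32_variant (0x000000abu)); /* variant 0 */
  printf ("%d\n", vmov_i32_variant (0x00ab0000u)); /* variant 2 */
  printf ("%d\n", vmov_i32_variant (0x00ab00abu)); /* -1: no i32 variant */
  return 0;
}
#endif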
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
                      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    {
      n_elts = CONST_VECTOR_NUNITS (op);
      innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
    }
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
        mode = GET_MODE (op);
      innersize = GET_MODE_SIZE (mode);
    }

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
        return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
        {
          rtx elt = CONST_VECTOR_ELT (op, i);
          REAL_VALUE_TYPE re;

          REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

          if (!REAL_VALUES_EQUAL (r0, re))
            return -1;
        }

      if (modconst)
        *modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
        *elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
        return 19;
      else
        return 18;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (CONST_INT_P (el))
        {
          elpart = INTVAL (el);
          parts = 1;
        }
      else if (CONST_DOUBLE_P (el))
        {
          elpart = CONST_DOUBLE_LOW (el);
          parts = 2;
        }
      else
        gcc_unreachable ();

      for (part = 0; part < parts; part++)
        {
          unsigned int byte;
          for (byte = 0; byte < innersize; byte++)
            {
              bytes[idx++] = (elpart & 0xff) ^ invmask;
              elpart >>= BITS_PER_UNIT;
            }
          if (CONST_DOUBLE_P (el))
            elpart = CONST_DOUBLE_HIGH (el);
        }
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
                       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
                       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
                       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
                       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
                       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
                       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
                       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
                       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
                        && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
                        && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
                        && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
                        && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
                        && bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
        for (i = 0; i < idx; i++)
          bytes[i] ^= invmask;

      if (immtype == 17)
        {
          /* FIXME: Broken on 32-bit H_W_I hosts.  */
          gcc_assert (sizeof (HOST_WIDE_INT) == 8);

          for (i = 0; i < 8; i++)
            imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
                   << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
        }
      else
        {
          unsigned HOST_WIDE_INT imm = 0;

          for (i = 0; i < elsize / BITS_PER_UNIT; i++)
            imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
        }
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
                               rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
                                rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT determines whether the shift is a left or a
   right shift, because they have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
                                rtx *modconst, int *elementwidth,
                                bool isleftshift)
{
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
        elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
        return 0;
      else
        gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
        return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
        return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
        return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
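
/* A tiny sketch of the range rule above: for ELSIZE-bit elements a
   left-shift immediate must be 0 .. ELSIZE-1, while a right-shift
   immediate must be 1 .. ELSIZE.  Illustrative only; kept out of the
   build.  */
#if 0
#include <stdio.h>

static int
neon_shift_imm_ok (unsigned shift, unsigned elsize, int isleftshift)
{
  if (isleftshift)
    return shift < elsize;
  return shift >= 1 && shift <= elsize;
}

int
main (void)
{
  printf ("vshl.i32 #31: %d\n", neon_shift_imm_ok (31, 32, 1)); /* 1 */
  printf ("vshl.i32 #32: %d\n", neon_shift_imm_ok (32, 32, 1)); /* 0 */
  printf ("vshr.i32 #32: %d\n", neon_shift_imm_ok (32, 32, 0)); /* 1 */
  printf ("vshr.i32 #0:  %d\n", neon_shift_imm_ok (0, 32, 0));  /* 0 */
  return 0;
}
#endif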
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
                             int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
                             machine_mode mode, int quad,
                             bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
                      rtx (*reduc) (rtx, rtx, rtx))
{
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
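
/* A standalone sketch of the halving loop above, with the pairwise insn
   replaced by a scalar pairwise add over an int array: each step folds
   adjacent pairs, so PARTS elements reduce in log2(PARTS) steps.
   Illustrative only; kept out of the build.  */
#if 0
#include <stdio.h>

static void
pairwise_step (int *dst, const int *src, unsigned n)
{
  unsigned i;
  for (i = 0; i < n; i++)
    dst[i] = src[2 * i] + src[2 * i + 1];
}

int
main (void)
{
  int v[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  unsigned i;

  for (i = 8 / 2; i >= 1; i /= 2)
    pairwise_step (v, v, i);  /* In-place is safe: reads stay ahead.  */

  printf ("%d\n", v[0]);  /* 36 */
  return 0;
}
#endif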
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
         CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
         Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
        {
          rtx x = XVECEXP (vals, 0, i);
          if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
            n_const++;
        }
      if (n_const == n_elts)
        const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        ++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
        {
          emit_move_insn (target, constant);
          return;
        }
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
         varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
        {
        case V8QImode:
          emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
          break;
        case V16QImode:
          emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
          break;
        case V4HImode:
          emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
          break;
        case V8HImode:
          emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
          break;
        case V2SImode:
          emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
          break;
        case V4SImode:
          emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
          break;
        case V2SFmode:
          emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
          break;
        case V4SFmode:
          emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
          break;
        case V2DImode:
          emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
          break;
        default:
          gcc_unreachable ();
        }
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
              const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
          && (GET_CODE (ind) == PRE_INC
              || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
           (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return !strict;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
           (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
         than what the instruction format permits.  We have no such constraint
         on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
          < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
bool
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
                       || REGNO (x) == ARG_POINTER_REGNUM
                       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
                           && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
        return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
        return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
          || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */
static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
          && BYTES_BIG_ENDIAN
          && (AGGREGATE_TYPE_P (valtype)
              || TREE_CODE (valtype) == COMPLEX_TYPE
              || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (symbol_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (label_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
        return 1;

    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.  */
static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
          && (XINT (x, 1) == UNSPEC_PIC_BASE
              || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
        return true;
    }
  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */
bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
                        int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
        *mask = log;
      if (signed_sat)
        *signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
        *mask = log + 1;
      if (signed_sat)
        *signed_sat = true;

      return true;
    }

  return false;
}
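/* Worked example (illustrative, not in the upstream source): for the bounds
   [0, 255], exact_log2 (255 + 1) = 8 and the low bound is zero, so the
   function sets *mask = 8 and *signed_sat = false, corresponding to
   "usat rD, #8, rN".  For [-128, 127], exact_log2 (127 + 1) = 7 and
   -127 - 1 == -128, so it sets *mask = 8 and *signed_sat = true,
   corresponding to "ssat rD, #8, rN".  */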
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
           && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
          || (GET_CODE (XEXP (b, 0)) == PLUS
              && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
        {
          reg0 = XEXP (XEXP (a, 0), 0);
          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
        }
      else
        reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
        {
          reg1 = XEXP (XEXP (b, 0), 0);
          val1 = INTVAL (XEXP (XEXP (b, 0), 1));
        }
      else
        reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
         instructions to handle, since this would cause the
         arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
        return 0;

      /* Don't allow an eliminable register: register elimination can make
         the offset too large.  */
      if (arm_eliminable_register (reg0))
        return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
        {
          /* If the target has load delay slots, then there's no benefit
             to using an ldm instruction unless the offset is zero and
             we are optimizing for size.  */
          return (optimize_size && (REGNO (reg0) == REGNO (reg1))
                  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
                  && (val_diff == 4 || val_diff == -4));
        }

      return ((REGNO (reg0) == REGNO (reg1))
              && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
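/* Illustrative example (not in the upstream source): the references
     (mem:SI (plus:SI (reg:SI r4) (const_int 8)))
     (mem:SI (plus:SI (reg:SI r4) (const_int 12)))
   share base r4 and their offsets differ by exactly 4, so they are
   adjacent; the reversed order also qualifies, since val_diff may be
   either 4 or -4.  */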
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
   where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
         REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
                     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
         popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
          || !REG_P (XEXP (SET_SRC (elt), 0))
          || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
             ((count - 1 - offset_adj) * reg_increment))
        return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
        return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
        return false;

      if (load)
        {
          reg = SET_DEST (elt);
          mem = SET_SRC (elt);
        }
      else
        {
          reg = SET_SRC (elt);
          mem = SET_DEST (elt);
        }

      if (!REG_P (reg)
          || GET_MODE (reg) != mode
          || REGNO (reg) <= regno
          || (consecutive
              && (REGNO (reg) !=
                  (unsigned int) (first_regno + regs_per_val * (i - base))))
          /* Don't allow SP to be loaded unless it is also the base register.
             It guarantees that SP is reset correctly when an LDM instruction
             is interrupted.  Otherwise, we might end up with a corrupt
             stack.  */
          || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
          || !MEM_P (mem)
          || GET_MODE (mem) != mode
          || ((GET_CODE (XEXP (mem, 0)) != PLUS
               || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
               || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
               || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
                   offset + (i - base) * reg_increment))
              && (!REG_P (XEXP (mem, 0))
                  || offset + (i - base) * reg_increment != 0)))
        return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
        addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
        return false;

      /* For Thumb-1, address register is always modified - either by
         write-back or by explicit load.  If the pattern does not describe
         an update, then the address register must be in the list of loaded
         registers.  */
      if (TARGET_THUMB1)
        return update || addr_reg_in_reglist;
    }

  return true;
}
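/* Illustrative example (not in the upstream source): a two-register
   "ldmia r0!, {r1, r2}" is matched from a PARALLEL of the form
     [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
      (set (reg:SI r1) (mem:SI (reg:SI r0)))
      (set (reg:SI r2) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))]
   where the first element is the write-back; its adjustment satisfies
   8 == (count - 1 - offset_adj) * reg_increment with count == 3.  */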
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */
static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
                                 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

        ldr     rd1, [rbase + offset]
        ldr     rd2, [rbase + offset + 4]

     to

        add     rd1, rbase, offset
        ldmia   rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
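/* Illustrative arithmetic (not in the upstream source): on XScale a
   two-register ldm costs 2 + NREGS = 4 cycles and blocks the pipeline,
   while two independent ldr instructions cost 1-3 non-blocking cycles
   each, so for nops <= 2 the separate loads win unless we are optimizing
   for size.  */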
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */
static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
                      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
        if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
          {
            /* We must find exactly one offset that is higher than the
               previous one by 4.  */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
          && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
        return false;
    }
  return true;
}
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
                        int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                  = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          unsorted_regs[i] = (REG_P (operands[i])
                              ? REGNO (operands[i])
                              : REGNO (SUBREG_REG (operands[i])));

          /* If it isn't an integer register, or if it overwrites the
             base register but isn't the last insn in the list, then
             we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              || unsorted_regs[i] > 14
              || (i != nops - 1 && unsorted_regs[i] == base_reg))
            return 0;

          /* Don't allow SP to be loaded unless it is also the base
             register.  It guarantees that SP is reset correctly when
             an LDM instruction is interrupted.  Otherwise, we might
             end up with a corrupt stack.  */
          if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
           || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
                                        ldm_case == 5
                                        ? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
                         int *regs, rtx *reg_rtxs, int *saved_order, int *base,
                         HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                  = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          unsorted_reg_rtxs[i] = (REG_P (operands[i])
                                  ? operands[i] : SUBREG_REG (operands[i]));
          unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          /* If it isn't an integer register, then we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              /* The effects are unpredictable if the base register is
                 both updated and stored.  */
              || (base_writeback && unsorted_regs[i] == base_reg)
              || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
              || unsorted_regs[i] > 14)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        {
          regs[i] = unsorted_regs[check_regs ? order[i] : i];
          if (reg_rtxs)
            reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
        }

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                         HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (VOIDmode, basereg,
                       plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                          HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (VOIDmode, basereg,
                       plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
                     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
                                    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
                                     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
                       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
                              offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
                        rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
                              offsetp);
}
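/* Usage sketch (illustrative, not in the upstream source; BASEREG and
   BASEMEM stand for whatever base register and memory rtx the caller
   already has): to load r4-r7 from four consecutive words with
   write-back,

     int regs[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT offset = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, basereg, TRUE,
                                       basemem, &offset));

   after which OFFSET has been advanced by 16 bytes (4 words).  */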
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */
bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
                                     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
        if (regs[i] > regs[j])
          std::swap (regs[i], regs[j]);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
        {
          base_reg = regs[0];
          base_reg_rtx = newbase;
        }
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
                                      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */
bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
                                      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */
bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
                                      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
        if (regs[i] == regs[j])
          {
            rtx t = peep2_find_free_register (0, nops * 2,
                                              TARGET_THUMB1 ? "l" : "r",
                                              SImode, &allocated);
            if (t == NULL_RTX)
              return false;
            reg_rtxs[i] = t;
            regs[i] = REGNO (t);
          }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1]
                || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
           || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
          && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
        return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */
static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
                                   HOST_WIDE_INT length,
                                   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
                                            TRUE, srcbase, &srcoffset));
          src_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
                                                 - src_autoinc));
              mem = adjust_automodify_address (srcbase, SImode, addr,
                                               srcoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
            }
          srcoffset += block_size_bytes;
        }

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
                                             TRUE, dstbase, &dstoffset));
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
                                                 - dst_autoinc));
              mem = adjust_automodify_address (dstbase, SImode, addr,
                                               dstoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
            }
          dstoffset += block_size_bytes;
        }

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
                                        &srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, src,
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
          mem = adjust_automodify_address (srcbase, SImode, addr,
                                           srcoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_loadsi (regs[j], mem));
        }
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
                                         &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, dst,
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
          mem = adjust_automodify_address (dstbase, SImode, addr,
                                           dstoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_storesi (mem, regs[j]));
        }
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
         byte, depending on interleave factor.  */
      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
          emit_insn (gen_unaligned_storehi (mem,
                       gen_lowpart (HImode, halfword_tmp)));
          halfword_tmp = NULL;
          dstoffset += 2;
        }

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
          byte_tmp = NULL;
          dstoffset++;
        }

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
                   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
                      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */
static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
                               unsigned int interleave_factor,
                               HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
                                   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
                                     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
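/* Worked example (illustrative, not in the upstream source): for
   length = 37 and bytes_per_iter = 16, leftover = 37 % 16 = 5, so the
   loop copies 32 bytes in two iterations and the tail call to
   arm_block_move_unaligned_straight copies the remaining 5 bytes
   (one word and one byte).  */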
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */
static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
         or dst_aligned though: allow more interleaving in those cases since the
         resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
                                       interleave_factor, bytes_per_iter);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
                                           interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
         subject to loop unrolling, which makes tuning this condition a little
         redundant.  */
      if (length > 32)
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i += 4)
    {
      if (in_words_to_go > 4)
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                          TRUE, srcbase, &srcoffset));
      else
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
                                          src, FALSE, srcbase,
                                          &srcoffset));

      if (out_words_to_go)
        {
          if (out_words_to_go > 4)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
                                               TRUE, dstbase, &dstoffset));
          else if (out_words_to_go != 1)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
                                               out_words_to_go, dst,
                                               (last_bytes == 0
                                                ? FALSE : TRUE),
                                               dstbase, &dstoffset));
          else
            {
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
              emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
              if (last_bytes != 0)
                {
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
                  dstoffset += 4;
                }
            }
        }

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);     /* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
                              GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode,
                                           plus_constant (Pmode, dst,
                                                          last_bytes - 1),
                                           dstoffset + last_bytes - 1);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

          if (--last_bytes)
            {
              tmp = gen_reg_rtx (SImode);
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
              part_bytes_reg = tmp;
            }
        }
    }
  else
    {
      if (last_bytes > 1)
        {
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
          last_bytes -= 2;
          if (last_bytes)
            {
              rtx tmp = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
              part_bytes_reg = tmp;
              dstoffset += 2;
            }
        }

      if (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
        }
    }

  return 1;
}
14872 next_consecutive_mem (rtx mem
)
14874 machine_mode mode
= GET_MODE (mem
);
14875 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14876 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14878 return adjust_automodify_address (mem
, mode
, addr
, offset
);
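/* Illustrative example (not in the upstream source): given
   (mem:DI (reg:SI r0)), next_consecutive_mem returns
   (mem:DI (plus:SI (reg:SI r0) (const_int 8))), since
   GET_MODE_SIZE (DImode) is 8 on this target.  */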
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  src = adjust_address (src, DImode, 0);
  dst = adjust_address (dst, DImode, 0);
  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loaddi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storedi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
        emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
        emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
        emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
    }

  if (len == 0)
    return true;

  /* Copy the last byte.  */
  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;

      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTmode;

      switch (cond2)
        {
        case LT:
          return CC_DLTmode;
        case LE:
          return CC_DLEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTmode;

      switch (cond2)
        {
        case GT:
          return CC_DGTmode;
        case GE:
          return CC_DGEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTUmode;

      switch (cond2)
        {
        case LTU:
          return CC_DLTUmode;
        case LEU:
          return CC_DLEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTUmode;

      switch (cond2)
        {
        case GTU:
          return CC_DGTUmode;
        case GEU:
          return CC_DGEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
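/* Illustrative example (not in the upstream source): for the test
   (ne (ior (lt (reg a) (reg b)) (ne (reg c) (reg d))) (const_int 0))
   we get cond1 = LT and cond2 = NE with COND_OR == DOM_CC_X_OR_Y; since
   LT dominates NE, the LT arm of the switch returns CC_DNEmode.  */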
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
          || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
          || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
        {
        case EQ:
        case NE:
          /* A DImode comparison against zero can be implemented by
             or'ing the two halves together.  */
          if (y == const0_rtx)
            return CC_Zmode;

          /* We can do an equality test in three Thumb instructions.  */
          if (!TARGET_32BIT)
            return CC_Zmode;

          /* FALLTHROUGH */

        case LTU:
        case LEU:
        case GTU:
        case GEU:
          /* DImode unsigned comparisons can be implemented by cmp +
             cmpeq without a scratch register.  Not worth doing in
             Thumb-2.  */
          if (TARGET_ARM)
            return CC_CZmode;

          /* FALLTHROUGH */

        case LT:
        case LE:
        case GT:
        case GE:
          /* DImode signed and unsigned comparisons can be implemented
             by cmp + sbcs with a scratch register, but that does not
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
          gcc_assert (op != EQ && op != NE);
          return CC_NCVmode;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)

  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));

    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
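/* Illustrative sketch (not from the original source): for a DImode
   equality test of two non-zero values, the XOR trick above turns

	(x == y)   into   ((x ^ y) == 0)

   so the two halves of the XOR result can simply be ORred together
   and compared against zero, avoiding a scratch register.  */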
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
arm_gen_return_addr_mask (void)

  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
arm_reload_in_hi (rtx *operands)

  rtx ref = operands[1];
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);

      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));

	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
  else if (GET_CODE (base) == PLUS)
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);
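/* Worked example (illustrative, not from the original source): with
   offset = 0x1234, lo = 0x234 and hi = 0x1000, so the HI part is
   added to the base by a separate insn while the LO part stays in
   the load's immediate field.  The XOR/SUBTRACT pair above is the
   usual trick for sign-extending the low 32 bits of (offset - lo)
   into a full HOST_WIDE_INT.  */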
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
arm_reload_out_hi (rtx *operands)

  rtx ref = operands[0];
  rtx outval = operands[1];
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);

      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));

	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);

	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;

      emit_set_insn (base_plus, base);
  else if (GET_CODE (base) == PLUS)
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);

		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     OUTVAL.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));

  if (BYTES_BIG_ENDIAN)
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								 offset)),
			    gen_lowpart (QImode, scratch)));

      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								 offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
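/* Illustrative sketch (not from the original source): storing the
   half-word 0xAABB at [base + offset] on a little-endian target
   becomes roughly

	strb  <0xBB>, [base, #offset]       @ low byte of OUTVAL
	lsr   scratch, outval, #8
	strb  <0xAA>, [base, #offset + 1]   @ high byte via SCRATCH

   with the byte order reversed for big-endian targets.  */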
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
arm_must_pass_in_stack (machine_mode mode, const_tree type)

  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);

  return must_pass_in_stack_var_size_or_pad (mode, type);
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)

  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)

  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if ((AGGREGATE_TYPE_P (type)
	   || TREE_CODE (type) == COMPLEX_TYPE
	   || FIXED_POINT_TYPE_P (type))
	  && int_size_in_bytes (type) <= 4)

      if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	  && GET_MODE_SIZE (mode) <= 4)

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)

  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))

  else if (TARGET_ARM)

  return ((offset <= max_offset) && (offset >= -max_offset));
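/* Illustrative note (the assignments to max_offset are elided above;
   the values here are assumed from the ARM architecture, not from this
   code): LDRD/STRD take an 8-bit immediate, giving roughly +/-255 in
   ARM state and +/-1020 (an 8-bit value scaled by 4) in Thumb-2 state,
   which is the bound max_offset encodes.  */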
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register, the
   offsets are constants within the range, and the gap between the offsets
   is 4.  If preload complete then check that registers are legal.  WBACK
   indicates whether address is updated.  LOAD indicates whether memory
   access is load or store.  */
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)

  unsigned int t, t2, n;

  if (!reload_completed)

  if (!offset_ok_for_ldrd_strd (offset))

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))

  else if (TARGET_ARM
	   && ((wback && (n == t || n == t2))
	       || (t2 == PC_REGNUM)
	       || (t % 2 != 0)   /* First destination register is not even.  */
	       /* PC can be used as base register (for offset addressing only),
		  but it is deprecated.  */
	       || (n == PC_REGNUM)))
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */
  if (side_effects_p (mem))

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))

  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)

  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))

      else if (REGNO (base) != REGNO (cur_base))

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))

	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
      else if (TARGET_ARM)

	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)

		  /* Is regno-1 free?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset, regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)

	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);

		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)

		  operands[regno % 2 == 1 ? 0 : 1] = tmp;

      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
	std::swap (operands[4], operands[5]);

      gap = offsets[1] - offsets[0];
      offset = offsets[0];

  /* Make sure accesses are to consecutive memory locations.  */

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */

  if (load && commute)
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,

      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))

      /* Try to reorder the input registers: swapping the two register
	 operands may by itself yield a pair that satisfies the
	 LDRD/STRD constraints.  */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,

	  std::swap (operands[0], operands[1]);

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));

      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
      if (tmp == NULL_RTX)

      /* DREG must be an even-numbered register in DImode.
	 Split it into SI registers.  */
      operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
      operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

      return (operands_ok_ldrd_strd (operands[0], operands[1],
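/* Illustrative sketch (registers chosen for the example, not from the
   original source): the peephole turns

	ldr r0, [r2]
	ldr r1, [r2, #4]

   into a single

	ldrd r0, r1, [r2]

   once the checks above confirm the base registers match, the offsets
   differ by exactly 4, and the destination register pair is legal.  */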
/* Print a symbolic form of X to the debug file, F.  */
arm_print_value (FILE *f, rtx x)

  switch (GET_CODE (x))

      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));

      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));

      for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
	  if (i < (CONST_VECTOR_NUNITS (x) - 1))

      fprintf (f, "\"%s\"", XSTR (x, 0));

      fprintf (f, "`%s'", XSTR (x, 0));

      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));

      arm_print_value (f, XEXP (x, 0));

      arm_print_value (f, XEXP (x, 0));
      arm_print_value (f, XEXP (x, 1));

      fprintf (f, "????");
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:
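	(an illustrative sketch; the label and register names are
	 assumed)

	ldr	rn, L1
	...
	b	L2
	align
   L1:	.long	value
   L2:
	...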
   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node

  /* Doubly linked chain of entries.  */

  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */

  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */

  /* The mode of value.  */

  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */

struct minipool_fixup

  HOST_WIDE_INT address;

  HOST_WIDE_INT forwards;
  HOST_WIDE_INT backwards;

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)

static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix * 		minipool_fix_head;
Mfix * 		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)

  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);

	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;

	  /* No padding necessary for TBH.  */

	  /* Add two bytes for alignment on Thumb.  */

	  gcc_unreachable ();
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)

  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
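/* Worked example (illustrative): if LABEL is 8-byte aligned
   (label_to_alignment == 3) on ARM, where the smallest insn is 4
   bytes, at most 8 - 4 = 4 bytes of padding can appear before it;
   on Thumb, with 2-byte insns, the bound is 8 - 2 = 6 bytes.  */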
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)

  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
      if (max_address < mp->max_address)
	mp->max_address = max_address;

      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->prev = max_mp->prev;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;

  /* Save the new entry.  */

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
add_minipool_forward_ref (Mfix *fix)

  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	  /* More than one fix references this entry.  */
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);

      /* Note the insertion point if necessary.  */
	  && mp->max_address > max_address)

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	  max_address = mp->max_address;

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
      mp->max_address = max_address;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;

      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->prev = max_mp->prev;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;

  /* Save the new entry.  */

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)

  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
      if (min_address > mp->min_address)
	mp->min_address = min_address;

      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->next = min_mp->next;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
add_minipool_backward_ref (Mfix *fix)

  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;

	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)

	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)

	      min_address = mp->min_address + fix->fix_size;

	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)

	      min_address = mp->min_address + fix->fix_size;

  /* We need to create a new entry.  */
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->max_address = minipool_barrier->address + 65536;
  mp->min_address = min_address;

  if (min_mp == NULL)
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;

      mp->next = min_mp->next;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;

  /* Save the new entry.  */

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;
assign_minipool_offsets (Mfix *barrier)

  HOST_WIDE_INT offset = 0;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
/* Output the literal table */
dump_minipool (rtx_insn *scan)

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)

    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address,
	     align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
      if (mp->refcount > 0)
	      fprintf (dump_file,
		       ";; Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);

	  switch (GET_MODE_SIZE (mp->mode))
#ifdef HAVE_consttable_1
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
#endif
#ifdef HAVE_consttable_2
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
#endif
#ifdef HAVE_consttable_4
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
#endif
#ifdef HAVE_consttable_8
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
#endif
#ifdef HAVE_consttable_16
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
#endif
	      gcc_unreachable ();

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
/* Return the cost of forcibly inserting a barrier after INSN.  */
arm_barrier_cost (rtx insn)

  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))

  switch (GET_CODE (insn))
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return base_cost - 10;

      return base_cost + 10;
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)

  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
      rtx_jump_table_data *tmp;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	      selected_cost = new_cost;
	      selected_address = fix->address + count;

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	  selected_cost = new_cost;
	  selected_address = fix->address + count;

      from = NEXT_INSN (from);

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)

  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->address = address;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)

  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->address = address;

  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)

      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");

  /* Add it to the chain of fixes.  */

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
arm_max_const_double_inline_cost ()

  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
arm_const_double_inline_cost (rtx val)

  rtx lowpart, highpart;

  mode = GET_MODE (val);

  if (mode == VOIDmode)

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
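/* Worked example (illustrative): for VAL = 0x0000000100000001 both
   32-bit halves are 1, each synthesizable with a single MOV, so the
   cost is 1 + 1 = 2.  That is below the 3-or-4 bound returned by
   arm_max_const_double_inline_cost above, so such a constant would be
   built inline rather than placed in a literal pool.  */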
/* Cost of loading a SImode constant.  */
arm_const_inline_cost (enum rtx_code code, rtx val)

  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
arm_const_double_by_parts (rtx val)

  machine_mode mode = GET_MODE (val);

  if (optimize_size || arm_ld_sched)

  if (mode == VOIDmode)

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
arm_const_double_by_immediates (rtx val)

  machine_mode mode = GET_MODE (val);

  if (mode == VOIDmode)

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	      push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				 recog_data.operand_mode[opno], op);
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))

		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */
thumb1_reorg (void)

  FOR_EACH_BB_FN (bb, cfun)
      rtx pat, op0, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)

      /* Get the register with which we are comparing.  */
      pat = PATTERN (insn);
      op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (prev != NULL
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
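/* Illustrative sketch (registers assumed, not from the original
   source): a copy followed by a compare-and-branch against zero,

	mov  r1, r0
	...
	cmp  r1, #0
	beq  .L1

   has its copy rewritten as

	subs r1, r0, #0

   which produces the same value but guarantees the flags reflect it,
   so the final cbranch can reuse those flags instead of issuing a
   separate comparison.  */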
17248 /* Convert instructions to their cc-clobbering variant if possible, since
17249 that allows us to use smaller encodings. */
17252 thumb2_reorg (void)
17257 INIT_REG_SET (&live
);
17259 /* We are freeing block_for_insn in the toplev to keep compatibility
17260 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17261 compute_bb_for_insn ();
17264 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17266 FOR_EACH_BB_FN (bb
, cfun
)
17268 if (current_tune
->disparage_flag_setting_t16_encodings
17269 && optimize_bb_for_speed_p (bb
))
17273 Convert_Action action
= SKIP
;
17274 Convert_Action action_for_partial_flag_setting
17275 = (current_tune
->disparage_partial_flag_setting_t16_encodings
17276 && optimize_bb_for_speed_p (bb
))
17279 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17280 df_simulate_initialize_backwards (bb
, &live
);
17281 FOR_BB_INSNS_REVERSE (bb
, insn
)
17283 if (NONJUMP_INSN_P (insn
)
17284 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
	  && GET_CODE (PATTERN (insn)) == SET)
	{
	  enum {SKIP, CONV, SWAP_CONV} action = SKIP;
	  rtx pat = PATTERN (insn);
	  rtx dst = XEXP (pat, 0);
	  rtx src = XEXP (pat, 1);
	  rtx op0 = NULL_RTX, op1 = NULL_RTX;

	  if (UNARY_P (src) || BINARY_P (src))
	    op0 = XEXP (src, 0);

	  if (BINARY_P (src))
	    op1 = XEXP (src, 1);

	  if (low_register_operand (dst, SImode))
	    {
	      switch (GET_CODE (src))
		{
		case PLUS:
		  /* Adding two registers and storing the result
		     in the first source is already a 16-bit
		     operation.  */
		  if (rtx_equal_p (dst, op0)
		      && register_operand (op1, SImode))
		    break;

		  if (low_register_operand (op0, SImode))
		    {
		      /* ADDS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op1, SImode))
			action = CONV;
		      /* ADDS <Rdn>,#<imm8>  */
		      /* SUBS <Rdn>,#<imm8>  */
		      else if (rtx_equal_p (dst, op0)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), -255, 255))
			action = CONV;
		      /* ADDS <Rd>,<Rn>,#<imm3>  */
		      /* SUBS <Rd>,<Rn>,#<imm3>  */
		      else if (CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), -7, 7))
			action = CONV;
		    }
		  /* ADCS <Rd>, <Rn>  */
		  else if (GET_CODE (XEXP (src, 0)) == PLUS
			   && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			   && low_register_operand (XEXP (XEXP (src, 0), 1),
						    SImode)
			   && COMPARISON_P (op1)
			   && cc_register (XEXP (op1, 0), VOIDmode)
			   && maybe_get_arm_condition_code (op1) == ARM_CS
			   && XEXP (op1, 1) == const0_rtx)
		    action = CONV;
		  break;

		case MINUS:
		  /* RSBS <Rd>,<Rn>,#0
		     Not handled here: see NEG below.  */
		  /* SUBS <Rd>,<Rn>,#<imm3>
		     SUBS <Rdn>,#<imm8>
		     Not handled here: see PLUS above.  */
		  /* SUBS <Rd>,<Rn>,<Rm>  */
		  if (low_register_operand (op0, SImode)
		      && low_register_operand (op1, SImode))
		    action = CONV;
		  break;

		case MULT:
		  /* MULS <Rdm>,<Rn>,<Rdm>
		     As an exception to the rule, this is only used
		     when optimizing for size since MULS is slow on all
		     known implementations.  We do not even want to use
		     MULS in cold code, if optimizing for speed, so we
		     test the global flag here.  */
		  if (!optimize_size)
		    break;
		  /* else fall through.  */
		case AND:
		case IOR:
		case XOR:
		  /* ANDS <Rdn>,<Rm>  */
		  if (rtx_equal_p (dst, op0)
		      && low_register_operand (op1, SImode))
		    action = action_for_partial_flag_setting;
		  else if (rtx_equal_p (dst, op1)
			   && low_register_operand (op0, SImode))
		    action = action_for_partial_flag_setting == SKIP
			     ? SKIP : SWAP_CONV;
		  break;

		case ASHIFTRT:
		case ASHIFT:
		case LSHIFTRT:
		  /* ASRS <Rdn>,<Rm> */
		  /* LSRS <Rdn>,<Rm> */
		  /* LSLS <Rdn>,<Rm> */
		  if (rtx_equal_p (dst, op0)
		      && low_register_operand (op1, SImode))
		    action = action_for_partial_flag_setting;
		  /* ASRS <Rd>,<Rm>,#<imm5> */
		  /* LSRS <Rd>,<Rm>,#<imm5> */
		  /* LSLS <Rd>,<Rm>,#<imm5> */
		  else if (low_register_operand (op0, SImode)
			   && CONST_INT_P (op1)
			   && IN_RANGE (INTVAL (op1), 0, 31))
		    action = action_for_partial_flag_setting;
		  break;

		case ROTATERT:
		  /* RORS <Rdn>,<Rm>  */
		  if (rtx_equal_p (dst, op0)
		      && low_register_operand (op1, SImode))
		    action = action_for_partial_flag_setting;
		  break;

		case NOT:
		  /* MVNS <Rd>,<Rm>  */
		  if (low_register_operand (op0, SImode))
		    action = action_for_partial_flag_setting;
		  break;

		case NEG:
		  /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		  if (low_register_operand (op0, SImode))
		    action = CONV;
		  break;

		case CONST_INT:
		  /* MOVS <Rd>,#<imm8>  */
		  if (CONST_INT_P (src)
		      && IN_RANGE (INTVAL (src), 0, 255))
		    action = action_for_partial_flag_setting;
		  break;

		case REG:
		  /* MOVS and MOV<c> with registers have different
		     encodings, so are not relevant here.  */
		  break;

		default:
		  break;
		}
	    }

	  if (action != SKIP)
	    {
	      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
	      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
	      rtvec vec;

	      if (action == SWAP_CONV)
		{
		  src = copy_rtx (src);
		  XEXP (src, 0) = op1;
		  XEXP (src, 1) = op0;
		  pat = gen_rtx_SET (VOIDmode, dst, src);
		  vec = gen_rtvec (2, pat, clobber);
		}
	      else /* action == CONV */
		vec = gen_rtvec (2, pat, clobber);

	      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
	      INSN_CODE (insn) = -1;
	    }
	}

      if (NONDEBUG_INSN_P (insn))
	df_simulate_one_insn_backwards (bb, insn, &live);
    }

  CLEAR_REG_SET (&live);
}
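/* Illustrative note (an assumption for exposition, not from the original
   source): under -Os the pass above rewrites, for example,

     (set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 1)))

   into a PARALLEL of the same SET plus (clobber (reg:CC CC_REGNUM)),
   letting the 16-bit flag-setting ADDS encoding be selected instead of
   the 32-bit ADD.  */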
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
    {
      /* Output pop (not stmfd) because it has a shorter encoding.  */
      gcc_assert (update);
      sprintf (pattern, "pop%s\t{", conditional);
    }
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldm%sfd\t", conditional);
      else if (TARGET_UNIFIED_ASM)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%sia\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
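/* Example output (illustrative): for a pop of {r4, r5, pc} off SP with
   writeback under unified assembly, the code above builds something like
   "pop%?%d0\t{r4, r5, pc}"; with a non-SP base register such as r7 and no
   writeback it builds "ldmia%?%d0\tr7, {r4, r5, pc}" instead.  */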
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
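/* Example output (illustrative, assumed operands): a push of three
   doubleword registers starting at d8 with SP as the base prints as
   "vpush.64 {d8, d9, d10}"; with any other base register it uses the
   writeback form "vstmdb.64 rN!, {...}" instead.  */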
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (DFmode, stack_pointer_rtx),
		     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
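/* Worked example (illustrative): a request to save 20 D registers
   starting at internal register number B recurses as
   vfp_emit_fstmd (B + 32, 4) followed by vfp_emit_fstmd (B, 16); at
   8 bytes per D register the combined return value is 20 * 8 = 160.  */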
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
/* Emit a move of SRC to DEST as a pair of 16-bit operations.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD
		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
		output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256))
		    {
		      if (emit)
			output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (TARGET_THUMB2
		  || !CONST_INT_P (otherops[2])
		  || (INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256))
		{
		  if (emit)
		    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditionals.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
	    }

	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]",
					     otherops);
			}
		      return "";
		    }

		  if (CONST_INT_P (otherops[2]))
		    {
		      if (emit)
			{
			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
			  else
			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
			}
		    }
		  else if (emit)
		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
		}
	      else if (emit)
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (count)
		*count = 2;

	      if (TARGET_LDRD)
		return "ldr%(d%)\t%0, [%1]";

	      return "ldm%(ia%)\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than ldrd can handle,
	     fix these up with a pair of ldr.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (TARGET_LDRD
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
	      || mode == DFmode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : "32",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
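/* Illustrative template expansions (assumed operands): a DFmode load
   from a plain address becomes "vldr%?.64\t%P0, %1", while an SFmode
   store to a POST_INC address becomes "vstmia%?.32\t%0!, {%1}".  */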
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4)
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
	int i;
	int overlap = -1;
	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient
	       size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case EImode:
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
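/* Worked example (illustrative): N = 0x1234 is emitted as two
   instructions.  The loop finds the low chunk 0x234 (an 8-bit value
   rotated by 2), prints INSTR1 with it, skips past those eight bits,
   and then prints INSTR2 with the remaining chunk 0x1000.  */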
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem(enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem(GET_CODE(op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem(code);
      if (CONST_INT_P (XEXP (op, 1)))
	{
	  *amountp = INTVAL (XEXP (op, 1));
	}
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
	mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}

/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
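/* For instance (illustrative): int_log2 (8) returns 3, which is how a
   multiply by a power of two in shift_op above turns into "lsl #3".  */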
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7, Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (df_regs_ever_live_p (reg)
	    || (! crtl->is_leaf && call_used_regs[reg]))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && crtl->uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE(func_type))
    {
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
	      || crtl->uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}

/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (arm_current_func_type ())
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes())
	   ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 - r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		if (TARGET_UNIFIED_ASM)
		  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
		else
		  sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    {
	      if (TARGET_UNIFIED_ASM)
		sprintf (instr, "pop%s\t{", conditional);
	      else
		sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
	    }

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
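/* Illustrative outputs (assumed register masks): a function that saved
   only r4 restores it with a single "ldr r4, [sp], #4" and returns with
   "bx lr" on v4t or later; a larger save set under unified assembly
   collapses into one "pop {r4, r5, pc}" that returns by loading PC.  */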
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* ??? Do we want to print some of the below anyway?  */
  if (TARGET_THUMB1)
    return;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != 0)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  int regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (VOIDmode, mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	int regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  ;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (i == 0)
	  {
	    rtx insn;

	    /* The first pair also allocates the whole stack block and
	       writes the stack pointer back.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
	    tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
	    tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (VOIDmode,
			    gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (VOIDmode,
			    gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);

	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more
   scheduling freedom and can be turned into an STM by peephole
   optimizations.  */
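/* For illustration only (not from the original source): with
   SAVED_REGS_MASK covering {r4, r5, r6, r7} the loop below would emit
   roughly

	strd	r4, r5, [sp, #-16]!	@ first store allocates all 16 bytes
	strd	r6, r7, [sp, #8]	@ subsequent stores use offsets

   with a single DWARF note describing the whole 16-byte adjustment.  */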
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);
  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
				        (const_int:SI <num>)))
	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
	])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */
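  /* For illustration only (not from the original source): a call such as
     emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << LR_REGNUM), ...)
     assembles to "push {r4, r5, lr}", and the note above describes one
     12-byte SP decrement plus the three individual stores.  */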
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (VOIDmode,
			   gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (VOIDmode,
				 gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (VOIDmode,
			       gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
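/* For illustration only (not from the original source): after emitting an
   epilogue insn that pops four words, a caller might record the CFA change
   with

     arm_add_cfa_adjust_cfa_note (insn, 16, stack_pointer_rtx,
				  stack_pointer_rtx);

   i.e. the note records that the new SP equals the old SP plus 16.  */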
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (VOIDmode,
			 stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	reg = gen_rtx_REG (SImode, i);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
	    return;
	  }

	tmp = gen_rtx_SET (VOIDmode,
			   reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
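/* For illustration only (not from the original source): restoring the
   call-saved VFP registers d8-d15 with BASE_REG == SP assembles to roughly
   "vldm sp!, {d8-d15}", a single pop_multi with one 64-byte adjustment
   described in the unwind info.  */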
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
		     base_reg,
		     plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (VOIDmode,
			 reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers is being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers is
   popped, the last register is loaded using an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (SImode,
			   reg,
			   gen_frame_mem (SImode,
			       plus_constant (Pmode,
					      stack_pointer_rtx, 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
		 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the registers
	       to be loaded are generated in above given LDRD pattern, and the
	       pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (SImode, reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
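/* For illustration only (not from the original source): with
   SAVED_REGS_MASK covering {r4, r5, r6, r7} and no return through PC, the
   scheme described above comes out roughly as

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16		@ one separate stack update  */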
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped, and
	       we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);
	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (Pmode,
			 stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (SImode,
			 gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know whether r3 will be available, because an indirect
   tail call may be happening in this particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack
			    |    |   frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved
			    |    |   registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local
			    |    |   variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing
			    |    |   arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */


/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */
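/* For illustration only (not from the original source): for a function with
   8 bytes of pretend args, two saved core registers and 16 bytes of locals,
   the fields computed below would be saved_args = 8, saved_regs = 16,
   soft_frame = 16, locals_base = 32 and outgoing_args = 32 (all byte
   offsets from the old stack pointer, before any doubleword-alignment
   padding).  */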
static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     To work around this, we cache the computed frame size.  However we need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_HARD_FLOAT && TARGET_VFP)
	saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
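/* For illustration only (not from the original source): using the example
   offsets given earlier (saved_args = 8, outgoing_args = 32), eliminating
   ARG_POINTER_REGNUM into STACK_POINTER_REGNUM returns 32 - (8 + 4) = 20,
   i.e. the argument pointer sits 20 bytes above the final stack pointer.  */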
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

static bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
21132 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21134 HOST_WIDE_INT amount
;
21137 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21139 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21140 stack_pointer_rtx
, GEN_INT (amount
)));
21143 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21144 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21145 expects the first two operands to be the same. */
21148 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21150 hard_frame_pointer_rtx
));
21154 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21155 hard_frame_pointer_rtx
,
21156 stack_pointer_rtx
));
21158 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
21159 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21160 RTX_FRAME_RELATED_P (dwarf
) = 1;
21161 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21164 RTX_FRAME_RELATED_P (insn
) = 1;
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* For APCS frames, if IP register is clobbered
     when creating frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}
      else if (IS_NESTED (func_type))
	{
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order:

	       1. The last argument register r3 if it is available.
	       2. A slot on the stack above the frame if there are no
		  arguments to push onto the stack.
	       3. Register r3 again, after pushing the argument registers
		  onto the stack, if this is a varargs function.
	       4. The last slot on the stack created for the arguments to
		  push, if this isn't a varargs function.

	     Note - we only need to tell the dwarf2 backend about the SP
	     adjustment in the second variant; the static chain register
	     doesn't need to be unwound, as it doesn't contain a value
	     inherited from the caller.  */

	  if (!arm_r3_live_at_start_p ())
	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	  else if (args_to_push == 0)
	    {
	      rtx addr, dwarf;

	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	      saved_regs += 4;

	      addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	      fp_offset = 4;

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -fp_offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }
	  else
	    {
	      /* Store the args on the stack.  */
	      if (cfun->machine->uses_anonymous_args)
		{
		  insn
		    = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					   (0xf0 >> (args_to_push / 4)) & 0xf);
		  emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
		  saved_pretend_args = 1;
		}
	      else
		{
		  rtx addr, dwarf;

		  if (args_to_push == 4)
		    addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
		  else
		    addr
		      = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					    plus_constant (Pmode,
							   stack_pointer_rtx,
							   -args_to_push));

		  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

		  /* Just tell the dwarf backend that we adjusted SP.  */
		  dwarf
		    = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
		  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
		}

	      RTX_FRAME_RELATED_P (insn) = 1;
	      fp_offset = args_to_push;
	      args_to_push = 0;
	    }
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf,
	   (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating extra
     push of IP (needed when frame is needed and frame layout if apcs),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;

	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (IS_NESTED (func_type))
	    {
	      /* Recover the static chain register.  */
	      if (!arm_r3_live_at_start_p () || saved_pretend_args)
		insn = gen_rtx_REG (SImode, 3);
	      else
		{
		  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
		  insn = gen_frame_mem (SImode, insn);
		}
	      emit_set_insn (ip_rtx, insn);
	      /* Add a USE to stop propagate_one_insn() from barfing.  */
	      emit_insn (gen_force_register_use (ip_rtx));
	    }
	}
      else
	{
	  insn = GEN_INT (saved_regs - 4);
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					 hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
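/* For illustration only (not from the original source): for a nested APCS
   frame the code above typically assembles to the classic sequence

	mov	ip, sp
	push	{fp, ip, lr, pc}
	sub	fp, ip, #4

   with the pretend-args and stack-frame adjustments folded in as needed.  */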
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
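/* For illustration only (not from the original source): given a CONST_INT
   operand of 5, "%B0" in an output template prints -6 (the sign-extended
   bitwise inverse), while "%x0" prints #0x5.  */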
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
	{
	  fputc('s', stream);
	  arm_print_condition (stream);
	}
      else
	{
	  arm_print_condition (stream);
	  fputc('s', stream);
	}
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	r = real_value_negate (&r);
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'B':
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
	HOST_WIDE_INT val;

	if (!CONST_INT_P (x)
	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	else
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   REG_P (XEXP (x, 0))
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fputs (wc_reg_names [INTVAL (x)], stream);
	}
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	machine_mode mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (!REG_P (x)
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	rtx postinc_reg = NULL;
	unsigned align, memsize, align_bits;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	if (GET_CODE (addr) == POST_MODIFY)
	  {
	    postinc_reg = XEXP( XEXP (addr, 1), 1);
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
22119 instruction (for some alignments) as an aid to the memory subsystem
22121 align
= MEM_ALIGN (x
) >> 3;
22122 memsize
= MEM_SIZE (x
);
22124 /* Only certain alignment specifiers are supported by the hardware. */
22125 if (memsize
== 32 && (align
% 32) == 0)
22127 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22129 else if (memsize
>= 8 && (align
% 8) == 0)
22134 if (align_bits
!= 0)
22135 asm_fprintf (stream
, ":%d", align_bits
);
22137 asm_fprintf (stream
, "]");
22140 fputs("!", stream
);
22142 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
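      /* As an illustration (registers chosen arbitrarily): a 16-byte
	 access through a post-incremented pointer in r0 that is known
	 to be 128-bit aligned is printed by the 'A' code above as

	      [r0:128]!

	 while an access with no usable alignment guarantee comes out
	 as a plain [r0].  */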
    case 'C':
      {
	rtx addr;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	gcc_assert (REG_P (addr));
	asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element
       index.  */
    case 'y':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      gcc_assert (CONST_DOUBLE_P (x));
      int result;
      result = vfp3_const_double_for_fract_bits (x);
      if (result == 0)
	result = vfp3_const_double_for_bits (x);
      fprintf (stream, "#%d", result);
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  {
	    char fpstr[20];
	    real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			     sizeof (fpstr), 0, 1);
	    fprintf (stream, "#%s", fpstr);
	  }
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as an index register.  */
	      std::swap (base, index);
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      {
		asm_fprintf (stream, "[%r, %s%r",
			     REGNO (base), is_minus ? "-" : "",
			     REGNO (XEXP (index, 0)));
		arm_print_operand (stream, index, 'S');
		fputs ("]", stream);
		break;
	      }

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  extern machine_mode output_memory_reference_mode;

	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	  else
	    asm_fprintf (stream, "[%r], #%s%d",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else
	output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  if (!arm_pic_data_is_text_relative
	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    REAL_VALUE_TYPE rval;

	    REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

	    assemble_real
	      (rval, GET_MODE_INNER (mode),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
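/* As an illustration (insns chosen arbitrarily), two COND_EXEC insns
   predicated on EQ and NE respectively are combined into one IT block:

	cmp	r0, #0
	ite	eq
	moveq	r1, #1
	movne	r1, #0

   Here arm_condexec_count is 2 and arm_condexec_mask has bit 0 set and
   bit 1 clear; thumb2_asm_output_opcode below expands that mask into the
   "ite" prefix ('t' for insns on arm_current_cc, 'e' for insns on the
   inverse condition).  */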
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_CZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_NCVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
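/* For example, (eq (reg:CC CC_REGNUM) (const_int 0)) in plain CCmode maps
   to ARM_EQ, whereas in CC_SWPmode, where the comparison operands were
   swapped when the flags were set, (gt ...) maps to ARM_LT; the tables
   above simply record which flag encoding each CC mode leaves in the
   condition register.  */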
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT && TARGET_VFP
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return FALSE;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
	 putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return FALSE;

      if (TARGET_THUMB2)
	return TRUE;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return FALSE;
}
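/* For example, with TARGET_LDRD a DImode value in ARM state may live in
   {r0, r1} or {r2, r3} but not in {r1, r2}: the (regno & 1) test above
   rejects odd starting registers so that ldrd/strd, which need an even
   first register, can always be used for such values.  */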
/* Implement MODES_TIEABLE_P.  */
bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
	  || VALID_NEON_QREG_MODE (mode1)
	  || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
	  || VALID_NEON_QREG_MODE (mode2)
	  || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */
enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of
     the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
	     )
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */
static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */
static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions between
   __fp16 to or from double to do an intermediate conversion to float.  */
static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
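/* As an illustration: given "__fp16 h; double d = h;" this hook rewrites
   the widening as (double)(float)h, and likewise narrows a double to
   __fp16 through an intermediate float, as the half-precision semantics
   described above require.  */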
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */
static bool
arm_scalar_mode_supported_p (machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
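/* As an illustration (register numbers arbitrary): decomposing a copy of
   {d0, d1} into {d1, d2}, the destination overlaps the source and has the
   higher register number, so the component moves are emitted in reverse
   (d2 <- d1, then d1 <- d0) to avoid clobbering d1 before it is read.  */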
/* Split operands into moves from op[1] + op[2] into op[0].  */
void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
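/* For example, number_of_first_bit_set (0x0c) is 2.  Callers typically
   walk a register mask with "mask &= mask - 1" to visit each saved
   register in ascending order, as thumb1_emit_multi_reg_push below does.  */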
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (VOIDmode, tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
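/* For example, a MASK containing r4, r5 and the PC produces

	pop	{r4, r5, pc}

   whereas with interworking or a backtrace structure the PC cannot be
   popped directly, so the pop is closed early and thumb_exit emits a
   pop into a low register followed by a BX instead.  */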
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to     = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

    /* Check if unexpected far jump is used.  */
    if (cfun->machine->lr_save_eliminated
	&& get_attr_far_jump (insn) == FAR_JUMP_YES)
      internal_error ("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
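/* For example, 0x00ff0000 is 0xff << 16 and is therefore shiftable, while
   0x00ff00ff spans more than eight contiguous bits and is not; a
   shiftable constant can be materialized as a move of an 8-bit value
   followed by a left shift.  */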
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	{
	  far_jump = true;
	}
      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't very useful.

     Following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll-back the decision of not to use far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
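/* With the threshold above, a function of 683 or more bytes (683 * 3 =
   2049 >= 2048) is assumed to need a far jump: in the worst case only a
   third of the 2048-byte branch range is actual code, the rest being
   interleaved 4-byte constant pool entries.  */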
/* Return nonzero if FUNC must be entered in ARM mode.  */
int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
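/* Illustrative arithmetic for the function above (hypothetical values,
   not from the original source): with amount == 512 and at least one
   free low register, the test (512 - n_free * 4) < 512 holds and
   (512 - 508) / 4 == 1 extra register is pushed.  That leaves a
   508-byte adjustment, the largest immediate a single Thumb-1
   "sub sp, #imm" can encode, so the extra push replaces a
   constant-pool load of the full offset.  */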
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
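/* A typical sequence emitted by the high-register code above might look
   like this (illustrative only, for two saved high registers):

	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3

   i.e. the values are popped into free low registers and then moved back
   up, since the Thumb-1 pop instruction cannot address r8-r12 directly.  */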
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
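/* Example of the elimination arithmetic above (hypothetical offsets,
   for illustration only): if offsets->saved_args == 0 and
   offsets->outgoing_args == 40, eliminating ARG_POINTER_REGNUM in
   favour of STACK_POINTER_REGNUM yields 40 - 0 == 40, the distance
   between the incoming arguments and the final stack pointer.  */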
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	 0   sub   SP, #16         Reserve space for 4 registers.
	 2   push  {R7}            Push low registers.
	 4   add   R7, SP, #20     Get the stack pointer before the push.
	 6   str   R7, [SP, #8]    Store the stack pointer
				     (before reserving the space).
	 8   mov   R7, PC          Get hold of the start of this code + 12.
	 10  str   R7, [SP, #16]   Store it.
	 12  mov   R7, FP          Get hold of the current frame pointer.
	 14  str   R7, [SP, #4]    Store it.
	 16  mov   R7, LR          Get hold of the current return address.
	 18  str   R7, [SP, #12]   Store it.
	 20  add   R7, SP, #16     Point at the start of the
				     backtrace structure.
	 22  mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers, since such stashing would clobber
	 the argument values.  */
      pushable_regs = l_mask & (~arg_regs_mask) & 0xff;

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;

	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      pushable_regs |= l_mask;
	      real_regs_mask |= l_mask;
	      l_mask = 0;
	    }

	  insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
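/* For reference, the two stack-decrement shapes the prologue above can
   emit (illustrative only, with hypothetical frame sizes):

	sub	sp, #508		@ amount < 512: one instruction

	ldr	r4, .Lframe		@ large frame: load the negated
	add	sp, sp, r4		@ size into a saved call-preserved
					@ low register from the literal pool

   where .Lframe is a hypothetical constant-pool entry holding -amount.  */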
/* Generate pattern *pop_multiple_with_stack_update_and_return if a single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is whether a single register or
   multiple registers are to be returned.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      emit_jump_insn (simple_return_rtx);
    }
}
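/* Illustrative output for the two cases handled above (not from the
   original source): a function that saved {r4, r5, lr} returns with a
   single "pop {r4, r5, pc}", while a function whose only saved register
   is LR gets the PARALLEL form, which corresponds to a post-incrementing
   "ldr pc, [sp], #4".  */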
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers are saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP, which holds the old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
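/* Bitmask illustration for the IP/SP substitution above (hypothetical
   mask, not from the original source): if saved_regs_mask contains
   {r4, ip, lr} it is 0x5010; clearing bit 12 (IP) and setting bit 13
   (SP) gives 0x6010.  Because LDM loads ascending register numbers from
   ascending addresses, the stack slot that held IP (the old stack
   pointer) is now reloaded directly into SP by the single multi-pop.  */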
/* Generate RTL to represent the ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
							gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  if (crtl->args.pretend_args_size)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp =
	emit_insn (gen_addsi3 (stack_pointer_rtx,
			       stack_pointer_rtx,
			       GEN_INT (crtl->args.pretend_args_size)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer to arm_expand_prologue for how
	     pretend args are saved on the stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
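/* Illustrative mask transformation for the return-in-PC path above
   (hypothetical register set): saved {r4, r5, lr} is mask 0x4030;
   clearing LR (bit 14) and setting PC (bit 15) yields 0x8030, so the
   epilogue ends in a single "ldmfd sp!, {r4, r5, pc}" that restores the
   callee-saved registers and returns in one instruction.  */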
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
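/* The mode-switch emitted above is simply (illustrative):

	orr	r12, pc, #1
	bx	r12

   In ARM state the PC reads as the current instruction address plus 8,
   so r12 ends up addressing the Thumb stub that follows the bx, with
   the low bit set to select Thumb state.  */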
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
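/* Example pairing (illustrative, with a hypothetical label): a call
   through r4 is emitted here as "bl .LXX", and for the text-section
   case arm_file_end later materialises the shared veneer

   .LXX:
	bx	r4

   once per compilation unit; with -ffunction-sections a per-function
   copy is used instead, since reachability of a shared one cannot be
   guaranteed.  */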
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
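/* Example output (illustrative): arm_emit_eabi_attribute
   ("Tag_ABI_align8_preserved", 25, 1) under -fverbose-asm prints

	.eabi_attribute 25, 1	@ Tag_ABI_align8_preserved

   while without verbose asm only the numeric form is emitted.  */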
/* This function is used to print CPU tuning information as a comment
   in the assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t@.tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
	       current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
	       current_tune->num_prefetch_slots);
  asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
	       current_tune->l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
	       current_tune->l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit[0],
	       (int) current_tune->logical_op_non_short_circuit[1]);
  asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
	       (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file,
	       "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file,
	       "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_partial_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
	       current_tune->fuseable_ops);
  asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
	    {
	      /* Keep backward compatibility for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	    }
	  else
	    {
	      const char* pos = strchr (arm_selected_arch->name, '+');
	      if (pos)
		{
		  char buf[15];
		  gcc_assert (strlen (arm_selected_arch->name)
			      <= sizeof (buf) / sizeof (*pos));
		  strncpy (buf, arm_selected_arch->name,
			   (pos - arm_selected_arch->name) * sizeof (*pos));
		  buf[pos - arm_selected_arch->name] = '\0';
		  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
		  asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
		}
	      else
		asm_fprintf (asm_out_file, "\t.arch %s\n",
			     arm_selected_arch->name);
	    }
	}
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_selected_cpu->name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (TARGET_SOFT_FLOAT)
	{
	  fpu_name = "softvfp";
	}
      else
	{
	  fpu_name = arm_fpu_desc->name;
	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
	    {
	      if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
		arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	      if (TARGET_HARD_FLOAT_ABI)
		arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	{
	  fputs ("\tldr\tr12, ", file);
	}
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
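/* A minimal non-PIC ARM-state thunk produced by the code above might
   read (illustrative, with a hypothetical delta of 8 and target name):

	add	r0, r0, #8
	b	target_function

   r0 holds the `this' pointer; it becomes r1 when the return value is
   passed by reference, per the aggregate_value_p test above.  */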
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
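/* Example (illustrative): a V4HImode CONST_VECTOR holding {1, 2, 3, 4}
   is printed as 0x0004000300020001 -- elements are emitted from the
   highest-numbered lane down, each with the %04x pattern.  */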
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
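/* Worked example (hypothetical prototype, for illustration only): for
   "int f (int fixed, ...)" under AAPCS, one core register carries the
   named argument, so nregs == 1 and *pretend_size becomes
   (4 - 1) * 4 == 12, i.e. r1-r3 are spilled below the saved registers
   where va_arg can find them.  */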
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
26136 /* AAPCS based ABIs use short enums by default. */
26139 arm_default_short_enums (void)
26141 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
26145 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26148 arm_align_anon_bitfield (void)
26150 return TARGET_AAPCS_BASED
;
/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}
static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}


static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}
      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
	return V2SImode;
      case HImode:
	return V4HImode;
      case QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
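/* Illustrative consequence (not in the original source): for
	int f (int x, int n) { return x << (n & 255); }
   the middle-end may drop the explicit AND on an SImode shift, because the
   mask above promises that ARM register-controlled shifts already truncate
   the count to 8 bits.  Returning 0 for DImode means no such truncation is
   assumed for 64-bit shifts.  */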
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
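/* Illustrative mapping (not in the original source): s0, the first VFP
   register, maps to DWARF register 64 under the legacy numbering; d16,
   for which VFP_REGNO_OK_FOR_SINGLE is false, maps to 256 + 16 = 272 in
   the D0-D31 range.  */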
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	gcc_assert (REG_P (XEXP (e, 0))
		    && REGNO (XEXP (e, 0)) == SP_REGNUM
		    && CONST_INT_P (XEXP (e, 1))
		    && offset == INTVAL (XEXP (e, 1)));
      else
	gcc_assert (i == 1
		    && REG_P (e)
		    && REGNO (e) == SP_REGNUM);
      offset += reg_size;
#endif
    }
  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
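/* Illustrative output (not in the original source): a prologue insn
	push	{r4, r5, lr}
   is annotated for the EABI unwinder as
	.save	{r4, r5, lr}
   while a VFP store-multiple of d8-d9 would instead be annotated as
	.vsave	{d8, d9}  */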
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
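/* Illustrative output (not in the original source): a function built with
   -funwind-tables is bracketed as
	.fnstart
	...
	.fnend
   whereas a nothrow function that will never be unwound additionally gets
	.cantunwind
   before .fnend, as emitted above.  */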
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs (")", fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputs (")", fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op(operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT(val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
	|| ((opmode == V2SImode) && (shift > 31))
	|| ((opmode == DImode) && (shift > 63)))
  {
    if (wror_or_wsra)
      {
	sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	output_asm_insn (templ, operands);
	if (opmode == DImode)
	  {
	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	    output_asm_insn (templ, operands);
	  }
      }
    else
      {
	/* The destination register will contain all zeros.  */
	sprintf (templ, "wzero\t%%0");
	output_asm_insn (templ, operands);
      }
    return "";
  }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;
      mask >>= 1;
    }
  gcc_assert (i < units);
  switch (GET_MODE (operands[0]))
    {
    case V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (templ, operands);
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
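/* Illustrative expansion (not in the original source): a dense switch on
   Thumb-1 dispatches through a libgcc helper, e.g.
	bl	__gnu_thumb1_case_sqi
   followed immediately by a table of signed byte offsets; the helper
   indexes the table with the switch value and adjusts the return address
   to branch to the selected case.  */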
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
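/* Illustrative expansion (not in the original source): for a byte table
   the sequence emitted above looks like
	cmp	r0, #9
	bhi	.Ldefault
	tbb	[pc, r0]
   where tbb loads a byte from the table that follows the instruction and
   branches forward by twice that value.  */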
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    default:
      return 1;
    }
}

/* Return how many instructions should scheduler lookahead to choose the
   best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}

static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}

/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    return int_log2 (value);
	}
    }
  return 0;
}

int
vfp3_const_double_for_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_truncate (DFmode, &r0))
    {
      HOST_WIDE_INT value = real_to_integer (&r0);
      value = value & 0xffffffff;
      if ((value != 0) && ( (value & (value - 1)) == 0))
	return int_log2 (value);
    }
  return 0;
}
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
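/* Illustrative behaviour (not in the original source): for a seq_cst
   operation on a target without acquire/release instructions,
   need_atomic_barrier_p is true on both sides, so a dmb is emitted before
   and after the atomic sequence; for a relaxed operation neither barrier
   is emitted.  */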
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_exclusiveqi; break;
	case HImode: gen = gen_arm_load_exclusivehi; break;
	case SImode: gen = gen_arm_load_exclusivesi; break;
	case DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case HImode: gen = gen_arm_store_release_exclusivehi; break;
	case SImode: gen = gen_arm_store_release_exclusivesi; break;
	case DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_exclusiveqi; break;
	case HImode: gen = gen_arm_store_exclusivehi; break;
	case SImode: gen = gen_arm_store_exclusivesi; break;
	case DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
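/* Illustrative result (not in the original source): a 32-bit strong
   __atomic_compare_exchange_n typically splits into a loop of the form
	1:	ldrex	r0, [r3]
		cmp	r0, r1
		bne	2f
		strex	r2, r4, [r3]
		cmp	r2, #0
		bne	1b
	2:
   with success signalled via the Z flag, as arranged above.  */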
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = (enum memmodel) INTVAL (operands[5]);
  mod_f = (enum memmodel) INTVAL (operands[6]);
  scratch = operands[7];
  mode = GET_MODE (mem);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(mod_s == MEMMODEL_RELAXED
			  || mod_s == MEMMODEL_CONSUME
			  || mod_s == MEMMODEL_RELEASE);

  bool use_release = TARGET_HAVE_LDACQ
		     && !(mod_s == MEMMODEL_RELAXED
			  || mod_s == MEMMODEL_CONSUME
			  || mod_s == MEMMODEL_ACQUIRE);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (label2);
}
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  rtx x;

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(model == MEMMODEL_RELAXED
			  || model == MEMMODEL_CONSUME
			  || model == MEMMODEL_RELEASE);

  bool use_release = TARGET_HAVE_LDACQ
		     && !(model == MEMMODEL_RELAXED
			  || model == MEMMODEL_CONSUME
			  || model == MEMMODEL_ACQUIRE);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
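/* Illustrative example (not in the original source): for a two-operand
   V8QImode permutation, selector values 8-15 name elements of the second
   operand, and the AND above reduces every index modulo 16 (or modulo 8
   in the single-operand case) before vtbl performs the lookup, giving the
   modulo semantics that VEC_PERM_EXPR requires.  */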
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 1;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode:  gen = gen_neon_vextv8qi;  break;
    case V4HImode:  gen = gen_neon_vextv4hi;  break;
    case V8HImode:  gen = gen_neon_vextv8hi;  break;
    case V2SImode:  gen = gen_neon_vextv2si;  break;
    case V4SImode:  gen = gen_neon_vextv4si;  break;
    case V2SFmode:  gen = gen_neon_vextv2sf;  break;
    case V4SFmode:  gen = gen_neon_vextv4sf;  break;
    case V2DImode:  gen = gen_neon_vextv2di;  break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
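/* Illustrative example (not in the original source): when this hook
   accepts ARM_POST_INC for SImode, a copy loop can use addressing such as
	ldr	r3, [r0], #4
	str	r3, [r1], #4
   folding the pointer increment into the memory access itself.  */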
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
#define SUB_32(DEST,SRC) \
	  gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
#define RSB_32(DEST,SRC) \
	  gen_subsi3 ((DEST), GEN_INT (32), (SRC))
#define SUB_S_32(DEST,SRC) \
	  gen_addsi3_compare0 ((DEST), (SRC), \
			       GEN_INT (-32))
#define SET(DEST,SRC) \
	  gen_rtx_SET (SImode, (DEST), (SRC))
#define SHIFT(CODE,SRC,AMOUNT) \
	  gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
#define LSHIFT(CODE,SRC,AMOUNT) \
	  gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			  SImode, (SRC), (AMOUNT))
#define REV_LSHIFT(CODE,SRC,AMOUNT) \
	  gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			  SImode, (SRC), (AMOUNT))
#define ORR(A,B) \
	  gen_rtx_IOR (SImode, (A), (B))
#define BRANCH(COND,LABEL) \
	  gen_arm_cond_branch ((LABEL), \
			       gen_rtx_ ## COND (CCmode, cc_reg, \
						 const0_rtx), \
			       cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
28622 /* First, handle out-of-range shift amounts.
28623 In both cases we try to match the result an ARM instruction in a
28624 shift-by-register would give. This helps reduce execution
28625 differences between optimization levels, but it won't stop other
28626 parts of the compiler doing different things. This is "undefined
28627 behaviour, in any case. */
28628 if (INTVAL (amount
) <= 0)
28629 emit_insn (gen_movdi (out
, in
));
28630 else if (INTVAL (amount
) >= 64)
28632 if (code
== ASHIFTRT
)
28634 rtx const31_rtx
= GEN_INT (31);
28635 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
28636 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
28639 emit_insn (gen_movdi (out
, const0_rtx
));
28642 /* Now handle valid shifts. */
28643 else if (INTVAL (amount
) < 32)
28645 /* Shifts by a constant less than 32. */
28646 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
28648 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28649 emit_insn (SET (out_down
,
28650 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
28652 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
28656 /* Shifts by a constant greater than 31. */
28657 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
28659 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
28660 if (code
== ASHIFTRT
)
28661 emit_insn (gen_ashrsi3 (out_up
, in_up
,
28664 emit_insn (SET (out_up
, const0_rtx
));
28669 /* We have a shift-by-register. */
28670 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
28672 /* This alternative requires the scratch registers. */
28673 gcc_assert (scratch1
&& REG_P (scratch1
));
28674 gcc_assert (scratch2
&& REG_P (scratch2
));
28676 /* We will need the values "amount-32" and "32-amount" later.
28677 Swapping them around now allows the later code to be more general. */
28681 emit_insn (SUB_32 (scratch1
, amount
));
28682 emit_insn (RSB_32 (scratch2
, amount
));
28685 emit_insn (RSB_32 (scratch1
, amount
));
28686 /* Also set CC = amount > 32. */
28687 emit_insn (SUB_S_32 (scratch2
, amount
));
28690 emit_insn (RSB_32 (scratch1
, amount
));
28691 emit_insn (SUB_32 (scratch2
, amount
));
28694 gcc_unreachable ();
28697 /* Emit code like this:
28700 out_down = in_down << amount;
28701 out_down = (in_up << (amount - 32)) | out_down;
28702 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28703 out_up = in_up << amount;
28706 out_down = in_down >> amount;
28707 out_down = (in_up << (32 - amount)) | out_down;
28709 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28710 out_up = in_up << amount;
28713 out_down = in_down >> amount;
28714 out_down = (in_up << (32 - amount)) | out_down;
28716 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28717 out_up = in_up << amount;
28719 The ARM and Thumb2 variants are the same but implemented slightly
28720 differently. If this were only called during expand we could just
28721 use the Thumb2 case and let combine do the right thing, but this
28722 can also be called from post-reload splitters. */
28724 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28726 if (!TARGET_THUMB2
)
28728 /* Emit code for ARM mode. */
28729 emit_insn (SET (out_down
,
28730 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
28731 if (code
== ASHIFTRT
)
28733 rtx_code_label
*done_label
= gen_label_rtx ();
28734 emit_jump_insn (BRANCH (LT
, done_label
));
28735 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
28737 emit_label (done_label
);
28740 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
28745 /* Emit code for Thumb2 mode.
28746 Thumb2 can't do shift and or in one insn. */
28747 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
28748 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
28750 if (code
== ASHIFTRT
)
28752 rtx_code_label
*done_label
= gen_label_rtx ();
28753 emit_jump_insn (BRANCH (LT
, done_label
));
28754 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
28755 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
28756 emit_label (done_label
);
28760 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
28761 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
28765 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
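/* A minimal host-side model of the shift-by-constant decomposition above,
   for the logical-right-shift case with an amount in (0, 32).  An editor's
   sketch for illustration only: the helper name is hypothetical and the
   block is not part of the build.  */
#if 0
#include <stdint.h>

/* Mirror the emitted RTL: LSHIFT on the "down" word, merge in the bits
   shifted out of the "up" word with REV_LSHIFT + ORR, then SHIFT the
   "up" word.  AMOUNT must be in 1..31 so the reverse shift is defined.  */
static uint64_t
model_lshiftrt_64 (uint64_t in, unsigned int amount)
{
  uint32_t in_down = (uint32_t) in;        /* low word: "down" stream  */
  uint32_t in_up = (uint32_t) (in >> 32);  /* high word: "up" stream   */
  uint32_t out_down, out_up;

  out_down = in_down >> amount;            /* LSHIFT (code, in_down, amount) */
  out_down |= in_up << (32 - amount);      /* ORR (REV_LSHIFT (...), out_down) */
  out_up = in_up >> amount;                /* SHIFT (code, in_up, amount) */

  return ((uint64_t) out_up << 32) | out_down;
}
#endif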
/* Return TRUE if COMPARISON is a valid comparison operation, putting
   its operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
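/* For illustration (editor's note): an UNEQ or LTGT comparison is
   rejected outright above, while e.g. a DImode comparison is first
   canonicalized and then has any operand not matching cmpdi_operand
   forced into a register.  */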
/* Maximum number of instructions to set a block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set a block of memory for the
   non-vectorized case.  VAL is the value to set the memory with.
   LENGTH is the number of bytes to set.  ALIGN is the alignment of
   the destination memory in bytes.  UNALIGNED_P is TRUE if we can
   only set the memory with instructions meeting alignment
   requirements.  USE_STRD_P is TRUE if we can use strd to set the
   memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
                                 unsigned HOST_WIDE_INT length,
                                 unsigned HOST_WIDE_INT align,
                                 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
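/* Worked example (editor's illustration): with length == 15, strd
   available, and a double-word constant assumed to cost 2 insns,
   num = 2 + (15 >> 3) + leftover[7] = 2 + 1 + 3 = 6, which is then
   compared against arm_block_set_max_insns ().  */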
/* Return TRUE if it's profitable to set a block of memory for the
   vectorized case.  LENGTH is the number of bytes to set.  ALIGN is
   the alignment of the destination memory in bytes.  MODE is the
   vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
                             unsigned HOST_WIDE_INT align,
                             machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num++;

  return (num <= arm_block_set_max_insns ());
}
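/* Worked example (editor's illustration): length == 20 with 4-byte
   alignment and V16QImode gives num = 1 (constant load) + 2 (stores)
   + 1 (first-16-bytes vst1) = 4; no address adjustment is counted
   since (20 & 3) == 0.  */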
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
                              unsigned HOST_WIDE_INT length,
                              unsigned HOST_WIDE_INT value,
                              unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, 0);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
        emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
    }

  /* If at least nelt_v8 bytes are left over, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
        {
          reg = gen_lowpart (V8QImode, reg);
          mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
        }
      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
                                              + (nelt_mode - nelt_v8))));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
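/* Worked example (editor's illustration): length == 13 selects V8QImode
   above; bytes [0, 8) are stored first, then DST is advanced by
   length - i == 5 so the final vst1 covers bytes [5, 13), overlapping
   the previous store instead of falling back to scalar stores.  */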
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
                            unsigned HOST_WIDE_INT length,
                            unsigned HOST_WIDE_INT value,
                            unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
        {
          emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
          mem = adjust_automodify_address (dstbase, mode, dst, 0);
          /* We are shifting bytes back, set the alignment accordingly.  */
          if ((length & 0x3) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 4);
          else if ((length & 0x1) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 2);
          else
            set_mem_align (mem, BITS_PER_UNIT);

          emit_insn (gen_movmisalignv16qi (mem, reg));
          return true;
        }
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      mem = adjust_automodify_address (dstbase, mode,
                                       addr, i - UNITS_PER_WORD);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
        set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT * 2);
      else
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
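/* Worked example (editor's illustration): length == 12 with word
   alignment selects V8QImode above; bytes [0, 8) are stored first, and
   the remaining word is covered by one more 8-byte store at offset 4,
   i.e. bytes [4, 12), still using aligned access.  */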
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on the processor.  We fill
   the first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
                                  unsigned HOST_WIDE_INT length,
                                  unsigned HOST_WIDE_INT value,
                                  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
                                unsigned HOST_WIDE_INT length,
                                unsigned HOST_WIDE_INT value,
                                unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
                && TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, false, use_strd_p))
    {
      if (!use_strd_p)
        return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                            align, false, use_strd_p))
        return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
        {
          addr = plus_constant (Pmode, dst, i);
          mem = adjust_automodify_address (dstbase, DImode, addr, i);
          emit_move_insn (mem, reg);
        }
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
         compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
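/* Worked example (editor's illustration): length == 7 stores one word
   for bytes [0, 4); with unaligned access available, the trailing
   strh/strb pair for bytes [4, 7) is then merged into a single str
   covering bytes [3, 7).  */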
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
                    unsigned HOST_WIDE_INT length,
                    unsigned HOST_WIDE_INT value,
                    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand a string store operation.  First we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
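/* For illustration (editor's note): this is reached from the setmem
   expander, so e.g. memset (p, c, 15) with a word-aligned P arrives as
   OPERANDS {mem, const_int 15, const_int c, const_int 4} and is
   expanded inline only when one of the helpers above finds it
   profitable.  */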
/* Return TRUE if the current tuning wants any instruction pairs fused.  */
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
}
/* Return TRUE if insns PREV and CURR should be scheduled together as a
   fuseable pair.  */
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
    {
      /* We are trying to fuse
         movw imm / movt imm
         instructions as a group that gets scheduled together.  */

      set_dest = SET_DEST (curr_set);

      if (GET_MODE (set_dest) != SImode)
        return false;

      /* We are trying to match:
         prev (movw) == (set (reg r0) (const_int imm16))
         curr (movt) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 16))
                             (const_int imm16_1))
         or
         prev (movw) == (set (reg r1)
                             (high (symbol_ref ("SYM"))))
         curr (movt) == (set (reg r0)
                             (lo_sum (reg r1)
                                     (symbol_ref ("SYM"))))  */
      if (GET_CODE (set_dest) == ZERO_EXTRACT)
        {
          if (CONST_INT_P (SET_SRC (curr_set))
              && CONST_INT_P (SET_SRC (prev_set))
              && REG_P (XEXP (set_dest, 0))
              && REG_P (SET_DEST (prev_set))
              && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
            return true;
        }
      else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
               && REG_P (SET_DEST (curr_set))
               && REG_P (SET_DEST (prev_set))
               && GET_CODE (SET_SRC (prev_set)) == HIGH
               && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
        return true;
    }

  return false;
}
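/* For illustration (editor's note): the fused pair corresponds to
       movw  r0, #:lower16:SYM
       movt  r0, #:upper16:SYM
   which together materialize a full 32-bit constant or address.  */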
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}
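/* For illustration (editor's note): with this offset, AddressSanitizer
   maps an application address A to its shadow byte at
   (A >> 3) + (1 << 29), placing the shadow region at 0x20000000.  */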
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */
static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
        {
          decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
          if (decl_op1
              && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
              && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
            {
              if ((TREE_CODE (decl_op1) == VAR_DECL
                   || TREE_CODE (decl_op1) == CONST_DECL)
                  && (TREE_CODE (decl_op0) == VAR_DECL
                      || TREE_CODE (decl_op0) == CONST_DECL))
                return (get_variable_section (decl_op1, false)
                        != get_variable_section (decl_op0, false));

              if (TREE_CODE (decl_op1) == LABEL_DECL
                  && TREE_CODE (decl_op0) == LABEL_DECL)
                return (DECL_CONTEXT (decl_op1)
                        != DECL_CONTEXT (decl_op0));
            }

          return true;
        }
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* If MEM is in the form of [base+offset], extract the two parts of
   the address and store them in BASE and OFFSET; otherwise return
   FALSE after clearing BASE and OFFSET.  */
static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
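/* For illustration (editor's note): (mem (reg r1)) sets BASE to r1 and
   OFFSET to const0_rtx; (mem (plus (reg r1) (const_int 8))) sets BASE
   to r1 and OFFSET to (const_int 8); other forms, e.g. reg+reg
   addresses, clear both and return false.  */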
/* If INSN is a load or store of an address in the form of [base+offset],
   extract the two parts and set them in BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */
static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */
static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
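/* For illustration (editor's note): two ldr insns with the same base
   register and offsets 4 and 8 receive equal FUSION_PRI but PRI values
   differing by 4, so the scheduler keeps them adjacent and orders them
   by offset.  */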
#include "gt-arm.h"