/* Output routines for GCC for ARM.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
26 #include "hash-table.h"
32 #include "double-int.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
50 #include "insn-attr.h"
55 #include "statistics.h"
57 #include "fixed-value.h"
64 #include "insn-codes.h"
66 #include "diagnostic-core.h"
69 #include "dominance.h"
75 #include "cfgcleanup.h"
76 #include "basic-block.h"
79 #include "plugin-api.h"
86 #include "sched-int.h"
87 #include "target-def.h"
89 #include "langhooks.h"
96 #include "gimple-expr.h"
98 #include "tm-constrs.h"
100 #include "sched-int.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
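/* A language front end can set the hook above to emit additional,
   language-specific EABI object attributes into the assembly output.  */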
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
					     const unsigned char *sel);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL, 0, 0, false, false, false, NULL, false }
};
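/* For example, an interrupt handler can be declared as
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
   arm_handle_isr_attribute checks where the attribute is applied and
   arm_isr_value decodes the argument string.  */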
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
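/* That is, each anchor covers offsets in [-4088, 4095], a block of
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.  */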
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
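/* That is, with -mrestrict-it (the ARMv8-A rule) an IT block may cover
   only a single instruction; otherwise it may cover up to four.  */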
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;
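/* TARGET_INITIALIZER expands to an initializer for every target hook,
   picking up each TARGET_* override defined above and the documented
   default for everything else.  */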
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;
/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bit operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
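/* THUMB2_WORK_REGS is thus the set of low registers (r0-r7) usable as
   work registers, with the Thumb hard frame pointer, stack pointer,
   program counter and PIC register masked out.  */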
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, l1_size, l1_line_size
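/* These expand to the three prefetch-tuning fields of tune_params
   (number of prefetch slots, L1 cache size, L1 cache line size), with
   -1 meaning the corresponding value is unknown or irrelevant.  */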
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
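/* These are unitless relative weights consumed by the vectorizer's cost
   model; 1 is the cost of a simple scalar statement.  */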
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =
{
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (2),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (3),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (4),	/* extend.  */
  COSTS_N_INSNS (4),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  COSTS_N_INSNS (2),	/* load_sign_extend.  */
  COSTS_N_INSNS (2),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),	/* loadf.  */
  COSTS_N_INSNS (5),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (2),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */
  COSTS_N_INSNS (14),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (24),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct cpu_cost_table cortexa8_extra_costs =
{
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  0,			/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  0,			/* log_shift_reg.  */
  0,			/* extend_arith.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (1),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* loadf.  */
  COSTS_N_INSNS (1),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (1),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */
  COSTS_N_INSNS (36),	/* div.  */
  COSTS_N_INSNS (11),	/* mult.  */
  COSTS_N_INSNS (20),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (64),	/* div.  */
  COSTS_N_INSNS (16),	/* mult.  */
  COSTS_N_INSNS (25),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (6),	/* widen.  */
  COSTS_N_INSNS (6),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct cpu_cost_table cortexa5_extra_costs =
{
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (6),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct cpu_cost_table cortexa7_extra_costs =
{
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (2),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct cpu_cost_table cortexa12_extra_costs =
{
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (3),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  0,			/* load_unaligned.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  0			/* store_unaligned.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct cpu_cost_table cortexa15_extra_costs =
{
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (4),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  0,			/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  0			/* store_unaligned.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (5),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
};

const struct cpu_cost_table v7m_extra_costs =
{
  0,			/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* non_exec.  */
  false			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (8)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  0,			/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  1,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  1,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (3),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */
  COSTS_N_INSNS (7),	/* div.  */
  COSTS_N_INSNS (2),	/* mult.  */
  COSTS_N_INSNS (5),	/* mult_addsub.  */
  COSTS_N_INSNS (3),	/* fma.  */
  COSTS_N_INSNS (1),	/* addsub.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1)	/* alu.  */
};
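/* In the tables above, COSTS_N_INSNS (N) expresses a cost equivalent to
   N generic instructions, so e.g. COSTS_N_INSNS (30) for idiv models a
   division that is thirty times as expensive as a simple ALU operation,
   while 0 means the operation adds no cost over the baseline.  */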
#define ARM_FUSE_NOTHING	(0)
#define ARM_FUSE_MOVW_MOVT	(1 << 0)
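/* The ARM_FUSE_* values form a bitmask, stored in tune_params, naming
   which adjacent instruction pairs the scheduler should try to keep
   together; ARM_FUSE_MOVW_MOVT fuses a movw with the movt that
   completes a 32-bit immediate.  */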
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  3,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bit bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bit bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bit bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  NULL,					/* Insn extra costs.  */
  xscale_sched_adjust_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bit bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bit bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
1797 const struct tune_params arm_marvell_pj4_tune
=
1801 NULL
, /* Sched adj cost. */
1802 1, /* Constant limit. */
1803 5, /* Max cond insns. */
1804 ARM_PREFETCH_NOT_BENEFICIAL
,
1805 true, /* Prefer constant pool. */
1806 arm_default_branch_cost
,
1807 false, /* Prefer LDRD/STRD. */
1808 {true, true}, /* Prefer non short circuit. */
1809 &arm_default_vec_cost
, /* Vectorizer costs. */
1810 false, /* Prefer Neon for 64-bits bitops. */
1811 false, false, /* Prefer 32-bit encodings. */
1812 false, /* Prefer Neon for stringops. */
1813 8, /* Maximum insns to inline memset. */
1814 ARM_FUSE_NOTHING
, /* Fuseable pairs of instructions. */
1815 ARM_SCHED_AUTOPREF_OFF
, /* Sched L2 autopref. */
1819 const struct tune_params arm_v6t2_tune
=
1823 NULL
, /* Sched adj cost. */
1824 1, /* Constant limit. */
1825 5, /* Max cond insns. */
1826 ARM_PREFETCH_NOT_BENEFICIAL
,
1827 false, /* Prefer constant pool. */
1828 arm_default_branch_cost
,
1829 false, /* Prefer LDRD/STRD. */
1830 {true, true}, /* Prefer non short circuit. */
1831 &arm_default_vec_cost
, /* Vectorizer costs. */
1832 false, /* Prefer Neon for 64-bits bitops. */
1833 false, false, /* Prefer 32-bit encodings. */
1834 false, /* Prefer Neon for stringops. */
1835 8, /* Maximum insns to inline memset. */
1836 ARM_FUSE_NOTHING
, /* Fuseable pairs of instructions. */
1837 ARM_SCHED_AUTOPREF_OFF
, /* Sched L2 autopref. */
1842 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1843 const struct tune_params arm_cortex_tune
=
1846 &generic_extra_costs
,
1847 NULL
, /* Sched adj cost. */
1848 1, /* Constant limit. */
1849 5, /* Max cond insns. */
1850 ARM_PREFETCH_NOT_BENEFICIAL
,
1851 false, /* Prefer constant pool. */
1852 arm_default_branch_cost
,
1853 false, /* Prefer LDRD/STRD. */
1854 {true, true}, /* Prefer non short circuit. */
1855 &arm_default_vec_cost
, /* Vectorizer costs. */
1856 false, /* Prefer Neon for 64-bits bitops. */
1857 false, false, /* Prefer 32-bit encodings. */
1858 false, /* Prefer Neon for stringops. */
1859 8, /* Maximum insns to inline memset. */
1860 ARM_FUSE_NOTHING
, /* Fuseable pairs of instructions. */
1861 ARM_SCHED_AUTOPREF_OFF
, /* Sched L2 autopref. */
const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  true,					/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  true,					/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,					/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  true, true,				/* Prefer 32-bit encodings.  */
  true,					/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_FULL,		/* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Scheduler cost adjustment.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  true,					/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_MOVW_MOVT,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Scheduler cost adjustment.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,					/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  true, true,				/* Prefer 32-bit encodings.  */
  true,					/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_MOVW_MOVT,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_FULL,		/* Sched L2 autopref.  */
};
const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  NULL,					/* Scheduler cost adjustment.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,					/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  true, true,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  32,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_cortex_a5_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {false, false},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  true,					/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,					/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  true, true,				/* Prefer 32-bit encodings.  */
  true,					/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_MOVW_MOVT,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two
   cycles to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_cortex_m_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {false, false},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  NULL,					/* Sched adj cost.  */
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_cortex_m7_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {false, false},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
const struct tune_params arm_fa726te_tune =
{
  fa726te_sched_adjust_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  false,				/* Prefer Neon for 64-bits bitops.  */
  false, false,				/* Prefer 32-bit encodings.  */
  false,				/* Prefer Neon for stringops.  */
  8,					/* Maximum insns to inline memset.  */
  ARM_FUSE_NOTHING,			/* Fuseable pairs of instructions.  */
  ARM_SCHED_AUTOPREF_OFF,		/* Sched L2 autopref.  */
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
   FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
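
/* Expansion sketch (editorial illustration; the flag set shown is
   hypothetical, not copied from arm-cores.def): an entry of the form

     ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8)

   would expand under the ARM_CORE macro above to

     {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a8_tune},

   so each core row carries both its architecture flags and a pointer to
   its tuning table.  */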
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
#undef ARM_FPU
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
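
/* Worked example for the loop above: for value = 0b101100 the
   "value &= value - 1" step clears one set bit per iteration:
     0b101100 & 0b101011 = 0b101000
     0b101000 & 0b100111 = 0b100000
     0b100000 & 0b011111 = 0b000000
   so the loop runs three times and bit_count returns 3.  */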
typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
2315 /* Double-precision floating-point arithmetic. Table 2. */
2316 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2317 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2318 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2319 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2320 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2322 /* Double-precision comparisons. Table 3. */
2323 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2324 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2325 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2326 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2327 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2328 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2329 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2331 /* Single-precision floating-point arithmetic. Table 4. */
2332 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2333 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2334 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2335 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2336 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2338 /* Single-precision comparisons. Table 5. */
2339 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2340 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2341 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2342 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2343 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2344 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2345 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2347 /* Floating-point to integer conversions. Table 6. */
2348 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2349 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2350 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2351 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2352 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2353 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2354 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2355 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2357 /* Conversions between floating types. Table 7. */
2358 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2359 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2361 /* Integer to floating-point conversions. Table 8. */
2362 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2363 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2364 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2365 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2366 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2367 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2368 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2369 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2371 /* Long long. Table 9. */
2372 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2373 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2374 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2375 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2376 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2377 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2378 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2379 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2381 /* Integer (32/32->32) division. \S 4.3.1. */
2382 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2383 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },   { UQQmode, "uqq" },
	{ HQmode, "hq" },   { UHQmode, "uhq" },
	{ SQmode, "sq" },   { USQmode, "usq" },
	{ DQmode, "dq" },   { UDQmode, "udq" },
	{ TQmode, "tq" },   { UTQmode, "utq" },
	{ HAmode, "ha" },   { UHAmode, "uha" },
	{ SAmode, "sa" },   { USAmode, "usa" },
	{ DAmode, "da" },   { UDAmode, "uda" },
	{ TAmode, "ta" },   { UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },   { UQQmode, "uqq" },
	{ HQmode, "hq" },   { UHQmode, "uhq" },
	{ SQmode, "sq" },   { USQmode, "usq" },
	{ DQmode, "dq" },   { UDQmode, "udq" },
	{ TQmode, "tq" },   { UTQmode, "utq" },
	{ HAmode, "ha" },   { UHAmode, "uha" },
	{ SAmode, "sa" },   { USAmode, "usa" },
	{ DAmode, "da" },   { UDAmode, "uda" },
	{ TAmode, "ta" },   { UTAmode, "uta" },
	{ QImode, "qi" },   { HImode, "hi" },
	{ SImode, "si" },   { DImode, "di" },
	{ SFmode, "sf" },   { DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S 4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
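
/* User-level view (sketch): on an AAPCS target the type built above
   behaves as if <stdarg.h> contained

     typedef struct __va_list { void *__ap; } va_list;

   which is why both the structure tag and the field name are
   ABI-visible.  */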
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  arm_selected_arch = NULL;
  arm_selected_cpu = NULL;
  arm_selected_tune = NULL;

  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
      arm_selected_tune = &all_cores[(int) arm_cpu_option];
    }

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}
      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }
  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch8 = (insn_flags & FL_ARCH8) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  arm_arch_crc = (insn_flags & FL_CRC32) != 0;
  arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
  if (arm_restrict_it == 2)
    arm_restrict_it = arm_arch8 && TARGET_THUMB2;

  if (!TARGET_THUMB2)
    arm_restrict_it = 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT && TARGET_NEON)
    error ("iWMMXt and NEON are incompatible");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_VXWORKS_RTP
      && !global_options_set.x_arm_pic_data_is_text_relative)
    arm_pic_data_is_text_relative = 0;
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */

  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
	unaligned_access = 1;
      else
	unaligned_access = 0;
    }
  else if (unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }

  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }
  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2)
	max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
    }
  else
    max_insns_skipped = current_tune->max_insns_skipped;
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have
     deemed it beneficial (signified by setting num_prefetch_slots to 1 or
     more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->num_prefetch_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->num_prefetch_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;
  if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
    param_sched_autopref_queue_depth = -1;
  else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
    param_sched_autopref_queue_depth = 0;
  else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
    param_sched_autopref_queue_depth = max_insn_queue_index + 1;
  else
    gcc_unreachable ();
  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
    flag_shrink_wrap = false;
  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
	  || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");

  /* Currently, for slow flash data, we just disable literal pools.  */
  if (target_slow_flash_data)
    arm_disable_literal_pool = true;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2)
    inline_asm_unified = 1;
  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1)
    flag_ipa_ra = 0;

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
3643 const_ok_for_arm (HOST_WIDE_INT i
)
3647 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3648 be all zero, or all one. */
3649 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3650 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3651 != ((~(unsigned HOST_WIDE_INT
) 0)
3652 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3655 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3657 /* Fast return for 0 and small values. We must do this for zero, since
3658 the code below can't handle that one case. */
3659 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3662 /* Get the number of trailing zeros. */
3663 lowbit
= ffs((int) i
) - 1;
3665 /* Only even shifts are allowed in ARM mode so round down to the
3666 nearest even number. */
3670 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3675 /* Allow rotated constants in ARM mode. */
3677 && ((i
& ~0xc000003f) == 0
3678 || (i
& ~0xf000000f) == 0
3679 || (i
& ~0xfc000003) == 0))
3686 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3689 if (i
== v
|| i
== (v
| (v
<< 8)))
3692 /* Allow repeated pattern 0xXY00XY00. */
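/* Illustrative examples for the tests above (hypothetical values, not
   from the surrounding code).  An ARM-mode immediate is an 8-bit value
   rotated right by an even amount, so:

     0x000000ab  valid   (0xab, no rotation)
     0x0000ab00  valid   (0xab rotated right by 24)
     0x80000001  valid   (caught by the i & ~0xc000003f test, which
                          allows an 8-bit field wrapping the word's top)
     0x000001fe  invalid in ARM mode (needs an odd rotation) but valid
                 in Thumb-2, where any shift position is allowed
     0x00ab00ab  invalid in ARM mode but valid in Thumb-2 as the
                 replicated pattern 0x00XY00XY.  */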
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
        return 1;
      else
        /* Otherwise, try mvn.  */
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
          && ((i & 0xfffff000) == 0
              || ((-i) & 0xfffff000) == 0))
        return 1;
      /* else fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
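/* For illustration (hypothetical values): the constant 0xffffff00 is
   not itself encodable, but for PLUS the check above accepts it since
   -i == 0x100 is, so the addition can be emitted as a subtraction of
   0x100.  Likewise an AND with 0xffffff00 is accepted through
   ~i == 0xff and will be carried out as a BIC.  */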
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
              && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
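/* For illustration (hypothetical value): an IOR with the DImode
   constant 0xffffffff00000012 splits into hi_val == 0xffffffff, which
   is always acceptable (ORring all-ones simply sets the whole word),
   and lo_val == 0x12, which const_ok_for_op accepts, so the predicate
   returns true.  */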
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
                    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
          && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
         constants by pushing them into memory so we must synthesize
         them in-line, regardless of the cost.  This is only likely to
         be more costly on chips that have load delay slots and we are
         compiling without running the scheduler (so no splitting
         occurred before the final instruction emission).

         Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
          && !cond
          && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
                                1, 0)
              > (arm_constant_limit (optimize_function_for_size_p (cfun))
                 + (code != SET))))
        {
          if (code == SET)
            {
              /* Currently SET is the only monadic value for CODE, all
                 the rest are dyadic.  */
              if (TARGET_USE_MOVT)
                arm_emit_movpair (target, GEN_INT (val));
              else
                emit_set_insn (target, GEN_INT (val));

              return 1;
            }
          else
            {
              rtx temp = subtargets ? gen_reg_rtx (mode) : target;

              if (TARGET_USE_MOVT)
                arm_emit_movpair (temp, GEN_INT (val));
              else
                emit_set_insn (temp, GEN_INT (val));

              /* For MINUS, the value is subtracted from, since we never
                 have subtraction of a constant.  */
              if (code == MINUS)
                emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
              else
                emit_set_insn (target,
                               gen_rtx_fmt_ee (code, mode, source, temp));
              return 2;
            }
        }
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
                           1);
}
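/* For illustration (hypothetical constant): a SET of 0x12345678 on a
   core with MOVW/MOVT becomes the two-instruction pair

	movw	rD, #0x5678
	movt	rD, #0x1234

   while on an older core arm_gen_constant synthesizes it from rotated
   8-bit immediates, roughly

	mov	rD, #0x12000000
	orr	rD, rD, #0x00340000
	orr	rD, rD, #0x00005600
	orr	rD, rD, #0x00000078  */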
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
                            struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
        {
          int consecutive_zeros = 0;

          if (!(val & (3 << i)))
            {
              while ((i < 32) && !(val & (3 << i)))
                {
                  consecutive_zeros += 2;
                  i += 2;
                }
              if (consecutive_zeros > best_consecutive_zeros)
                {
                  best_consecutive_zeros = consecutive_zeros;
                  best_start = i - consecutive_zeros;
                }
              i -= 2;
            }
        }
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
        {
          *return_sequence = tmp_sequence;
          insns1 = insns2;
        }
    }

  return insns1;
}
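/* Worked example (illustrative): for VAL == 0xe0000100 both starting
   points cost two insns: from best_start (bit 10) the sequence is
   { 0x100, 0xe0000000 }, while starting from zero (MSB first) it is
   { 0xe0000000, 0x100 }.  Because insns2 <= insns1, the MSB-first
   order is kept, so the final insn adds the small 0x100 and can later
   combine into a pre-indexed address as in the str example above.  */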
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
                              struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
        i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
        {
          loc = i;
          if (i <= 12 && TARGET_THUMB2 && code == PLUS)
            /* We can use addw/subw for the last 12 bits.  */
            result = remainder;
          else
            {
              /* Use an 8-bit shifted/rotated immediate.  */
              end = i - 8;
              if (end < 0)
                end += 32;
              result = remainder & ((0x0ff << end)
                                    | ((i < end) ? (0xff >> (32 - end))
                                                 : 0));
              i -= 8;
            }
        }
      else
        {
          /* Arm allows rotates by a multiple of two.  Thumb-2 allows
             arbitrary shifts.  */
          i -= TARGET_ARM ? 2 : 1;
          continue;
        }

      /* Next, see if we can do a better job with a thumb2 replicated
         constant.

         We do it this way around to catch the cases like 0x01F001E0 where
         two 8-bit immediates would work, but a replicated constant would
         only require one insn.

         TODO: 16-bit constants that don't clear all the bits, but still win.
         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
        {
          b1 = (remainder & 0xff000000) >> 24;
          b2 = (remainder & 0x00ff0000) >> 16;
          b3 = (remainder & 0x0000ff00) >> 8;
          b4 = remainder & 0xff;

          if (loc > 24)
            {
              /* The 8-bit immediate already found clears b1 (and maybe b2),
                 but must leave b3 and b4 alone.  */

              /* First try to find a 32-bit replicated constant that clears
                 almost everything.  We can assume that we can't do it in one,
                 or else we wouldn't be here.  */
              unsigned int tmp = b1 & b2 & b3 & b4;
              unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
                                  + (tmp << 24);
              unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
                                            + (tmp == b3) + (tmp == b4);
              if (tmp
                  && (matching_bytes >= 3
                      || (matching_bytes == 2
                          && const_ok_for_op (remainder & ~tmp2, code))))
                {
                  /* At least 3 of the bytes match, and the fourth has at
                     least as many bits set, or two of the bytes match
                     and it will only require one more insn to finish.  */
                  result = tmp2;
                  i = tmp != b1 ? 32
                      : tmp != b2 ? 24
                      : tmp != b3 ? 16
                      : 8;
                }

              /* Second, try to find a 16-bit replicated constant that can
                 leave three of the bytes clear.  If b2 or b4 is already
                 zero, then we can.  If the 8-bit from above would not
                 clear b2 anyway, then we still win.  */
              else if (b1 == b3 && (!b2 || !b4
                                    || (remainder & 0x00ff0000 & ~result)))
                {
                  result = remainder & 0xff00ff00;
                  i = 24;
                }
            }
          else if (loc > 16)
            {
              /* The 8-bit immediate already found clears b2 (and maybe b3)
                 and we don't get here unless b1 is already clear, but it will
                 leave b4 unchanged.  */

              /* If we can clear b2 and b4 at once, then we win, since the
                 8-bits couldn't possibly reach that far.  */
              if (b2 == b4)
                {
                  result = remainder & 0x00ff00ff;
                  i = 16;
                }
            }
        }

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
        code = PLUS;
    }
  while (remainder);

  return insns;
}
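/* For illustration (hypothetical value): for VAL == 0xff00ff01 in
   Thumb-2, the 16-bit branch above fires (b1 == b3 == 0xff and b2 is
   zero), peeling off 0xff00ff00 as one replicated constant and leaving
   just 0x01: two insns in total, where plain 8-bit immediates would
   have needed three (0xff000000, 0x00ff0000 and 0x01).  */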
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
                  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
                  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             GEN_INT (ARM_SIGN_EXTEND (val))));
          return 1;
        }

      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;

          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, source));
          return 1;
        }
      break;

    case AND:
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
          return 1;
        }
      if (remainder == 0xffffffff)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, source));
          return 1;
        }
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, source));
          return 1;
        }

      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             gen_rtx_NOT (mode, source)));
          return 1;
        }
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
         passed as (source + (-val)).  */
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             gen_rtx_NEG (mode, source)));
          return 1;
        }
      if (const_ok_for_arm (val))
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             gen_rtx_MINUS (mode, GEN_INT (val),
                                                            source)));
          return 1;
        }

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
        emit_constant_insn (cond,
                            gen_rtx_SET (target,
                                         (source
                                          ? gen_rtx_fmt_ee (code, mode, source,
                                                            GEN_INT (val))
                                          : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
        {
          if (mode == SImode && i == 16)
            /* Use UXTH in preference to UBFX, since on Thumb2 it's a
               smaller insn.  */
            emit_constant_insn (cond,
                                gen_zero_extendhisi2
                                (target, gen_lowpart (HImode, source)));
          else
            /* extzv only supports SImode, but we can coerce the operands
               into that mode.  */
            emit_constant_insn (cond,
                                gen_extzv_t2 (gen_lowpart (SImode, target),
                                              gen_lowpart (SImode, source),
                                              GEN_INT (i), const0_rtx));
        }

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
        clear_sign_bit_copies++;
      else
        break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
        set_sign_bit_copies++;
      else
        break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
        clear_zero_bit_copies++;
      else
        break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
        set_zero_bit_copies++;
      else
        break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
         to be negative.  This is a good way of doing it, since the shift
         may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
        {
          if (const_ok_for_arm
              (temp1 = ARM_SIGN_EXTEND (remainder
                                        << (set_sign_bit_copies - 1))))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (new_src, GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
          /* For an inverted constant, we will need to set the low bits,
             these will be shifted out of harm's way.  */
          temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
          if (const_ok_for_arm (~temp1))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (new_src, GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
        }

      /* See if we can calculate the value as the difference between two
         valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
        {
          int topshift = clear_sign_bit_copies & ~1;

          temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
                                   & (0xff000000 >> topshift));

          /* If temp1 is zero, then that means the 9 most significant
             bits of remainder were 1 and we've caused it to overflow.
             When topshift is 0 we don't need to do anything since we
             can borrow from 'bit 32'.  */
          if (temp1 == 0 && topshift != 0)
            temp1 = 0x80000000 >> (topshift - 1);

          temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

          if (const_ok_for_arm (temp2))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (new_src, GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_addsi3 (target, new_src,
                                                  GEN_INT (-temp2)));
                }

              return 2;
            }
        }

      /* See if we can generate this by setting the bottom (or the top)
         16 bits, and then shifting these into the other half of the
         word.  We only look for the simplest cases, to do more would cost
         too much.  Be careful, however, not to generate this when the
         alternative would take fewer insns.  */
      if (val & 0xffff0000)
        {
          temp1 = remainder & 0xffff0000;
          temp2 = remainder & 0x0000ffff;

          /* Overlaps outside this range are best done using other methods.  */
          for (i = 9; i < 24; i++)
            {
              if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
                  && !const_ok_for_arm (temp2))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET
                       (target,
                        gen_rtx_IOR (mode,
                                     gen_rtx_ASHIFT (mode, source,
                                                     GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }

          /* Don't duplicate cases already considered.  */
          for (i = 17; i < 24; i++)
            {
              if (((temp1 | (temp1 >> i)) == remainder)
                  && !const_ok_for_arm (temp1))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET (target,
                                    gen_rtx_IOR
                                    (mode,
                                     gen_rtx_LSHIFTRT (mode, source,
                                                       GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }
        }
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
         single instruction, and we can find a temporary to put it in,
         then this can be done in two instructions instead of 3-4.  */
      if (subtargets
          /* TARGET can't be NULL if SUBTARGETS is 0 */
          || (reload_completed && !reg_mentioned_p (target, source)))
        {
          if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
            {
              if (generate)
                {
                  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

                  emit_constant_insn (cond,
                                      gen_rtx_SET (sub, GEN_INT (val)));
                  emit_constant_insn (cond,
                                      gen_rtx_SET (target,
                                                   gen_rtx_fmt_ee (code, mode,
                                                                   source, sub)));
                }
              return 2;
            }
        }

      if (code == XOR)
        break;

      /*  Convert.
          x = y | constant (which is composed of set_sign_bit_copies of leading 1s
                            and the remainder 0s for e.g. 0xfff00000)
          x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

          This can be done in 2 instructions by using shifts with mov or mvn.
          e.g. for
          x = x | 0xfff00000;
          we generate.
          mvn   r0, r0, asl #12
          mvn   r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
          && (val & (-1 << (32 - set_sign_bit_copies))) == val)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_sign_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode,
                                                           source,
                                                           shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode, sub,
                                                             shift))));
            }
          return 2;
        }

      /* Convert
          x = y | constant (which has set_zero_bit_copies number of trailing ones).
           to
          x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

          E.g. for r0 = r0 | 0xfff
               mvn      r0, r0, lsr #12
               mvn      r0, r0, asl #12
          */
      if (set_zero_bit_copies > 8
          && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_zero_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode,
                                                             source,
                                                             shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode, sub,
                                                           shift))));
            }
          return 2;
        }

      /* This will never be reached for Thumb2 because orn is a valid
         instruction.  This is for Thumb1 and the ARM 32 bit cases.

         x = y | constant (such that ~constant is a valid constant)
         Transform this to
         x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              emit_constant_insn (cond,
                                  gen_rtx_SET (sub,
                                               gen_rtx_NOT (mode, source)));
              source = sub;
              if (subtargets)
                sub = gen_reg_rtx (mode);
              emit_constant_insn (cond,
                                  gen_rtx_SET (sub,
                                               gen_rtx_AND (mode, source,
                                                            GEN_INT (temp1))));
              emit_constant_insn (cond,
                                  gen_rtx_SET (target,
                                               gen_rtx_NOT (mode, sub)));
            }
          return 3;
        }
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = ((0xffffffff
                                       << (32 - clear_sign_bit_copies))
                                      & 0xffffffff);

          if ((remainder | shift_mask) != 0xffffffff)
            {
              HOST_WIDE_INT new_val
                = ARM_SIGN_EXTEND (remainder | shift_mask);

              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  insns = arm_gen_constant (AND, SImode, cond, new_val,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;
                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_sign_bit_copies);

              emit_insn (gen_ashlsi3 (new_src, source, shift));
              emit_insn (gen_lshrsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

          if ((remainder | shift_mask) != 0xffffffff)
            {
              HOST_WIDE_INT new_val
                = ARM_SIGN_EXTEND (remainder | shift_mask);

              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;

                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_zero_bit_copies);

              emit_insn (gen_lshrsi3 (new_src, source, shift));
              emit_insn (gen_ashlsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
                                            &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
                                            &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
        {
          rtx new_src, temp1_rtx;

          temp1 = immediates->i[i];

          if (code == SET || code == MINUS)
            new_src = (subtargets ? gen_reg_rtx (mode) : target);
          else if ((final_invert || i < (insns - 1)) && subtargets)
            new_src = gen_reg_rtx (mode);
          else
            new_src = target;

          if (can_invert)
            temp1 = ~temp1;
          else if (can_negate)
            temp1 = -temp1;

          temp1 = trunc_int_for_mode (temp1, mode);
          temp1_rtx = GEN_INT (temp1);

          if (code == SET)
            ;
          else if (code == MINUS)
            temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
          else
            temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

          emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
          source = new_src;

          if (code == SET)
            {
              can_negate = can_invert;
              can_invert = 0;
              code = PLUS;
            }
          else if (code == MINUS)
            code = PLUS;
        }
    }

  if (final_invert)
    {
      if (generate)
        emit_constant_insn (cond, gen_rtx_SET (target,
                                               gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
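/* For illustration (hypothetical values): for code == AND the positive
   route is disabled above (insns = 99) and the inverted immediates are
   used, so in ARM mode "x &= 0xfffefdff" is emitted as the two insns
   "bic rD, rS, #0x10000" and "bic rD, rD, #0x200".  For
   "x += 0xfffffdff" the negated value 0x201 splits into { 0x200, 0x1 },
   beating the positive sequence, so two subtractions are emitted.  */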
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                             bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
          || (!TARGET_ARM && (*code == GTU || *code == LEU)))
        {
          /* Missing comparison.  First try to use an available
             comparison.  */
          if (CONST_INT_P (*op1))
            {
              i = INTVAL (*op1);
              switch (*code)
                {
                case GT:
                case LE:
                  if (i != maxval
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      *code = *code == GT ? GE : LT;
                      return;
                    }
                  break;

                case GTU:
                case LEU:
                  if (i != ~((unsigned HOST_WIDE_INT) 0)
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      *code = *code == GTU ? GEU : LTU;
                      return;
                    }
                  break;

                default:
                  gcc_unreachable ();
                }
            }

          /* If that did not work, reverse the condition.  */
          if (!op0_preserve_value)
            {
              std::swap (*op0, *op1);
              *code = (int)swap_condition ((enum rtx_code)*code);
            }
        }
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
                        GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
          *code = *code == GT ? GE : LT;
          return;
        }
      break;

    case GE:
    case LT:
      if (i != ~maxval
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          *code = *code == GE ? GT : LE;
          return;
        }
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
          *code = *code == GTU ? GEU : LTU;
          return;
        }
      break;

    case GEU:
    case LTU:
      if (i != 0
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          *code = *code == GEU ? GTU : LEU;
          return;
        }
      break;

    default:
      gcc_unreachable ();
    }
}
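/* For illustration (hypothetical comparison): 0x1ff is not a valid ARM
   immediate and neither is -0x1ff, so "x > 0x1ff" cannot be emitted
   directly; the GT/LE case above rewrites it as "x >= 0x200", and
   0x200 loads in a single cmp.  */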
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : typed_noop_remove <rtx_def>
{
  typedef const rtx_def *value_type;
  typedef const rtx_def *compare_type;
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table <libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
         registers if the selected core only supports single-precision
         arithmetic, even if we are using the hard-float ABI.  The same is
         true for single-precision helpers, but we will never be using the
         hard-float ABI on a CPU which doesn't support single-precision
         operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
                                                        SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
                                                        DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
           && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
         even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
        return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
          && TARGET_AAPCS_BASED
          && TARGET_VFP
          && TARGET_HARD_FLOAT
          && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
          && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}

/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
        size += 32;
      if (TARGET_IWMMXT_ABI)
        size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (i.e. not including vectors and
         complex) are always returned in a register (or registers).
         We don't care about which register here, so we can short-cut
         some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
          && TREE_CODE (type) != VECTOR_TYPE
          && TREE_CODE (type) != COMPLEX_TYPE)
        return false;

      /* Any return value that is no larger than one word can be
         returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
        return false;

      /* Check any available co-processors to see if they accept the
         type as a register candidate (VFP, for example, can return
         some aggregates in consecutive registers).  These aren't
         available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
        return false;

      /* Vector values should be returned using ARM registers, not
         memory (unless they're over 16 bytes, which will break since
         we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
        return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
         larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
         if the type is 'integer like' and every addressable element
         has an offset of zero.  For practical purposes this means
         that the structure can have at most one non bit-field element
         and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
         have been created by C++.  */
      for (field = TYPE_FIELDS (type);
           field && TREE_CODE (field) != FIELD_DECL;
           field = DECL_CHAIN (field))
        continue;

      if (field == NULL)
        return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
        return true;

      /* ... Aggregates that are not themselves valid for returning in
         a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
        return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
         since they are not addressable.  */
      for (field = DECL_CHAIN (field);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (!DECL_BIT_FIELD_TYPE (field))
            return true;
        }

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
         integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (FLOAT_TYPE_P (TREE_TYPE (field)))
            return true;

          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
            return true;
        }

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
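/* For illustration (hypothetical types): under the APCS rules above,
   "struct { int a; }" is integer-like and comes back in r0, while
   "struct { float f; }" and "struct { int a, b; }" (a float member,
   or more than one word of data) both go in memory.  Under AAPCS the
   decision is mostly size-based: any aggregate of at most
   UNITS_PER_WORD bytes is returned in a register.  */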
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* Not a recognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
         (no argument is ever a candidate for a co-processor
         register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
        {
          if (user_pcs > ARM_PCS_AAPCS_LOCAL)
            sorry ("non-AAPCS derived PCS variant");
          else if (base_rules && user_pcs != ARM_PCS_AAPCS)
            error ("variadic functions must use the base AAPCS variant");
        }

      if (base_rules)
        return ARM_PCS_AAPCS;
      else if (user_convention)
        return user_pcs;
      else if (decl && flag_unit_at_a_time)
        {
          /* Local functions never leak outside this compilation unit,
             so we are free to use whatever conventions are
             appropriate.  */
          /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
          cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
          if (i && i->local)
            return ARM_PCS_AAPCS_LOCAL;
        }
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                    const_tree fntype ATTRIBUTE_UNUSED,
                    rtx libcall ATTRIBUTE_UNUSED,
                    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
         and 128-bit vector types, whether or not those modes are
         supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
        {
        case 8:
          mode = V2SImode;
          break;
        case 16:
          mode = V4SImode;
          break;
        default:
          return -1;
        }

      if (*modep == VOIDmode)
        *modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
         equivalent for the purposes of being homogeneous aggregates
         if they are the same size.  */
      if (*modep == mode)
        return 1;

      break;

    case ARRAY_TYPE:
      {
        int count;
        tree index = TYPE_DOMAIN (type);

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
        if (count == -1
            || !index
            || !TYPE_MAX_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
            || !TYPE_MIN_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
            || count < 0)
          return -1;

        count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
                  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case RECORD_TYPE:
      {
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count += sub_count;
          }

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
        /* These aren't very interesting except in a degenerate case.  */
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count = count > sub_count ? count : sub_count;
          }

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    default:
      break;
    }

  return -1;
}
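/* For illustration (hypothetical type): for "struct { float x, y, z; }"
   the walk above visits three REAL_TYPE fields, latches *MODEP to
   SFmode on the first and returns a count of 3, making the struct a
   homogeneous aggregate of three SFmode elements.  Adding an int
   member, or mixing in a double, makes a sub-call return -1 and
   disqualifies the whole type.  */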
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
        {
          sorry ("Thumb-1 hard-float VFP ABI");
          /* sorry() is not immediately fatal, so only display this once.  */
          seen_thumb1_vfp = true;
        }

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
                                       machine_mode mode, const_tree type,
                                       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
        *count = ag_count;
      else
        return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
                               machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                                &ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                             const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
                                                &pcum->aapcs_vfp_rmode,
                                                &pcum->aapcs_vfp_rcount);
}
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                    const_tree type ATTRIBUTE_UNUSED)
{
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
        pcum->aapcs_vfp_reg_alloc = mask << regno;
        if (mode == BLKmode
            || (mode == TImode && ! TARGET_NEON)
            || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
          {
            int i;
            int rcount = pcum->aapcs_vfp_rcount;
            int rshift = shift;
            machine_mode rmode = pcum->aapcs_vfp_rmode;
            rtx par;

            if (!TARGET_NEON)
              {
                /* Avoid using unsupported vector modes.  */
                if (rmode == V2SImode)
                  rmode = DImode;
                else if (rmode == V4SImode)
                  {
                    rmode = DImode;
                    rcount *= 2;
                    rshift /= 2;
                  }
              }
            par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
            for (i = 0; i < rcount; i++)
              {
                rtx tmp = gen_rtx_REG (rmode,
                                       FIRST_VFP_REGNUM + regno + i * rshift);
                tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                         GEN_INT (i * GET_MODE_SIZE (rmode)));
                XVECEXP (par, 0, i) = tmp;
              }

            pcum->aapcs_reg = par;
          }
        else
          pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
        return true;
      }
  return false;
}
static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
                               machine_mode mode,
                               const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                             &ag_mode, &count);

      if (!TARGET_NEON)
        {
          if (ag_mode == V2SImode)
            ag_mode = DImode;
          else if (ag_mode == V4SImode)
            {
              ag_mode = DImode;
              count *= 2;
            }
        }
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
        {
          rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
          tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
          XVECEXP (par, 0, i) = tmp;
        }

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call, this routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
*pcum
, machine_mode mode
,
5778 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5779 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5786 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5788 /* We aren't passed a decl, so we can't check that a call is local.
5789 However, it isn't clear that that would be a win anyway, since it
5790 might limit some tail-calling opportunities. */
5791 enum arm_pcs pcs_variant
;
5795 const_tree fndecl
= NULL_TREE
;
5797 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5800 fntype
= TREE_TYPE (fntype
);
5803 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5806 pcs_variant
= arm_pcs_default
;
5808 if (pcs_variant
!= ARM_PCS_AAPCS
)
5812 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5813 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
                           const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
                                                        type))
          return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
                                                             mode, type);
    }

  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}

static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
                  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
         preparation phase, these are handled elsewhere in the
         compiler.  */

      if (slot >= 0)
        {
          /* A Co-processor register candidate goes either in its own
             class of registers or on the stack.  */
          if (!pcum->aapcs_cprc_failed[slot])
            {
              /* C1.cp - Try to allocate the argument to co-processor
                 registers.  */
              if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
                return;

              /* C2.cp - Put the argument on the stack and note that we
                 can't assign any more candidates in this slot.  We also
                 need to note that we have allocated stack space, so that
                 we won't later try to split a non-cprc candidate between
                 core registers and the stack.  */
              pcum->aapcs_cprc_failed[slot] = true;
              pcum->can_split = false;
            }

          /* We didn't get a register, so this argument goes on the
             stack.  */
          gcc_assert (pcum->can_split == false);
          return;
        }
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
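/* For illustration (hypothetical signature): passing
   f (int a, long long b, int c) under the core-register rules above
   puts A in r0 (C4), rule C3 then rounds the NCRN from 1 up to 2 for
   the doubleword-aligned B, which takes r2/r3, and C fails C4 and C5
   (no core registers remain), so C6-C8 send it to the stack.  */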
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname,
                          tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
        pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
        {
          int i;

          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
            {
              pcum->aapcs_cprc_failed[i] = false;
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
            }
        }
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg;
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
    }
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
          || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
                  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
        {
          pcum->can_split = false;
          return NULL_RTX;
        }
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
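/* For example (illustrative only): with the even-pair rule above, a call
   such as f (int a, long long b) allocates A to r0, skips r1, and places
   B in the r2/r3 pair; a further integer argument would then go on the
   stack.  */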
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
          ? DOUBLEWORD_ALIGNMENT
          : PARM_BOUNDARY);
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
                       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
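/* Worked example (not from the original sources): in a legacy-ABI call
   passing three ints followed by a long long, the long long starts at r3
   with only one register left, so this hook reports
   (NUM_ARG_REGS (4) - nregs (3)) * UNITS_PER_WORD = 4 bytes in r3, with
   the remaining 4 bytes of the value on the stack.  */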
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
                          const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
                                                              type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
                       machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
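/* Illustrative example (assumed, using GNU C's variable-sized types):
   in

       void f (int n) { struct s { char c[n]; } v; g (v); }

   TYPE_SIZE of struct s is not an INTEGER_CST, so V would be passed to
   G by reference under this ARM-specific extension.  */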
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
  LONG,         /* #pragma long_calls is in effect.  */
  SHORT         /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
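/* Typical user-level usage of these pragmas (illustrative):

       #pragma long_calls
       void far_func (void);      // calls become 32-bit long calls
       #pragma no_long_calls
       void near_func (void);     // calls use a plain BL
       #pragma long_calls_off
       // back to the command-line default

   Each handler above merely latches the state that
   arm_set_default_type_attributes later reads.  */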
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
                          bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
        {
          warning (OPT_Wattributes, "%qE attribute only applies to functions",
                   name);
          *no_add_attrs = true;
        }
      /* FIXME: the argument if any is checked for type attributes;
         should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
          || TREE_CODE (*node) == METHOD_TYPE)
        {
          if (arm_isr_value (args) == ARM_FT_UNKNOWN)
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
              *no_add_attrs = true;
            }
        }
      else if (TREE_CODE (*node) == POINTER_TYPE
               && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
                   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
               && arm_isr_value (args) != ARM_FT_UNKNOWN)
        {
          *node = build_variant_type_copy (*node);
          TREE_TYPE (*node) = build_type_attribute_variant
            (TREE_TYPE (*node),
             tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
          *no_add_attrs = true;
        }
      else
        {
          /* Possibly pass this attribute on from the type to a decl.  */
          if (flags & ((int) ATTR_FLAG_DECL_NEXT
                       | (int) ATTR_FLAG_FUNCTION_NEXT
                       | (int) ATTR_FLAG_ARRAY_NEXT))
            {
              *no_add_attrs = true;
              return tree_cons (name, args, NULL_TREE);
            }
          else
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
            }
        }
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
                          int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
        return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
        return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
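/* For instance (illustrative): assigning a function declared
   __attribute__((long_call)) to a pointer-to-function type carrying
   __attribute__((short_call)) makes l1 != l2 above, so the types are
   reported as incompatible (return value 0).  */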
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
        attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
        attr_name = get_identifier ("short_call");
      else
        return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
        return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
         example that we aren't returning a value from the sibling in
         a VFP register but then need to transfer it to a core
         register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                              cfun->decl, false);
      if (!rtx_equal_p (a, b))
        return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
          && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx_insn *seq, *insn;

          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
                  && arm_pic_register > LAST_LO_REGNUM)
                emit_move_insn (cfun->machine->pic_reg,
                                gen_rtx_REG (Pmode, arm_pic_register));
              else
                arm_load_pic_register (0UL);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATION (insn) = prologue_location;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              insert_insn_on_edge (seq,
                                   single_succ_edge
                                   (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
            }
        }
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF &&
               SYMBOL_REF_LOCAL_P (orig)))
          && NEED_GOT_RELOC
          && arm_pic_data_is_text_relative)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          rtx pat;
          rtx mem;

          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register ();

          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

          /* Make the MEM as close to a constant as possible.  */
          mem = SET_SRC (pat);
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
          MEM_READONLY_P (mem) = 1;
          MEM_NOTRAP_P (mem) = 1;

          insn = emit_insn (pat);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (CONST_INT_P (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
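/* Sketch of the RTL produced for a non-local symbol above (illustrative;
   the exact pattern comes from calculate_pic_address in arm.md):

       (set (reg R)
            (mem/u (plus (reg PIC) (unspec [(symbol_ref "x")] UNSPEC_PIC_SYM))))

   with a REG_EQUAL note of (symbol_ref "x"), so later passes may treat R
   as a constant load.  */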
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
          ? crtl->args.info.aapcs_ncrn < 4
          : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else if (arm_pic_register != INVALID_REGNUM
                   && arm_pic_register > LAST_LO_REGNUM
                   && REGNO (pic_reg) <= LAST_LO_REGNUM)
            {
              emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
              emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
              emit_use (gen_rtx_REG (Pmode, arm_pic_register));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx, insn;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                               UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));

  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
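/* Example of an rtx this accepts (illustrative):
   (minus (symbol_ref "sym") (label_ref L)).  The assembler can resolve
   such a difference PC-relatively, so no GOT entry is needed.  */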
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && !CONST_INT_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
        return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && CONST_INT_P (op)
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and -1024~1024
               (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* ??? Can we assume ldrd for thumb2?  */
          /* Thumb-2 ldrd only has reg+const addressing modes.  */
          /* ldrd supports offsets of +-1020.
             However the ldr fallback does not.  */
          return val > -256 && val < 256 && (val & 3) == 0;
        }
      else
        return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && CONST_INT_P (op)
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (GET_CODE (x) == LABEL_REF
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && CONST_INT_P (XEXP (x, 1))
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      else if (REG_P (XEXP (x, 0))
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
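/* These ranges mirror the 16-bit Thumb load/store encodings
   (illustrative): ldrb/strb take a 5-bit byte offset (0-31), ldrh/strh a
   5-bit offset of halfwords (0-62, even), and ldr/str a 5-bit offset of
   words (0-124, word-aligned).  E.g. thumb_legitimate_offset_p (HImode, 63)
   is false both because 63 is odd and because it exceeds 62.  */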
bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
        return LO_REGS;
      else
        return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme */
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
                                                     GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
        {
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
          emit_move_insn (reg, gen_const_mem (SImode, reg));
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      gcc_unreachable ();
    }
}
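/* Roughly, the four TLS dialects handled above correspond to the usual
   access sequences (illustrative):
     global-dynamic:  r0 = __tls_get_addr (&gd_descriptor_for_x)
     local-dynamic:   base = __tls_get_addr (&ldm_descriptor) once,
                      then base + dtpoff(x) per variable
     initial-exec:    tp + *(GOT entry holding tpoff(x))
     local-exec:      tp + tpoff(x) as a link-time constant.  */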
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
        {
          addend = XEXP (XEXP (x, 0), 1);
          x = XEXP (XEXP (x, 0), 0);
        }

      if (GET_CODE (x) != SYMBOL_REF)
        return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
        {
          x = gen_rtx_PLUS (SImode, x, addend);
          orig_x = x;
        }
      else
        return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
        return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
          && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && CONST_INT_P (xop1))
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode
              || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (Pmode, base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only and
         hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
        {
          /* It'll most probably be more efficient to generate the base
             with more bits set and use a negative index instead.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
        return true;

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
         TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
        iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (CONST_INT_P (x)
          || CONST_DOUBLE_P (x)
          || CONSTANT_ADDRESS_P (x)
          || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)                                            \
  (REG_P (X)                                                            \
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)                    \
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
              + 4 * ((MEM_P (SET_SRC (x)))
                     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
        {
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
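/* Note on the units above (illustrative): COSTS_N_INSNS (n) scales n by
   the cost of one instruction (4*n in GCC's convention), so a Thumb-1
   SImode shift reports COSTS_N_INSNS (1) = 4 while the multiply fallback
   reports COSTS_N_INSNS (1) + 16, steering the optimizers away from the
   more expensive sequence.  */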
static inline bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
  machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  rtx operand;
  enum rtx_code code = GET_CODE (x);
  *total = 0;

  switch (code)
    {
    case MEM:
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      if (TARGET_HARD_FLOAT && mode == SFmode)
	*total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (20);
      return false;

    case ROTATE:
      if (REG_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
      else if (!CONST_INT_P (XEXP (x, 1)))
	*total = rtx_cost (XEXP (x, 1), code, 1, speed);

      /* Fall through */
    case ROTATERT:
      if (mode != SImode)
	{
	  *total += COSTS_N_INSNS (4);
	  return true;
	}

      /* Fall through */
    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      if (mode == DImode)
	{
	  *total += COSTS_N_INSNS (3);
	  return true;
	}

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
	 and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
	  && outer != SET && !CONST_INT_P (XEXP (x, 1)))
	++*total;

      return true;

    case MINUS:
      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
	  if (CONST_INT_P (XEXP (x, 0))
	      && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	    {
	      *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_arm (INTVAL (XEXP (x, 1))))
	    {
	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	      return true;
	    }

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 0))
		  && arm_const_double_rtx (XEXP (x, 0)))
		{
		  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
		  return true;
		}

	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		{
		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
		  return true;
		}

	      return false;
	    }
	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      *total = COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 0))
	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
	  return true;
	}

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
	  return true;
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  if (REG_P (XEXP (XEXP (x, 1), 0))
	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);

	  return true;
	}

      /* Fall through */

    case PLUS:
      if (code == PLUS && arm_arch6 && mode == SImode
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
			      0, speed);
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      /* MLA: All arguments must be registers.  We filter out
	 multiplication by a power of two, so that we fall down into
	 the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  /* The cost comes from the cost of the multiply.  */
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		{
		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
		  return true;
		}

	      return false;
	    }

	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
	  if (REG_P (XEXP (XEXP (x, 0), 0))
	      && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);
	  return true;
	}

      /* Fall through */

    case AND: case XOR: case IOR:

      /* Normally the frame registers will be split into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
	  && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
	  && !CONST_INT_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1);

      if (mode == DImode)
	{
	  *total += COSTS_N_INSNS (2);
	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	    {
	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	      return true;
	    }

	  return false;
	}

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      if (subcode == UMIN || subcode == UMAX
	  || subcode == SMIN || subcode == SMAX)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      return false;

    case MULT:
      /* This should have been handled by the CPU specific routines.  */
      gcc_unreachable ();

    case TRUNCATE:
      if (arm_arch3m && mode == SImode
	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
	{
	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
	  return true;
	}
      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
      return false;

    case NEG:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      if (mode == SImode && code == NOT)
	{
	  subcode = GET_CODE (XEXP (x, 0));
	  if (subcode == ASHIFT || subcode == ASHIFTRT
	      || subcode == LSHIFTRT
	      || subcode == ROTATE || subcode == ROTATERT
	      || (subcode == MULT
		  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
	    {
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	      /* Register shifts cost an extra cycle.  */
	      if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
							subcode, 1, speed);
	      return true;
	    }
	}

      return false;

    case IF_THEN_ELSE:
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	{
	  *total = COSTS_N_INSNS (4);
	  return true;
	}

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
	    && REG_P (XEXP (operand, 0))
	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
	*total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
		 + rtx_cost (XEXP (x, 2), code, 2, speed));
      return true;

    case NE:
      if (mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case GE:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case LT:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case EQ:
    case GT:
    case LE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
    scc_insn:
      /* SCC insns.  In the case where the comparison has already been
	 performed, then they cost 2 instructions.  Otherwise they need
	 an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
	return true;

      /* Fall through */
    case COMPARE:
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
	{
	  *total = 0;
	  return true;
	}

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      return false;

    case UMIN:
    case UMAX:
    case SMIN:
    case SMAX:
      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
      if (!CONST_INT_P (XEXP (x, 1))
	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
	*total += rtx_cost (XEXP (x, 1), code, 1, speed);
      return true;

    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	  *total = COSTS_N_INSNS (20);
	  return false;
	}
      *total = COSTS_N_INSNS (1);
      if (mode == DImode)
	*total += COSTS_N_INSNS (3);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  rtx op = XEXP (x, 0);
	  machine_mode opmode = GET_MODE (op);

	  if (mode == DImode)
	    *total += COSTS_N_INSNS (1);

	  if (opmode != SImode)
	    {
	      if (MEM_P (op))
		{
		  /* If !arm_arch4, we use one of the extendhisi2_mem
		     or movhi_bytes patterns for HImode.  For a QImode
		     sign extension, we first zero-extend from memory
		     and then perform a shift sequence.  */
		  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
		    *total += COSTS_N_INSNS (2);
		}
	      else if (arm_arch6)
		*total += COSTS_N_INSNS (1);

	      /* We don't have the necessary insn, so we need to perform some
		 other operation.  */
	      else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
		/* An and with constant 255.  */
		*total += COSTS_N_INSNS (1);
	      else
		/* A shift sequence.  Increase costs slightly to avoid
		   combining two shifts into an extend operation.  */
		*total += COSTS_N_INSNS (2) + 1;
	    }

	  return false;
	}

      switch (GET_MODE (XEXP (x, 0)))
	{
	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V4QImode:
	case V2HImode:
	  *total = COSTS_N_INSNS (1);
	  return false;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x))
	  || const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
						  INTVAL (x), NULL_RTX,
						  NULL_RTX, 0, 0));
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case HIGH:
      *total = COSTS_N_INSNS (1);
      return true;

    case LO_SUM:
      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    case SET:
      /* The vec_extract patterns accept memory operands that require an
	 address reload.  Account for the cost of that reload to give the
	 auto-inc-dec pass an incentive to try to replace them.  */
      if (TARGET_NEON && MEM_P (SET_DEST (x))
	  && GET_CODE (SET_SRC (x)) == VEC_SELECT)
	{
	  *total = rtx_cost (SET_DEST (x), code, 0, speed);
	  if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
	    *total += COSTS_N_INSNS (1);
	  return true;
	}
      /* Likewise for the vec_set patterns.  */
      if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
	  && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
	  && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
	{
	  rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
	  *total = rtx_cost (mem, code, 0, speed);
	  if (!neon_vector_mem_operand (mem, 2, true))
	    *total += COSTS_N_INSNS (1);
	  return true;
	}
      return false;

    case UNSPEC:
      /* We cost this as high as our memory costs to allow this to
	 be hoisted from loops.  */
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
	{
	  *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
	}
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    default:
      *total = COSTS_N_INSNS (4);
      return false;
    }
}
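/* Illustrative walk-through (not part of the original sources): for a
   SImode x of the form (plus (mult (reg A) (const_int 4)) (reg B)),
   arm_rtx_costs_1 falls through from PLUS into the logical-op handling,
   matches the power-of-two MULT as a shifter operand, and returns a
   total of COSTS_N_INSNS (1) plus the costs of A and B -- i.e. the cost
   of a single ADD with a shifted operand.  */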
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* On purpose fall through for normal RTX.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return COSTS_N_INSNS (words)
	     + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
				    || satisfies_constraint_K (SET_SRC (x))
				       /* thumb1_movdi_insn.  */
				    || ((words > 1) && MEM_P (SET_SRC (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
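/* Worked example for the MEM formula above (illustrative): a DImode
   access with UNITS_PER_WORD == 4 is costed at COSTS_N_INSNS (1)
   + COSTS_N_INSNS (1) * ((8 - 1) / 4), i.e. two insns, plus one more
   insn when the address is a constant-pool reference.  */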
/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		    int *total)
{
  machine_mode mode = GET_MODE (x);
  if (TARGET_THUMB1)
    {
      *total = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
  switch (code)
    {
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*total = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*total = COSTS_N_INSNS (2);
      else
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
	  return true;
	}
      else if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
	  /* Slightly disparage register shifts, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
	  return true;
	}

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      if (mode == SImode)
	{
	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
	      || subcode1 == ROTATE || subcode1 == ROTATERT
	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
	      || subcode1 == ASHIFTRT)
	    {
	      /* It's just the cost of the two operands.  */
	      *total = 0;
	      return false;
	    }

	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
	  *total += rtx_cost (XEXP (x, 1), code, 1, false);
	  return true;
	}

      /* Fall through */
    case AND: case XOR: case IOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));

	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
	      || subcode == LSHIFTRT || subcode == ASHIFTRT
	      || (code == AND && subcode == NOT))
	    {
	      /* It's just the cost of the two operands.  */
	      *total = 0;
	      return false;
	    }
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case MULT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      return false;

    case IF_THEN_ELSE:
      *total = 0;
      return false;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode))
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      return arm_rtx_costs_1 (x, outer_code, total, 0);

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x)))
	/* A multiplication by a constant requires another instruction
	   to load the constant to a register.  */
	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
				? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
	{
	  if (outer_code == COMPARE || outer_code == PLUS
	      || outer_code == MINUS)
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	}
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer_code == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
	 cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;
      return true;

    default:
      if (mode != VOIDmode)
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*total = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
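/* Illustrative example for the CONST_INT handling above: with
   outer_code == PLUS and a constant whose negation (but neither the
   value itself nor its bitwise complement) is encodable as a rotated
   8-bit immediate, the size cost is zero, because the ADD can simply
   be emitted as a SUB of the negated value.  The same constant under
   outer_code == SET would cost one instruction.  */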
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
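/* Usage sketch (illustrative): given op = (mult (reg A) (const_int 8)),
   shifter_op_p returns (reg A) and leaves *SHIFT_REG untouched, since
   the multiply is really "A << 3".  For op = (ashift (reg A) (reg B))
   it returns (reg A) and sets *SHIFT_REG to (reg B), signalling a
   register-controlled shift to the callers below.  */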
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  gcc_assert (GET_CODE (x) == UNSPEC);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      *cost = COSTS_N_INSNS (1);
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return false;
}
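/* Illustrative consequence of the above (not from the original
   sources): an unaligned DImode load spans two registers, so at -Os it
   is costed as COSTS_N_INSNS (2); when optimizing for speed, two
   per-word load costs plus the tuning table's load_unaligned penalty
   are added on top.  */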
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
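/* For instance (assuming the standard COSTS_N_INSNS scaling),
   LIBCALL_COST (2) expands to COSTS_N_INSNS (20) when optimizing for
   speed and COSTS_N_INSNS (4) at -Os: two insns for the arguments plus
   18 (or 2) for the call and result handling.  */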
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);	\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)	\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
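/* The macro is invoked as e.g. HANDLE_NARROW_SHIFT_ARITH (MINUS, 0) in
   the narrow-mode PLUS/MINUS handling below: it tests whether operand
   IDX of X is a left shift (or power-of-two multiply) that can fold
   into the arithmetic insn, and if so accumulates the operand costs
   into *cost and returns from the enclosing function.  */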
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		   const struct cpu_cost_table *extra_cost,
		   int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  || mode == DImode)
				 ? 2 : 1);
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;
	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;
	  goto const_int_cost;
	}

      return false;

    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*cost = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*cost = COSTS_N_INSNS (2);
      else
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* For speed optimizations, add the costs of the address and
	 accessing memory.  */
      if (speed_p)
#ifdef NOT_YET
	*cost += (extra_cost->ldst.load
		  + arm_address_cost (XEXP (x, 0), mode,
				      ADDR_SPACE_GENERIC, speed_p));
#else
	*cost += extra_cost->ldst.load;
#endif
      return true;

    case PARALLEL:
    {
      /* Calculations of LDM costs are complex.  We assume an initial cost
	 (ldm_1st) which will load the number of registers mentioned in
	 ldm_regs_per_insn_1st registers; then each additional
	 ldm_regs_per_insn_subsequent registers cost one more insn.  The
	 formula for N regs is thus:

	   ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				     + ldm_regs_per_insn_subsequent - 1)
				    / ldm_regs_per_insn_subsequent).

	 Additional costs may also be added for addressing.  A similar
	 formula is used for STM.  */
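      /* Worked example (illustrative, with hypothetical tuning values):
	 for ldm_regs_per_insn_1st == 2 and
	 ldm_regs_per_insn_subsequent == 2, an 8-register LDM is costed
	 as ldm_1st + COSTS_N_INSNS ((MAX (8 - 2, 0) + 2 - 1) / 2)
	       == ldm_1st + COSTS_N_INSNS (3).  */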
      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      *cost = COSTS_N_INSNS (1);

      if (is_ldm || is_stm)
	{
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
			      ? extra_cost->ldst.ldm_regs_per_insn_1st
			      : extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
			      ? extra_cost->ldst.ldm_regs_per_insn_subsequent
			      : extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					 + regs_per_insn_sub - 1)
					/ regs_per_insn_sub);
	    }
	}
      return true;
    }

    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost = COSTS_N_INSNS (speed_p
			       ? extra_cost->fp[mode != SFmode].div : 1);
      else if (mode == SImode && TARGET_IDIV)
	*cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
      else
	*cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */

    case MOD:
    case UMOD:
      *cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost = (COSTS_N_INSNS (2)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost = (COSTS_N_INSNS (3)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost = (COSTS_N_INSNS (1)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost = (COSTS_N_INSNS (1)
		       + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  *cost = COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		}
	      else
		{
		  *cost = COSTS_N_INSNS (2);
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;

    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost = COSTS_N_INSNS (10);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost = COSTS_N_INSNS (5);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, code, 0, speed_p)
			+ rtx_cost (mul_op1, code, 0, speed_p)
			+ rtx_cost (sub_op, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;

	  *cost = COSTS_N_INSNS (1);

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
	      non_shift_op = XEXP (x, 0);
	    }
	  else
	    non_shift_op = XEXP (x, 1);

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, code, 0, speed_p)
			+ rtx_cost (non_shift_op, code, 0, speed_p));
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 0)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
	      else
		*cost += rtx_cost (op1, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0),
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, code, 0, speed_p)
			+ rtx_cost (mul_op1, code, 0, speed_p)
			+ rtx_cost (add_op, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  *cost = COSTS_N_INSNS (1);
	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
				  speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op = XEXP (x, 0);

	      *cost = COSTS_N_INSNS (1);

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == DImode)
	{
	  if (arm_arch3m
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  *cost = COSTS_N_INSNS (2);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
				  speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  *cost = COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost = COSTS_N_INSNS (2);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  *cost = COSTS_N_INSNS (1);

	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      if (mode == SImode)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
	      return true;
	    }
	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}

      if (mode == DImode)
	{
	  if (arm_arch3m
	      && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0),
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost = COSTS_N_INSNS (2);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost = COSTS_N_INSNS (2);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
					speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
					speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  *cost = COSTS_N_INSNS (1);
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;

    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost = COSTS_N_INSNS (4);
	    return true;
	  }

	int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost = COSTS_N_INSNS (2);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  *cost = COSTS_N_INSNS (1);
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
		  return true;
		}

	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;

    case EQ:
    case NE:
    case LT:
    case LE:
    case GT:
    case GE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  *cost = COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		  /* RSBS T1, Rn, #0
		     ADC  Rd, Rn, T1.  */

		case NE:
		  /* SUBS T1, Rn, #1
		     SBC  Rd, Rn, T1.  */
		  *cost = COSTS_N_INSNS (2);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  /* ASR  Rd, Rn, #31
		     ADD  Rd, Rn, #1.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost = COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;

    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost = COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}
      else  /* GET_MODE (XEXP (x, 0)) == SImode.  */
	*cost = COSTS_N_INSNS (1);

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
	}

      return true;

    case CONST_INT:
      *cost = 0;
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	mode = SImode;
      else
	mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))
	outer_code = SET;

    const_int_cost:
      if (mode == SImode)
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));
	  /* Extra costs?  */
	}
      else
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32,
						      NULL, NULL, 0, 0));
	  /* Extra costs?  */
	}

      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost = COSTS_N_INSNS (2);
	  else
	    *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
	}
      else
	*cost = COSTS_N_INSNS (2);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      /* Fixme.  */
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (vfp3_const_double_rtx (x))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      return true;
	    }

	  if (speed_p)
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    *cost = COSTS_N_INSNS (2 + (mode == DFmode));

	  return true;
	}
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer_code == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      else
	*cost = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      *cost = COSTS_N_INSNS (1);
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      *cost = COSTS_N_INSNS (1);
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return false;

    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost = COSTS_N_INSNS (2);
      return false;

    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
			      speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
				0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;

    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost = COSTS_N_INSNS (2);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
      return true;

    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_FPU_ARMV8
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;

    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;

    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  *cost = COSTS_N_INSNS (1);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, FMA, 2, speed_p);

	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fma;

	  return true;
	}

      *cost = LIBCALL_COST (3);
      return false;

    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_FPU_ARMV8)
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs?  */
	}
      *cost = LIBCALL_COST (1);
      return false;

    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;

    case CALL:
      *cost = COSTS_N_INSNS (1);
      return true;

    case ASM_OPERANDS:
      {
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

	*cost = COSTS_N_INSNS (asm_length + num_operands);
	return true;
      }
    default:
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs when optimizing for size.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
	       int *total, bool speed)
{
  bool result;

  if (TARGET_OLD_RTX_COSTS
      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
    {
      /* Old way.  (Deprecated.)  */
      if (!speed)
	result = arm_size_rtx_costs (x, (enum rtx_code) code,
				     (enum rtx_code) outer_code, total);
      else
	result = current_tune->rtx_costs (x, (enum rtx_code) code,
					  (enum rtx_code) outer_code, total,
					  speed);
    }
  else
    {
      if (current_tune->insn_extra_cost)
	result = arm_new_rtx_costs (x, (enum rtx_code) code,
				    (enum rtx_code) outer_code,
				    current_tune->insn_extra_cost,
				    total, speed);
      /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
	 && current_tune->insn_extra_cost != NULL  */
      else
	result = arm_new_rtx_costs (x, (enum rtx_code) code,
				    (enum rtx_code) outer_code,
				    &generic_extra_costs, total, speed);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
	  || mode == DImode)
	{
	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 2;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	    {
	      i >>= booth_unit_size;
	      cost++;
	    }

	  *total = COSTS_N_INSNS (cost);
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
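/* A minimal standalone sketch (illustrative only, not part of GCC; the
   function name is made up) of the Booth-style estimate used above: one
   extra cycle is charged per BOOTH_UNIT_SIZE bits of the multiplier until
   no significant bits remain.  Kept under #if 0 so it is never compiled.  */
#if 0
static int
booth_mult_cycles_example (unsigned int multiplier, int base_cost,
			   int booth_unit_size)
{
  int cycles = base_cost;	/* 4 if the constant is encodable, else 8.  */
  int j;

  /* E.g. 0x14 (five significant bits) with a 2-bit unit adds three
     cycles before the remaining multiplier bits reach zero.  */
  for (j = 0; multiplier && j < 32; j += booth_unit_size)
    {
      multiplier >>= booth_unit_size;
      cycles++;
    }
  return cycles;
}
#endif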
/* RTX cost for cores with a fast multiply unit (M variants).  */

static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  /* ??? should thumb2 use different costs?  */
  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
      if (mode == DImode
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (5);
	  return false;
	}

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 8;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	    {
	      i >>= booth_unit_size;
	      cost++;
	    }

	  *total = COSTS_N_INSNS (cost);
	  return false;
	}

      if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (4);
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	}

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		      int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) != MULT)
	return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
	 will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);
      return false;

    case MULT:
      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
      if (mode == DImode
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (5);
	  return false;
	}

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* If operand 1 is a constant we can more accurately
	     calculate the cost of the multiply.  The multiplier can
	     retire 15 bits on the first cycle and a further 12 on the
	     second.  We do, of course, have to load the constant into
	     a register first.  */
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
	  /* There's a general overhead of one cycle.  */
	  int cost = 1;
	  unsigned HOST_WIDE_INT masked_const;

	  if (i & 0x80000000)
	    i = ~i;

	  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

	  masked_const = i & 0xffff8000;
	  if (masked_const != 0)
	    {
	      cost++;
	      masked_const = i & 0xf8000000;
	      if (masked_const != 0)
		cost++;
	    }
	  *total = COSTS_N_INSNS (cost);
	  return false;
	}

      if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (3);
	  return false;
	}

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
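/* Worked example (illustrative, not from the original source): a multiplier
   of 0x12 has no bits above bit 14, so neither mask test fires and the
   estimate stays at the one-cycle overhead; 0x00ff0000 intersects the
   0xffff8000 mask (one extra cycle); 0x7c000000 additionally intersects the
   0xf8000000 mask, giving COSTS_N_INSNS (3) in total.  */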
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		  int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      switch (code)
	{
	case MULT:
	  /* Small multiply: 32 cycles for an integer multiply inst.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    *total = COSTS_N_INSNS (32);
	  else
	    *total = COSTS_N_INSNS (3);
	  return true;

	default:
	  *total = thumb1_rtx_costs (x, code, outer_code);
	  return true;
	}
    }

  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
      if (mode == DImode
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (5);
	  return false;
	}

      if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	}

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
	if (recog_memoized (insn) >= 0
	    && recog_memoized (dep) >= 0)
	  {
	    if (GET_CODE (PATTERN (insn)) == SET)
	      {
		if (GET_MODE_CLASS
		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		  {
		    enum attr_type attr_type_insn = get_attr_type (insn);
		    enum attr_type attr_type_dep = get_attr_type (dep);

		    /* By default all dependencies of the form
		       s0 = s0 <op> s1
		       s0 = s0 <op> s2
		       have an extra latency of 1 cycle because
		       of the input and output dependency in this
		       case.  However this gets modeled as a true
		       dependency and hence all these checks.  */
		    if (REG_P (SET_DEST (PATTERN (insn)))
			&& REG_P (SET_DEST (PATTERN (dep)))
			&& reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
						    SET_DEST (PATTERN (dep))))
		      {
			/* FMACS is a special case where the dependent
			   instruction can be issued 3 cycles before
			   the normal latency in case of an output
			   dependency.  */
			if ((attr_type_insn == TYPE_FMACS
			     || attr_type_insn == TYPE_FMACD)
			    && (attr_type_dep == TYPE_FMACS
				|| attr_type_dep == TYPE_FMACD))
			  {
			    if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
			      *cost = insn_default_latency (dep) - 3;
			    else
			      *cost = insn_default_latency (dep);
			    return false;
			  }
			else
			  {
			    if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
			      *cost = insn_default_latency (dep) + 1;
			    else
			      *cost = insn_default_latency (dep);
			  }
			return false;
		      }
		  }
	      }
	  }
	break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 3;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}

/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
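/* Illustrative example (not from the original source): a vec_construct of a
   V4SI vector has TYPE_VECTOR_SUBPARTS == 4, so the cost reported above is
   4 / 2 + 1 = 3.  */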
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID(ready [first_older_only]),
	     INSN_UID(ready [first_younger]));
  rtx_insn *first_older_only_insn = ready [first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    {
      ready[i] = ready[i+1];
    }

  ready[i] = first_older_only_insn;
  return;
}
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store; there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}

int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}
static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  if (REAL_VALUES_EQUAL (r, value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     valid = (-1)^s * n * 2^-r

   where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).
*/

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
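/* Worked example (illustrative, not from the original source): 1.0 can be
   written as +16 * 2^-4, i.e. s = 0, n = 16, r = 4.  The encoding is then
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70, which is exactly the imm8
   field fconsts/fconstd expect for 1.0.  */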
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}

/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.
*/
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    {
      n_elts = CONST_VECTOR_NUNITS (op);
      innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
    }
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
	mode = DImode;
      innersize = GET_MODE_SIZE (mode);
    }

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
	{
	  rtx elt = CONST_VECTOR_ELT (op, i);
	  REAL_VALUE_TYPE re;

	  REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

	  if (!REAL_VALUES_EQUAL (r0, re))
	    return -1;
	}

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (CONST_INT_P (el))
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (CONST_DOUBLE_P (el))
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (CONST_DOUBLE_P (el))
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
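/* Illustrative example (not from the original source): the V4SI constant
   {0xab, 0xab, 0xab, 0xab} splats to the byte vector
   ab 00 00 00 ab 00 00 00 ..., which satisfies the variant-0 test
   (bytes[i] == bytes[0] with the three upper bytes of each element zero),
   so neon_valid_immediate returns 0 with *elementwidth == 32 and the
   instruction emitted is "vmov.i32 Dd, #0xab".  */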
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for a description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT says whether the shift is a left or a right
   shift, because the two have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}

/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately, I don't think we can
   exploit those extra calculations to do the full operation in fewer steps.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}

/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
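/* Illustrative example (not from the original source): for the V2SI constant
   {5, 5}, the element value is moved into a core register once and a single
   "vdup.32 dN, rM" replicates it into both lanes, which is cheaper than a
   literal-pool load of the whole vector.  */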
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
	{
	case V8QImode:
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  break;
	case V16QImode:
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  break;
	case V4HImode:
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  break;
	case V8HImode:
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  break;
	case V2SImode:
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  break;
	case V4SImode:
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  break;
	case V2SFmode:
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  break;
	case V4SFmode:
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  break;
	case V2DImode:
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  break;
	default:
	  gcc_unreachable ();
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return !strict;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}

/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}

/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}

/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
)
13043 switch (GET_CODE (x
))
13046 return tls_mentioned_p (XEXP (x
, 0));
13049 if (XINT (x
, 1) == UNSPEC_TLS
)
13057 /* Must not copy any rtx that uses a pc-relative address. */
13060 arm_cannot_copy_insn_p (rtx_insn
*insn
)
13062 /* The tls call insn cannot be copied, as it is paired with a data
13064 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13067 subrtx_iterator::array_type array
;
13068 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
13070 const_rtx x
= *iter
;
13071 if (GET_CODE (x
) == UNSPEC
13072 && (XINT (x
, 1) == UNSPEC_PIC_BASE
13073 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
13080 minmax_code (rtx x
)
13082 enum rtx_code code
= GET_CODE (x
);
13095 gcc_unreachable ();
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

static bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
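/* Worked example (illustrative, not from the original source): for a clamp
   to [0, 255], hi_bound = 255 = 2^8 - 1 gives log = 8 and a zero low bound,
   so *mask = 8 and *signed_sat = false (a "usat ..., #8, ..." candidate).
   For a clamp to [-128, 127], log = 7 and the low bound is -127 - 1, so
   *mask = 8 with *signed_sat = true (an "ssat ..., #8, ..." candidate).  */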
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
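/* Illustrative example (not from the original source): [r3, #4] and [r3, #8]
   share a base register and differ by one word (val_diff == 4), so on a
   target without load delay slots they count as adjacent and are ldm/stm
   candidates; [r3, #4] and [r3, #12] do not.  */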
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
         REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
                     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
         popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
          || !REG_P (XEXP (SET_SRC (elt), 0))
          || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
             ((count - 1 - offset_adj) * reg_increment))
        return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below. If only one reg is loaded,
     success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
        return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register. It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted. Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
        return false;

      if (load)
        {
          reg = SET_DEST (elt);
          mem = SET_SRC (elt);
        }
      else
        {
          reg = SET_SRC (elt);
          mem = SET_DEST (elt);
        }

      if (!REG_P (reg)
          || GET_MODE (reg) != mode
          || REGNO (reg) <= regno
          || (consecutive
              && (REGNO (reg) !=
                  (unsigned int) (first_regno + regs_per_val * (i - base))))
          /* Don't allow SP to be loaded unless it is also the base register. It
             guarantees that SP is reset correctly when an LDM instruction
             is interrupted. Otherwise, we might end up with a corrupt stack.  */
          || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
          || !MEM_P (mem)
          || GET_MODE (mem) != mode
          || ((GET_CODE (XEXP (mem, 0)) != PLUS
               || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
               || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
               || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
                   offset + (i - base) * reg_increment))
              && (!REG_P (XEXP (mem, 0))
                  || offset + (i - base) * reg_increment != 0)))
        return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
        addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
        return false;

      /* For Thumb-1, address register is always modified - either by write-back
         or by explicit load.  If the pattern does not describe an update,
         then the address register must be in the list of loaded registers.  */
      if (TARGET_THUMB1)
        return update || addr_reg_in_reglist;
    }

  return true;
}
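
/* Illustrative example (an editorial addition, not from the original
   source): the two-register write-back load "ldmia r0!, {r1, r2}" is
   represented by a PARALLEL this predicate accepts:

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
                (set (reg:SI r1) (mem:SI (reg:SI r0)))
                (set (reg:SI r2) (mem:SI (plus:SI (reg:SI r0)
                                                  (const_int 4))))])

   The first element is the base update, and its constant (8) equals the
   number of registers transferred times the register size.  */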
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */
static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
                                 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

        ldr     rd1, [rbase + offset]
        ldr     rd2, [rbase + offset + 4]

     to

        add     rd1, rbase, offset
        ldmia   rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

        NREGS           CYCLES
          1               3
          2               4
          3               5
          4               6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

        NREGS           CYCLES
          1              1-3
          2              2-6
          3              3-9
          4              4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
                      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
        if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
          {
            /* We must find exactly one offset that is higher than the
               previous one by 4.  */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
          && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
        return false;
    }
  return true;
}
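
/* Worked example (an editorial addition, not from the original source):
   with unsorted_offsets == {8, 0, 12, 4} the caller sets order[0] = 1
   (the index of the lowest offset, 0); the loop then fills
   order == {1, 3, 0, 2}, visiting offsets 0, 4, 8, 12.  Offsets
   {0, 4, 16, 20} fail, because no offset is exactly 4 above 4.  */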
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
                        int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                               = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          unsorted_regs[i] = (REG_P (operands[i])
                              ? REGNO (operands[i])
                              : REGNO (SUBREG_REG (operands[i])));

          /* If it isn't an integer register, or if it overwrites the
             base register but isn't the last insn in the list, then
             we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              || unsorted_regs[i] > 14
              || (i != nops - 1 && unsorted_regs[i] == base_reg))
            return 0;

          /* Don't allow SP to be loaded unless it is also the base
             register.  It guarantees that SP is reset correctly when
             an LDM instruction is interrupted.  Otherwise, we might
             end up with a corrupt stack.  */
          if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
           || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
                                        ldm_case == 5
                                        ? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
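
/* Illustrative mapping of the returned LDM_CASE (an editorial addition,
   assumed from the code above), for nops == 2 loads from base register rb:

     offsets {0, 4}    -> case 1, "ldmia rb, {rx, ry}"
     offsets {4, 8}    -> case 2, "ldmib rb, {rx, ry}"  (ARM only)
     offsets {-4, 0}   -> case 3, "ldmda rb, {rx, ry}"  (ARM only)
     offsets {-8, -4}  -> case 4, "ldmdb rb, {rx, ry}"

   Any other ascending pair whose lowest offset fits an add immediate
   falls into case 5, which materializes the address first.  */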
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
                         int *regs, rtx *reg_rtxs, int *saved_order, int *base,
                         HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                               = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          unsorted_reg_rtxs[i] = (REG_P (operands[i])
                                  ? operands[i] : SUBREG_REG (operands[i]));
          unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          /* If it isn't an integer register, then we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              /* The effects are unpredictable if the base register is
                 both updated and stored.  */
              || (base_writeback && unsorted_regs[i] == base_reg)
              || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
              || unsorted_regs[i] > 14)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        {
          regs[i] = unsorted_regs[check_regs ? order[i] : i];
          if (reg_rtxs)
            reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
        }

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                         HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
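
/* Illustrative result (an editorial addition, not from the original
   source): for count == 2, regs == {4, 5} and wback_offset == 8 this
   builds the PARALLEL

     (parallel [(set (reg:SI rb) (plus:SI (reg:SI rb) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])

   which later matches ldm_stm_operation_p and is emitted as one
   "ldmia rb!, {r4, r5}".  When the multiple operation is not judged
   profitable, a plain sequence of single loads is returned instead.  */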
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                          HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
                     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
                                    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
                                     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
                       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
                              offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
                        rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
                              offsetp);
}
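
/* Usage sketch (an editorial addition with hypothetical values, not from
   the original source): to emit "ldmia rb!, {r0, r1, r2}" given a MEM
   based at rb:

     int regs[3] = { 0, 1, 2 };
     HOST_WIDE_INT offset = 0;
     emit_insn (arm_gen_load_multiple (regs, 3, base_reg, TRUE,
                                       basemem, &offset));

   Because WRITE_BACK is set, OFFSET has been advanced by 12 on return,
   so the caller can continue addressing the block past the copied
   words.  */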
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
                                     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
        if (regs[i] > regs[j])
          std::swap (regs[i], regs[j]);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
        {
          base_reg = regs[0];
          base_reg_rtx = newbase;
        }
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
                                      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
                                      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
                                      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
        if (regs[i] == regs[j])
          {
            rtx t = peep2_find_free_register (0, nops * 2,
                                              TARGET_THUMB1 ? "l" : "r",
                                              SImode, &allocated);
            if (t == NULL_RTX)
              return false;
            reg_rtxs[i] = t;
            regs[i] = REGNO (t);
          }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1]
                || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
           || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
          && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
        return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
                                   HOST_WIDE_INT length,
                                   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
                                            TRUE, srcbase, &srcoffset));
          src_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
                                                 - src_autoinc));
              mem = adjust_automodify_address (srcbase, SImode, addr,
                                               srcoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
            }
          srcoffset += block_size_bytes;
        }

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
                                             TRUE, dstbase, &dstoffset));
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
                                                 - dst_autoinc));
              mem = adjust_automodify_address (dstbase, SImode, addr,
                                               dstoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
            }
          dstoffset += block_size_bytes;
        }

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
                                        &srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, src,
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
          mem = adjust_automodify_address (srcbase, SImode, addr,
                                           srcoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_loadsi (regs[j], mem));
        }
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
                                         &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, dst,
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
          mem = adjust_automodify_address (dstbase, SImode, addr,
                                           dstoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_storesi (mem, regs[j]));
        }
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
         byte, depending on interleave factor.  */
      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
          emit_insn (gen_unaligned_storehi (mem,
                       gen_lowpart (HImode, halfword_tmp)));
          halfword_tmp = NULL;
          dstoffset += 2;
        }

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
          byte_tmp = NULL;
          dstoffset++;
        }

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
                   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
                      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
                               unsigned int interleave_factor,
                               HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
                                   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
                                     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
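
/* Illustrative shape of the emitted code (an editorial addition, assumed
   rather than taken from the original source), for length == 100 and
   bytes_per_iter == 16:

     final_src = src + 96
   L:
     <copy 16 bytes from *src to *dest>
     src  += 16
     dest += 16
     if (src != final_src) goto L
     <copy the remaining 4 bytes straight-line>  */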
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
         or dst_aligned though: allow more interleaving in those cases since the
         resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
                                       interleave_factor, bytes_per_iter);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
                                           interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
         subject to loop unrolling, which makes tuning this condition a little
         redundant.  */
      if (length > 32)
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i+=4)
    {
      if (in_words_to_go > 4)
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                          TRUE, srcbase, &srcoffset));
      else
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
                                          src, FALSE, srcbase,
                                          &srcoffset));

      if (out_words_to_go)
        {
          if (out_words_to_go > 4)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
                                               TRUE, dstbase, &dstoffset));
          else if (out_words_to_go != 1)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
                                               out_words_to_go, dst,
                                               (last_bytes == 0
                                                ? FALSE : TRUE),
                                               dstbase, &dstoffset));
          else
            {
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
              emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
              if (last_bytes != 0)
                {
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
                  dstoffset += 4;
                }
            }
        }

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);     /* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
                              GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode,
                                           plus_constant (Pmode, dst,
                                                          last_bytes - 1),
                                           dstoffset + last_bytes - 1);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

          if (--last_bytes)
            {
              tmp = gen_reg_rtx (SImode);
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
              part_bytes_reg = tmp;
            }
        }
    }
  else
    {
      if (last_bytes > 1)
        {
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
          last_bytes -= 2;
          if (last_bytes)
            {
              rtx tmp = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
              part_bytes_reg = tmp;
              dstoffset += 2;
            }
        }

      if (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
        }
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   MEM by the size of its mode.  */
static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  src = adjust_address (src, DImode, 0);
  dst = adjust_address (dst, DImode, 0);
  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loaddi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storedi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
        emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
        emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
        emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
      else
        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
    }

  if (len == 0)
    return true;

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
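
/* Illustrative decomposition (an editorial addition, assumed rather than
   taken from the original source): a 15-byte copy with both buffers
   word-aligned proceeds as

     8 bytes -> one DImode move  (ldrd/strd)
     4 bytes -> one SImode move  (ldr/str)
     2 bytes -> one HImode move  (ldrh/strh)
     1 byte  -> one QImode move  (ldrb/strb)

   with the unaligned_* patterns substituted for whichever side lacks
   word alignment.  */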
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;

      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTmode;

      switch (cond2)
        {
        case LT:
          return CC_DLTmode;
        case LE:
          return CC_DLEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTmode;

      switch (cond2)
        {
        case GT:
          return CC_DGTmode;
        case GE:
          return CC_DGEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTUmode;

      switch (cond2)
        {
        case LTU:
          return CC_DLTUmode;
        case LEU:
          return CC_DLEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTUmode;

      switch (cond2)
        {
        case GTU:
          return CC_DGTUmode;
        case GEU:
          return CC_DGEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
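
/* Illustrative case (an editorial addition, not from the original
   source): testing (a == 0 && b == 0) uses DOM_CC_X_AND_Y with
   cond1 == cond2 == EQ, giving CC_DEQmode; the combined test compiles to

     cmp   a, #0
     cmpeq b, #0

   followed by a single conditional branch on EQ.  */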
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
          || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
          || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
        {
        case EQ:
        case NE:
          /* A DImode comparison against zero can be implemented by
             or'ing the two halves together.  */
          if (y == const0_rtx)
            return CC_Zmode;

          /* We can do an equality test in three Thumb instructions.  */
          if (!TARGET_32BIT)
            return CC_Zmode;

          /* FALLTHROUGH */

        case LTU:
        case LEU:
        case GTU:
        case GEU:
          /* DImode unsigned comparisons can be implemented by cmp +
             cmpeq without a scratch register.  Not worth doing in
             Thumb-2.  */
          if (TARGET_32BIT)
            return CC_CZmode;

          /* FALLTHROUGH */

        case LT:
        case LE:
        case GT:
        case GE:
          /* DImode signed and unsigned comparisons can be implemented
             by cmp + sbcs with a scratch register, but that does not
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
          gcc_assert (op != EQ && op != NE);
          return CC_NCVmode;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
         then compare against zero.  Not used for ARM mode; there
         CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
        {
          gcc_assert (!reload_completed);
          x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
          y = const0_rtx;
        }

      /* A scratch register is required.  */
      if (reload_completed)
        gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
        scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
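/* A worked example of the hi/lo split above: for offset = 0x1043,
   lo = 0x1043 & 0xfff = 0x043 and hi = 0x1043 - 0x043 = 0x1000, so
   BASE_PLUS is preloaded with base + 0x1000 and the two byte loads use
   offsets 0x43 and 0x44, both inside the +/-4095 range.  The
   XOR/subtract expression simply sign-extends (offset - lo) from 32
   bits so that hi + lo == offset also holds for negative offsets on
   64-bit hosts.  */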
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
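/* Concretely: ARM-state LDRD/STRD take an 8-bit immediate, so offsets
   -255..255 are accepted (e.g. LDRD r0, r1, [r2, #252]); Thumb-2
   accepts -1020..1020 but only multiples of 4, so #248 is valid while
   #250 is rejected by the alignment check above.  */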
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register,
   the offsets are constants within the range, and the gap between the
   offsets is 4.  If reload is complete then check that the registers are
   legal.  WBACK indicates whether the address is updated.  LOAD indicates
   whether the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
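/* For example, ARM-state LDRD needs an even first destination and
   consecutive registers: "ldrd r0, r1, [r2]" passes the checks above,
   while "ldrd r1, r2, [r3]" fails the (t % 2 != 0) test and
   "ldrd r0, r3, [r2]" fails (t2 != t + 1).  */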
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
	{
	  tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* DREG must be an even-numbered register in DImode.
	     Split it into SI registers.  */
	  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
	  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

	  return (operands_ok_ldrd_strd (operands[0], operands[1],
					 base, offset,
					 false, load));
	}
    }

  return false;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx_insn *        insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  machine_mode      mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)

static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;
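/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (a 2-byte constant is
   padded out to a word), while MINIPOOL_FIX_SIZE (DImode) is 8, which
   with iWMMXt enabled also implies 8-byte alignment of the entry.  */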
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */

static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
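/* E.g. a label aligned to 8 bytes in Thumb code (minimum insn size 2)
   can be preceded by at most 8 - 2 = 6 bytes of padding; in ARM code
   (minimum insn size 4) the same alignment costs at most 4 bytes.  */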
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
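/* So an ordinary insn or call returns the base cost (50, reduced to 30
   when a label immediately follows), a jump 10 less, anything else 10
   more, and a code label a flat 50.  create_fix_barrier below then
   picks the cheapest in-range point.  */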
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)
    return 99;

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
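/* Example: the DImode constant 0x000000ff00000001 has highpart 0xff
   and lowpart 0x1, both valid ARM immediates, so synthesizing it with
   two data-processing insns beats a literal-pool load even without
   load delay slots.  */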
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx pat, op0, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      pat = PATTERN (insn);
      op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
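/* Illustration of the rewrite: a move followed by a zero-compare
   branch, e.g.

	mov	r1, r0
	cmp	r0, #0
	bne	.L2

   becomes

	subs	r1, r0, #0
	bne	.L2

   because SUBS sets the flags that the conditional branch needs.  */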
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if (current_tune->disparage_flag_setting_t16_encodings
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= (current_tune->disparage_partial_flag_setting_t16_encodings
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm> */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8> */
			  /* SUBS <Rdn>,#<imm8> */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3> */
			  /* SUBS <Rd>,<Rn>,#<imm3> */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm> */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* else fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm> */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS) */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8> */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
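/* For example, when the condition flags are dead an "add r0, r0, r2"
   is rewritten as the flag-setting "adds r0, r0, r2", which has a
   16-bit Thumb-2 encoding instead of a 32-bit one.  */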
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix *  last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
    {
      /* Output pop (not stmfd) because it has a shorter encoding.  */
      gcc_assert (update);
      sprintf (pattern, "pop%s\t{", conditional);
    }
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldm%sfd\t", conditional);
      else if (TARGET_UNIFIED_ASM)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%sia\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
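/* A worked illustration (assumed, not from the original sources): with SP
   as the base register, writeback and unified syntax, the code above emits
   something like

	pop	{r4, r5, pc}

   whereas a non-SP base such as r7 in divided syntax produces

	ldmia	r7, {r4, r5}

   and a return from an interrupt handler appends "^" so the CPSR is
   restored as well.  */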
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
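/* For illustration (not in the original sources): on an interworking or
   ARMv4T target, a call through r2 emitted by the code above is

	mov	lr, pc
	bx	r2

   on an older target the "bx r2" becomes "mov pc, r2".  */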
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
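/* A worked example (illustrative only): for the constant 0x12345678 the
   code above emits a set of the low halfword followed by a ZERO_EXTRACT
   set of the high halfword, which assemble to

	movw	rN, #0x5678
	movt	rN, #0x1234

   the second insn is skipped when the top sixteen bits are zero.  */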
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD
		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
		output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256))
		    {
		      if (emit)
			output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (TARGET_THUMB2
		  || !CONST_INT_P (otherops[2])
		  || (INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256))
		{
		  if (emit)
		    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditional execution.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
	    }

	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]",
					     otherops);
			}
		      return "";
		    }

		  if (CONST_INT_P (otherops[2]))
		    {
		      if (emit)
			{
			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
			  else
			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
			}
		    }
		  else if (emit)
		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
		}
	      else if (emit)
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (count)
		*count = 2;

	      if (TARGET_LDRD)
		return "ldr%(d%)\t%0, [%1]";

	      return "ldm%(ia%)\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than ldrd can handle,
	     fix these up with a pair of ldr.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (TARGET_LDRD
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}
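/* Illustrative sketch (assumed, not from the original sources): with
   TARGET_LDRD, a DImode load from a small-offset address is emitted as a
   single ldrd of the register pair; without LDRD the same move falls back
   to an ldm variant, or to two separate ldr instructions ordered so that
   an overlapping base register is clobbered last.  */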
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
	      || mode == DFmode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : "32",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */

const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4)
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
	int i;
	int overlap = -1;

	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case EImode:
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
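/* Worked example (illustrative only): for N = 0x45003 the loop above
   selects the even-bit-aligned byte chunks 0x3 and 0x45000, each a valid
   ARM rotated-immediate, so output_add_immediate produces two instructions
   of the form

	add	r0, r1, #0x3
	add	r0, r0, #0x45000

   using INSTR1 for the first chunk and INSTR2 for the rest.  */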
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem(enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}
/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem(GET_CODE(op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem(code);
      if (CONST_INT_P (XEXP (op, 1)))
	{
	  *amountp = INTVAL (XEXP (op, 1));
	}
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
	mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
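/* Illustrative examples (assumed, not from the original sources):
   (mult x 8) is returned as ARM_LSL_NAME with *AMOUNTP = int_log2 (8) = 3,
   i.e. "lsl #3", and (rotate x 24) is canonicalized by the 32 - 24
   conversion above into "ror #8".  */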
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
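/* For illustration (assumed): the four-character string a, '"', b, newline
   would be emitted as

	.ascii	"a\"b\012"

   printable characters pass through (escaping '\' and '"'), everything
   else becomes a three-digit octal escape, and runs longer than
   MAX_ASCII_LEN are split across several .ascii directives.  */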
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7.  Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (df_regs_ever_live_p (reg)
	    || (! crtl->is_leaf && call_used_regs[reg]))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && crtl->uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE(func_type))
    {
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
	      || crtl->uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (arm_current_func_type ())
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes())
	   ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 -r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
19191 arm_get_vfp_saved_size (void)
19193 unsigned int regno
;
19198 /* Space for saved VFP registers. */
19199 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19202 for (regno
= FIRST_VFP_REGNUM
;
19203 regno
< LAST_VFP_REGNUM
;
19206 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19207 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19211 /* Workaround ARM10 VFPr1 bug. */
19212 if (count
== 2 && !arm_arch6
)
19214 saved
+= count
* 8;
19223 if (count
== 2 && !arm_arch6
)
19225 saved
+= count
* 8;
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		if (TARGET_UNIFIED_ASM)
		  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
		else
		  sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    if (TARGET_UNIFIED_ASM)
	      sprintf (instr, "pop%s\t{", conditional);
	    else
	      sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
	 arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx           x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* ??? Do we want to print some of the below anyway?  */
  if (TARGET_THUMB1)
    return;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  int regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;
  bool first = true;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
      first = false;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	int regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  ;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (first)
	  {
	    rtx insn;

	    first = false;
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);

	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
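/* Worked example (illustrative, not from the original sources): pushing
   {r4, r5, r6} is an odd count, so the sequence generated above is
   equivalent to

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   the initial str allocates the whole block and keeps the following strd
   doubleword-aligned.  */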
19750 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19751 whenever possible, otherwise it emits single-word stores. The first store
19752 also allocates stack space for all saved registers, using writeback with
19753 post-addressing mode. All other stores use offset addressing. If no STRD
19754 can be emitted, this function emits a sequence of single-word stores,
19755 and not an STM as before, because single-word stores provide more freedom
19756 scheduling and can be turned into an STM by peephole optimizations. */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* Current register and previous register form register pair for
               which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
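/* For illustration (a schematic sketch, not taken from the original
   sources): with saved_regs_mask covering {r4, r5, r7} the expected
   output of arm_emit_strd_push is

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   with the attached DWARF sequence describing one 12-byte stack
   decrement followed by the three word-sized stores.  */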
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          num_regs++;
          if (dwarf_regs_mask & (1 << i))
            num_dwarf_regs++;
        }
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (dwarf_regs_mask & (1 << i))
            {
              tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (dwarf_regs_mask & (1 << i))
            {
              tmp
                = gen_rtx_SET (gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
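/* For illustration (not from the original sources): a call such as
   emit_multi_reg_push (0x4070, 0x4070) pushes {r4, r5, r6, lr} and
   annotates the insn with a frame note equivalent to

      (sequence [(set (reg sp) (plus (reg sp) (const_int -16)))
                 (set (mem (reg sp)) (reg r4))
                 (set (mem (plus (reg sp) (const_int 4))) (reg r5))
                 (set (mem (plus (reg sp) (const_int 8))) (reg r6))
                 (set (mem (plus (reg sp) (const_int 12))) (reg lr))])  */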
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
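/* Typical use (illustrative): after emitting an explicit
   "add sp, sp, #16" by hand, calling
   arm_add_cfa_adjust_cfa_note (insn, 16, stack_pointer_rtx,
                                stack_pointer_rtx)
   records the 16-byte CFA adjustment so the unwinder stays in sync
   without needing a full REG_FRAME_RELATED_EXPR note.  */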
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
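/* For illustration (not from the original sources): with
   saved_regs_mask covering {r4, r5, pc} this emits one parallel
   equivalent to "pop {r4, r5, pc}" -- a return, a 12-byte SP update
   and three loads -- with REG_CFA_RESTORE notes for r4 and r5 only,
   since the DWARF info must not mention PC.  */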
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                                 base_reg, base_reg);
}
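/* For illustration (a sketch): popping four D-registers starting at d8
   through the stack pointer is expected to emit the equivalent of
   "vldm sp!, {d8-d11}" together with an attached note adjusting the
   CFA by 32 bytes.  */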
/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
   number of registers are being popped, multiple LDRD patterns are created for
   all register pairs.  If odd number of registers are popped, last register is
   loaded by using LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (reg,
                           gen_frame_mem (SImode,
                                          plus_constant (Pmode,
                                                         stack_pointer_rtx,
                                                         4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
                 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the registers
               to be loaded are generated in above given LDRD pattern, and the
               pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
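/* For illustration (not from the original sources): popping {r4, r5, r6}
   with no return in PC is expected to produce

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   i.e. one LDRD per pair, one stack update, and a final post-increment
   LDR for the odd register.  */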
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This provides
   more scheduling freedom, compared to writeback on every load.  However,
   if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for which
               LDRD can be generated.  PC is always the last register popped, and
               we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);

            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}
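/* For illustration (a sketch): popping {r4, r5, r6, r7} in ARM mode is
   expected to produce

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16

   so all loads use plain offset addressing and there is exactly one
   separate stack update.  */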
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!leaf_function_p ()
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     To work around this, we cache the result: we only really need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_HARD_FLOAT && TARGET_VFP)
        saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* Register r3 is caller-saved.  Normally it does not need to be
             saved on entry by the prologue.  However if we choose to save
             it for padding then we may confuse the compiler into thinking
             a prologue sequence is required when in fact it is not.  This
             will occur when shrink-wrapping if r3 is used as a scratch
             register and there are no other callee-saved writes.

             This situation can be avoided when other callee-saved registers
             are available and r3 is not mandatory if we choose a callee-saved
             register for padding.  */
          bool prefer_callee_reg_p = false;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            {
              reg = 3;
              if (!TARGET_THUMB2)
                prefer_callee_reg_p = true;
            }
          if (reg == -1
              || prefer_callee_reg_p)
            {
              for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
                {
                  /* Avoid fixed registers; they may be changed at
                     arbitrary times so it's unsafe to restore them
                     during the epilogue.  */
                  if (!fixed_regs[i]
                      && (offsets->saved_regs_mask & (1 << i)) == 0)
                    {
                      reg = i;
                      break;
                    }
                }
            }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
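/* Worked example (illustrative numbers only, not from the original
   sources): for an ARM function with 8 bytes of pretend args, {r4, lr}
   saved, 16 bytes of locals, no outgoing args, and no static chain or
   interworking slot, the fields come out as saved_args = 8,
   saved_regs = 16, soft_frame = 16, locals_base = 32 and
   outgoing_args = 32 (already doubleword aligned).  */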
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */
          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
        insn = gen_rtx_MEM (V2SImode, insn);
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;
        saved_size += 8;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
              && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx,
                                        hard_frame_pointer_rtx));
        }
      else
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        stack_pointer_rtx));
        }
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
         has entered a different function.  That said, the unwind info is
         correct, individually, before and after this instruction because
         we've described the save of SP, which will override the default
         handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* For APCS frames, if IP register is clobbered
     when creating frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }
      else if (IS_NESTED (func_type))
        {
          /* The static chain register is the same as the IP register
             used as a scratch register during stack frame creation.
             To get around this need to find somewhere to store IP
             whilst the frame is being created.  We try the following
             places in order:

               1. The last argument register r3 if it is available.
               2. A slot on the stack above the frame if there are no
                  arguments to push onto the stack.
               3. Register r3 again, after pushing the argument registers
                  onto the stack, if this is a varargs function.
               4. The last slot on the stack created for the arguments to
                  push, if this isn't a varargs function.

             Note - we only need to tell the dwarf2 backend about the SP
             adjustment in the second variant; the static chain register
             doesn't need to be unwound, as it doesn't contain a value
             inherited from the caller.  */

          if (!arm_r3_live_at_start_p ())
            insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
          else if (args_to_push == 0)
            {
              rtx addr, dwarf;

              gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
              saved_regs += 4;

              addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
              insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
              fp_offset = 4;

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -fp_offset));
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }
          else
            {
              /* Store the args on the stack.  */
              if (cfun->machine->uses_anonymous_args)
                {
                  insn
                    = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
                                           (0xf0 >> (args_to_push / 4)) & 0xf);
                  emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
                  saved_pretend_args = 1;
                }
              else
                {
                  rtx addr, dwarf;

                  if (args_to_push == 4)
                    addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
                  else
                    addr
                      = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
                                            plus_constant (Pmode,
                                                           stack_pointer_rtx,
                                                           -args_to_push));

                  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

                  /* Just tell the dwarf backend that we adjusted SP.  */
                  dwarf
                    = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -args_to_push));
                  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
                }

              RTX_FRAME_RELATED_P (insn) = 1;
              fp_offset = args_to_push;
              args_to_push = 0;
            }
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (Pmode, stack_pointer_rtx,
                                           fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf,
           (0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;
          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }

      if (TARGET_LDRD
          && current_tune->prefer_ldrd_strd
          && !optimize_function_for_size_p (cfun))
        {
          gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
          if (TARGET_THUMB2)
            thumb2_emit_strd_push (live_regs_mask);
          else if (TARGET_ARM
                   && !TARGET_APCS_FRAME
                   && !IS_INTERRUPT (func_type))
            arm_emit_strd_push (live_regs_mask);
          else
            {
              insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;

          if (IS_NESTED (func_type))
            {
              /* Recover the static chain register.  */
              if (!arm_r3_live_at_start_p () || saved_pretend_args)
                insn = gen_rtx_REG (SImode, 3);
              else
                {
                  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
                  insn = gen_frame_mem (SImode, insn);
                }
              emit_set_insn (ip_rtx, insn);
              /* Add a USE to stop propagate_one_insn() from barfing.  */
              emit_insn (gen_force_register_use (ip_rtx));
            }
        }
      else
        {
          insn = GEN_INT (saved_regs - 4);
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
                                         hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
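/* For illustration (a sketch, not from the original sources): a small
   ARM function that saves {r4, lr} and allocates 8 bytes of locals
   typically gets the prologue

	push	{r4, lr}
	sub	sp, sp, #8

   with the push annotated via emit_multi_reg_push and the SUB marked
   RTX_FRAME_RELATED_P.  */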
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
        arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
        arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
        {
          fputc('s', stream);
          arm_print_condition (stream);
        }
      else
        {
          arm_print_condition (stream);
          fputc('s', stream);
        }
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;
        REAL_VALUE_FROM_CONST_DOUBLE (r, x);
        r = real_value_negate (&r);
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (stream, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
          break;

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    case 'B':
      if (CONST_INT_P (x))
        {
          HOST_WIDE_INT val;
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
        HOST_WIDE_INT val;

        if (!CONST_INT_P (x)
            || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        else
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         constants.  */
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   REG_P (XEXP (x, 0))
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fputs (wc_reg_names [INTVAL (x)], stream);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        machine_mode mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (!REG_P (x)
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                                  + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                                  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        rtx postinc_reg = NULL;
        unsigned align, memsize, align_bits;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        if (GET_CODE (addr) == POST_MODIFY)
          {
            postinc_reg = XEXP( XEXP (addr, 1), 1);
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        memsize = MEM_SIZE (x);

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (memsize == 32 && (align % 32) == 0)
          align_bits = 256;
        else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
          align_bits = 128;
        else if (memsize >= 8 && (align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
        if (postinc_reg)
          asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    case 'C':
      {
        rtx addr;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        gcc_assert (REG_P (addr));
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
        int result;

        gcc_assert (CONST_DOUBLE_P (x));
        result = vfp3_const_double_for_fract_bits (x);
        if (result == 0)
          result = vfp3_const_double_for_bits (x);
        fprintf (stream, "#%d", result);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          {
            char fpstr[20];
            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                             sizeof (fpstr), 0, 1);
            fprintf (stream, "#%s", fpstr);
          }
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}
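/* Illustrative examples (not exhaustive, and not from the original
   sources): with operand X = (reg:SI 4), "%0" prints "r4" and "%H0"
   prints "r5"; with X = (const_int 8), "%B0" prints "-9" (the bitwise
   inverse) and "%b0" prints "#3" (the log2).  */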
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as an index register.  */
	      std::swap (base, index);
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      {
		asm_fprintf (stream, "[%r, %s%r",
			     REGNO (base), is_minus ? "-" : "",
			     REGNO (XEXP (index, 0)));
		arm_print_operand (stream, index, 'S');
		fputs ("]", stream);
		break;
	      }

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  extern machine_mode output_memory_reference_mode;

	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	  else
	    asm_fprintf (stream, "[%r], #%s%d",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else
	output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
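
/* For reference, illustrative renderings of the cases above (assuming
   base register r0, index register r1 and an SImode access):

     (reg r0)			    ->	[r0]
     (plus (reg r0) (const_int 4))  ->	[r0, #4]
     (minus (reg r0) (reg r1))	    ->	[r0, -r1]
     (pre_inc (reg r0))		    ->	[r0, #4]!
     (post_dec (reg r0))	    ->	[r0], #-4
     (pre_modify ...)		    ->	[r0, #imm]! or [r0, r1]!
     (post_modify ...)		    ->	[r0], #imm  or [r0], r1  */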
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  if (!arm_pic_data_is_text_relative
	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    REAL_VALUE_TYPE rval;

	    REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

	    assemble_real
	      (rval, GET_MODE_INNER (mode),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
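
/* For example, with -fPIC a constant-table word that references a
   non-local symbol is emitted as "\t.word\tsym(GOT)", while a
   text-relative local reference is emitted as "\t.word\tsym(GOTOFF)".  */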
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
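
/* As an illustrative example of what the fsm achieves, a short forward
   branch such as:

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
     .L1:

   can be rewritten by conditionalising the skipped instruction and
   deleting the branch:

	cmp	r0, #0
	addne	r1, r1, #1  */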
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_CZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_NCVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
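
/* For example, (eq (reg:CC CC_REGNUM) (const_int 0)) maps to ARM_EQ,
   while an unrepresentable comparison such as (ltgt ...) on a
   floating-point mode yields ARM_NV.  */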
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}

      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
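
/* For example, with arm_current_cc == ARM_EQ, arm_condexec_masklen == 3
   and arm_condexec_mask == 0b011 this emits "itte	eq": the first two
   instructions of the block execute if EQ, the third if NE.  */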
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT && TARGET_VFP
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
	 putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return FALSE;

      if (TARGET_THUMB2)
	return TRUE;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return FALSE;
}
/* Implement MODES_TIEABLE_P.  */

bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
	  || VALID_NEON_QREG_MODE (mode1)
	  || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
	  || VALID_NEON_QREG_MODE (mode2)
	  || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
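
/* Consequently, arithmetic on __fp16 values is carried out in float;
   e.g. for __fp16 a, b the expression a + b is evaluated as
   (float) a + (float) b.  */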
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions between
   __fp16 to or from double to do an intermediate conversion to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
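
/* For example, for __fp16 h the conversion (double) h is performed as
   (double)(float) h, as required by the __fp16 semantics described
   above.  */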
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
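
/* For example, copying {d1,d2} = {d0,d1} component-wise in forward
   order would clobber d1 before it is read; because the destination
   starts at a higher register number than the source, the moves are
   emitted in reverse (d2 = d1, then d1 = d0).  */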
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
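
/* E.g. number_of_first_bit_set (0x0c) == 2.  */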
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
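
/* For example, MASK == 0x4090 (r4, r7 and lr) emits a single
   "push {r4, r7, lr}", with the unwind information describing
   whatever subset REAL_REGS requests.  */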
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
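
/* E.g. 0xff000000 is accepted (0xff shifted left by 24), while 0x101
   is rejected because its set bits span more than one byte's width.  */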
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	{
	  far_jump = true;
	}
      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't very useful.

     Following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll back the decision not to use a far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
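
/* Worked example of the threshold above: a function of 683 bytes gives
   683 * 3 = 2049 >= 2048, so any branch marked far_jump is assumed to
   really need the long sequence; at 682 bytes (2046 < 2048) the jump
   is assumed reachable with a short branch.  */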
/* Return nonzero if FUNC must be entered in ARM mode.  */
int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
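
/* The point of the 512-byte special case: the Thumb-1 "sub sp, #imm"
   encoding only reaches 508, so a 512-byte frame would need two
   instructions.  Pushing one extra register instead shrinks the
   adjustment to 508: (512 - 508) / 4 == 1 extra register.  */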
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
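
/* Worked example (added for illustration, not from the original source;
   the offset values are made up): with offsets->saved_args == 0,
   saved_regs == 16, soft_frame == locals_base == 24 and
   outgoing_args == 40, eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM yields 40 - 0 == 40, while eliminating
   FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   40 - 24 == 16.  */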
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	 0   sub   SP, #16         Reserve space for 4 registers.
	 2   push  {R7}            Push low registers.
	 4   add   R7, SP, #20     Get the stack pointer before the push.
	 6   str   R7, [SP, #8]    Store the stack pointer
					(before reserving the space).
	 8   mov   R7, PC          Get hold of the start of this code + 12.
	 10  str   R7, [SP, #16]   Store it.
	 12  mov   R7, FP          Get hold of the current frame pointer.
	 14  str   R7, [SP, #4]    Store it.
	 16  mov   R7, LR          Get hold of the current return address.
	 18  str   R7, [SP, #12]   Store it.
	 20  add   R7, SP, #16     Point at the start of the
					backtrace structure.
	 22  mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to stash the high
	 registers.  Such a stash may clobber the use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask) & 0xff;

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;

	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      pushable_regs |= l_mask;
	      real_regs_mask |= l_mask;
	      l_mask = 0;
	    }

	  insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert(regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
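
/* Illustrative sketch (added; not emitted verbatim by the code above --
   the exact registers and constants depend on live_regs_mask and on the
   frame offsets from arm_get_frame_offsets):  a typical Thumb-1 prologue
   produced here looks roughly like

	push	{r4, r5, r7, lr}	@ thumb1_emit_multi_reg_push (l_mask)
	sub	sp, sp, #24		@ the "amount" decrement
	add	r7, sp, #0		@ thumb_set_frame_pointer

   with the constant-pool path used instead of the single sub when the
   decrement does not fit in an immediate.  */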
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      emit_jump_insn (simple_return_rtx);
    }
}
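
/* Illustration (added; the assembly shown is an assumed rendering, not
   from the original comments): the PARALLEL built above models a return
   through a single post-incremented load, i.e. roughly "pop {pc}" for
   the one-register case, while the multi-register path rewrites LR into
   PC so that a saved set {r4, r5, lr} is restored with a single
   "pop {r4, r5, pc}".  */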
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent registers,
     we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
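
/* Illustration (added; hypothetical register set, not from the original
   source): because IP (r12) and SP (r13) are adjacent register numbers,
   replacing IP with SP in saved_regs_mask lets the multi-pop above reload
   the saved old stack pointer straight into SP.  A frame saved with an
   "stmfd sp!, {fp, ip, lr, pc}"-style store can thus be unwound with a
   single "ldm {fp, sp, pc}"-style load.  */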
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogue.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  if (crtl->args.pretend_args_size)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
	= emit_insn (gen_addsi3 (stack_pointer_rtx,
				 stack_pointer_rtx,
				 GEN_INT (crtl->args.pretend_args_size)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer arm_expand_prologue on how to save
	     pretend_args in stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char * name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
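
/* Example output (added for illustration; assumes an empty user-label
   prefix, so %U expands to nothing): for a function named "foo" the
   code above prints roughly

	orr	r12, pc, #1
	bx	r12
	.code	16
	.globl	.real_start_offoo
	.thumb_func
   .real_start_offoo:

   i.e. STUB_NAME is concatenated directly with the function name.  */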
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
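
/* For the <reg> + <reg> case above, the emitted sequence is (added note;
   rD = destination, rB = base, rO = offset are illustrative names):

	add	rD+1, rB, rO
	ldr	rD, [rD+1, #0]
	ldr	rD+1, [rD+1, #4]

   The low word is loaded first through the computed address held in the
   high destination register, which is only overwritten by the final
   load.  */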
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  rtx tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
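
/* Usage note (added; ".LCVx" is just a placeholder for whatever label
   gen_label_rtx produced): a call through, say, r3 is emitted here as
   "bl .LCVx", and the matching ".LCVx: bx r3" trampoline is written out
   once per text section by arm_file_end below.  */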
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (e.g. gas prior to 2.20.)  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

static void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t@.tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
	       current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
	       current_tune->num_prefetch_slots);
  asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
	       current_tune->l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
	       current_tune->l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit[0],
	       (int) current_tune->logical_op_non_short_circuit[1]);
  asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
	       (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file,
	       "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file,
	       "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune
		       ->disparage_partial_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
	       current_tune->fuseable_ops);
  asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
	    {
	      /* Keep backward compatibility for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	    }
	  else
	    {
	      const char* pos = strchr (arm_selected_arch->name, '+');
	      if (pos)
		{
		  char buf[15];
		  gcc_assert (strlen (arm_selected_arch->name)
			      <= sizeof (buf) / sizeof (*pos));
		  strncpy (buf, arm_selected_arch->name,
			   (pos - arm_selected_arch->name) * sizeof (*pos));
		  buf[pos - arm_selected_arch->name] = '\0';
		  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
		  asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
		}
	      else
		asm_fprintf (asm_out_file, "\t.arch %s\n",
			     arm_selected_arch->name);
	    }
	}
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_selected_cpu->name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (TARGET_SOFT_FLOAT)
	{
	  fpu_name = "softvfp";
	}
      else
	{
	  fpu_name = arm_fpu_desc->name;
	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
	    {
	      if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
		arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	      if (TARGET_HARD_FLOAT_ABI)
		arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	{
	  fputs ("\tldr\tr12, ", file);
	}
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
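
/* Example output (added; hypothetical case: ARM mode, delta == 4, no
   PIC, target function named "function"): the loop above splits the
   delta into shifted 8-bit chunks, so the thunk body is simply

	add	r0, r0, #4
	b	function(PLT)		@ (PLT) only when NEED_PLT_RELOC

   with r1 used instead of r0 when the return value is passed by
   reference (this_regno == 1).  */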
static bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}

/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
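
/* Example (added for illustration): the half-precision constant 1.0 has
   the bit pattern 0x3c00, so on a little-endian target the code above
   assembles it as the 4-byte word 0x00003c00 -- two data bytes followed
   by two bytes of zero padding -- which an SImode ldr then loads with
   the fp16 value in the low half.  */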
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands [1])
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}
      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;

	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
	return V2SImode;
      case HImode:
	return V4HImode;
      case QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
static bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
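
/* Consequence (added note, for illustration): for SImode the middle-end
   may fold away an explicit "x << (n & 255)" mask, since ARM
   register-specified shifts already use only the least significant byte
   of the count register; for DImode the mask of 0 promises nothing, so
   no such folding is done.  */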
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
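/* Worked example (illustrative): s5 (FIRST_VFP_REGNUM + 5) is
   single-precision-capable, so it maps to DWARF register 64 + 5 = 69;
   the first half of d20 (FIRST_VFP_REGNUM + 40) is not, so it maps to
   256 + 40 / 2 = 276, i.e. the legacy D-register numbering.  */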
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before the first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	gcc_assert (REG_P (XEXP (e, 0))
		    && REGNO (XEXP (e, 0)) == SP_REGNUM
		    && CONST_INT_P (XEXP (e, 1))
		    && offset == INTVAL (XEXP (e, 1)));
      else
	gcc_assert (i == 1
		    && REG_P (e)
		    && REGNO (e) == SP_REGNUM);
      offset += reg_size;
#endif
    }
  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
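/* As an illustration (a hypothetical prologue, not output copied from a
   real build): for the store-multiple

	push	{r4, r5, lr}

   this emits

	.save	{r4, r5, lr}

   and a prologue that also pushed pc as filler would get a ".pad #4"
   directive before the ".save".  */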
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
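/* Illustrative examples (hypothetical insns, assuming fp is r11): a
   "sub sp, sp, #16" in the prologue yields ".pad #16", and an
   "add r11, sp, #8" yields ".setfp r11, sp, #8".  */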
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
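/* For instance (illustrative): a catch handler for "int" would
   reference the RTTI object as

	.word	_ZTIi(TARGET2)

   leaving the choice of final relocation semantics (GOT-relative,
   absolute, ...) to the platform's definition of R_ARM_TARGET2.  */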
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
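/* Example of the resulting assembly (illustrative; the label name is
   hypothetical): a global-dynamic access of "var" might print

	.word	var(tlsgd) + (. - .LPIC4 - 8)

   where the trailing expression biases the addend by the pc-relative
   offset of the instruction that takes the address.  */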
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputs (")", fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op(operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT(val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;
      mask >>= 1;
    }
  gcc_assert (i < units);
  switch (GET_MODE (operands[0]))
    {
    case V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
      break;
    }
  output_asm_insn (templ, operands);
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
static const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}
static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    return int_log2 (value);
	}
    }
  return 0;
}
int
vfp3_const_double_for_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_truncate (DFmode, &r0))
    {
      HOST_WIDE_INT value = real_to_integer (&r0);
      value = value & 0xffffffff;
      if ((value != 0) && ( (value & (value - 1)) == 0))
	return int_log2 (value);
    }

  return 0;
}
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_exclusiveqi; break;
	case HImode: gen = gen_arm_load_exclusivehi; break;
	case SImode: gen = gen_arm_load_exclusivesi; break;
	case DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}
static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case HImode: gen = gen_arm_store_release_exclusivehi; break;
	case SImode: gen = gen_arm_store_release_exclusivesi; break;
	case DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_exclusiveqi; break;
	case HImode: gen = gen_arm_store_exclusivehi; break;
	case SImode: gen = gen_arm_store_exclusivesi; break;
	case DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (bval, x));
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = (enum memmodel) INTVAL (operands[5]);
  mod_f = (enum memmodel) INTVAL (operands[6]);
  scratch = operands[7];
  mode = GET_MODE (mem);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(mod_s == MEMMODEL_RELAXED
			  || mod_s == MEMMODEL_CONSUME
			  || mod_s == MEMMODEL_RELEASE);

  bool use_release = TARGET_HAVE_LDACQ
		     && !(mod_s == MEMMODEL_RELAXED
			  || mod_s == MEMMODEL_CONSUME
			  || mod_s == MEMMODEL_ACQUIRE);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (label2);
}
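/* The split corresponds to a loop of roughly this shape (illustrative
   SImode strong compare-and-swap, relaxed failure ordering):

	1:	ldrex	rval, [mem]
		cmp	rval, oldval
		bne	2f
		strex	scratch, newval, [mem]
		cmp	scratch, #0
		bne	1b
	2:

   with Z set on success, matching the arrangement described above.  */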
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  rtx x;

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(model == MEMMODEL_RELAXED
			  || model == MEMMODEL_CONSUME
			  || model == MEMMODEL_RELEASE);

  bool use_release = TARGET_HAVE_LDACQ
		     && !(model == MEMMODEL_RELAXED
			  || model == MEMMODEL_CONSUME
			  || model == MEMMODEL_ACQUIRE);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
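/* For example (illustrative, hypothetical register names), an SImode
   atomic add splits into

	1:	ldrex	old, [mem]
		add	new, old, value
		strex	tmp, new, [mem]
		cmp	tmp, #0
		bne	1b

   bracketed by barriers unless the acquire/release forms are used.  */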
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
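/* Example (illustrative): for V4SImode with two distinct operands, the
   selector {0, 2, 4, 6} picks the even-numbered elements of the
   concatenation, which is exactly what the first output of a
   "vuzp.32" produces; {1, 3, 5, 7} matches the second output.  */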
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;
	  else
	    next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode: gen = gen_neon_vextv8qi; break;
    case V4HImode: gen = gen_neon_vextv4hi; break;
    case V8HImode: gen = gen_neon_vextv8hi; break;
    case V2SImode: gen = gen_neon_vextv2si; break;
    case V4SImode: gen = gen_neon_vextv4si; break;
    case V2SFmode: gen = gen_neon_vextv2sf; break;
    case V4SFmode: gen = gen_neon_vextv4sf; break;
    case V2DImode: gen = gen_neon_vextv2di; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
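/* Example (illustrative): for V4SImode the selector {1, 2, 3, 4}
   extracts a contiguous window starting at element 1 of the
   concatenated operands, i.e. a single "vext.32 ..., #1".  */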
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 ASHIFT:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 ASHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 LSHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
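/* As a concrete illustration (hypothetical registers): a left shift of
   r1:r0 by the constant 5 into r3:r2 follows the less-than-32 case
   above and becomes

	lsl	r3, r1, #5
	orr	r3, r3, r0, lsr #27
	lsl	r2, r0, #5

   where the high word collects the five bits shifted out of the low
   word.  */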
/* Return TRUE if this is a valid comparison operation, and force
   the operands into a form that is valid for it.  */
bool
arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int) code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code) code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
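/* For example, a DImode comparison whose second operand is a constant
   that fails cmpdi_operand is repaired here by loading the constant
   into a register first, so the comparison expanders only ever see
   operands their patterns can match.  UNEQ and LTGT are rejected
   outright, as the backend has no direct way to test them.  */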
/* Maximum number of instructions to set a block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set a block of memory for the
   non-vectorized case.  VAL is the value to set the memory with.
   LENGTH is the number of bytes to set.  ALIGN is the alignment of
   the destination memory in bytes.  UNALIGNED_P is TRUE if we can
   only set the memory with instructions meeting alignment
   requirements.  USE_STRD_P is TRUE if we can use strd to set the
   memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
                                 unsigned HOST_WIDE_INT length,
                                 unsigned HOST_WIDE_INT align,
                                 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine the last pair STRH/STRB into a single
     STR by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
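/* A worked cost example: for LENGTH == 15, ALIGN == 4, UNALIGNED_P ==
   false and USE_STRD_P == false, num is the cost of loading VAL plus
   (15 >> 2) + leftover[15 & 3] == 3 + 2 stores; and because unaligned
   access (when available) lets the final STRH/STRB pair be merged
   into one STR, one store is subtracted again.  */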
/* Return TRUE if it's profitable to set a block of memory for the
   vectorized case.  LENGTH is the number of bytes to set.  ALIGN is
   the alignment of the destination memory in bytes.  MODE is the
   vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
                             unsigned HOST_WIDE_INT align,
                             machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num++;

  return (num <= arm_block_set_max_insns ());
}
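/* For instance, LENGTH == 25 with 4-byte alignment and MODE ==
   V16QImode costs 1 insn to load the constant vector,
   (25 + 15) / 16 == 2 stores, 1 address adjustment for the 25 & 3
   leftover bytes, and 1 more for the initial vst1:v16qi, i.e.
   num == 5, which is then checked against
   arm_block_set_max_insns ().  */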
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
                              unsigned HOST_WIDE_INT length,
                              unsigned HOST_WIDE_INT value,
                              unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, 0);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
        emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
        {
          reg = gen_lowpart (V8QImode, reg);
          mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
        }
      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
                                              + (nelt_mode - nelt_v8))));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
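/* Example: LENGTH == 23 with ALIGN == 1 selects V16QImode.  The loop
   emits one misaligned v16qi store for bytes 0-15, leaving 7 bytes,
   i.e. the (0, 8] case: REG is narrowed to V8QImode and DST is
   advanced by (23 - 16) + (16 - 8) == 15, so the final v8qi store
   covers bytes 15-22, harmlessly rewriting byte 15 with the same
   value.  */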
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
                            unsigned HOST_WIDE_INT length,
                            unsigned HOST_WIDE_INT value,
                            unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
        {
          emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
          mem = adjust_automodify_address (dstbase, mode, dst, 0);
          /* We are shifting bytes back, set the alignment accordingly.  */
          if ((length & 0x3) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 4);
          else if ((length & 0x1) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 2);
          else
            set_mem_align (mem, BITS_PER_UNIT);

          emit_insn (gen_movmisalignv16qi (mem, reg));
          return true;
        }
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      mem = adjust_automodify_address (dstbase, mode,
                                       addr, i - UNITS_PER_WORD);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
        set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT * 2);
      else
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
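/* Example: LENGTH == 20 with 4-byte alignment, unaligned access
   available and little-endian: the v16qi store covers bytes 0-15, the
   (8, 16) leftover branch is skipped (16 + 8 >= 20), and the
   single-word leftover path stores the v8qi register 4 bytes back, at
   offset 12, covering bytes 12-19 with an aligned access.  */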
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on the processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
                                  unsigned HOST_WIDE_INT length,
                                  unsigned HOST_WIDE_INT value,
                                  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
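/* Example: VALUE == 0xAB, LENGTH == 7, ALIGN == 2 gives v == 0xABAB,
   so three HImode stores cover offsets 0, 2 and 4, and the single
   leftover byte at offset 6 is written with a QImode store, after
   which i == LENGTH as asserted.  */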
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
                                unsigned HOST_WIDE_INT length,
                                unsigned HOST_WIDE_INT value,
                                unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
                && TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, false, use_strd_p))
    {
      if (!use_strd_p)
        return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                            align, false, use_strd_p))
        return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
        {
          addr = plus_constant (Pmode, dst, i);
          mem = adjust_automodify_address (dstbase, DImode, addr, i);
          emit_move_insn (mem, reg);
        }
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge the last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
         compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
                    unsigned HOST_WIDE_INT length,
                    unsigned HOST_WIDE_INT value,
                    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand a string store operation.  First we try to do that using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
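/* For example, a call like memset (p, 0xAB, 13) on a word-aligned
   destination without NEON (and, by assumption, without strd being
   preferred) falls through to arm_block_set_aligned_non_vect, which
   emits three word stores at offsets 0, 4 and 8 followed by one byte
   store at offset 12.  */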
/* Implement the TARGET_SCHED_MACRO_FUSION_P hook.  */
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
}
/* Implement the TARGET_SCHED_MACRO_FUSION_PAIR_P hook.  */
static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
    {
      /* We are trying to fuse
           movw imm / movt imm
         instructions as a group that gets scheduled together.  */

      set_dest = SET_DEST (curr_set);

      if (GET_MODE (set_dest) != SImode)
        return false;

      /* We are trying to match:
         prev (movw)  == (set (reg r0) (const_int imm16))
         curr (movt)  == (set (zero_extract (reg r0)
                                            (const_int 16)
                                            (const_int 16))
                              (const_int imm16_1))
         or
         prev (movw)  == (set (reg r1)
                              (high (symbol_ref ("SYM"))))
         curr (movt)  == (set (reg r0)
                              (lo_sum (reg r1)
                                      (symbol_ref ("SYM"))))  */
      if (GET_CODE (set_dest) == ZERO_EXTRACT)
        {
          if (CONST_INT_P (SET_SRC (curr_set))
              && CONST_INT_P (SET_SRC (prev_set))
              && REG_P (XEXP (set_dest, 0))
              && REG_P (SET_DEST (prev_set))
              && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
            return true;
        }
      else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
               && REG_P (SET_DEST (curr_set))
               && REG_P (SET_DEST (prev_set))
               && GET_CODE (SET_SRC (prev_set)) == HIGH
               && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
        return true;
    }

  return false;
}
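/* The pair being matched above corresponds to sequences such as

     movw r0, #:lower16:SYM
     movt r0, #:upper16:SYM

   and reporting them as fuseable keeps the scheduler from separating
   the two halves of the 32-bit immediate load.  */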
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */
static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
        {
          decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
          if (decl_op1
              && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
              && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
            {
              if ((TREE_CODE (decl_op1) == VAR_DECL
                   || TREE_CODE (decl_op1) == CONST_DECL)
                  && (TREE_CODE (decl_op0) == VAR_DECL
                      || TREE_CODE (decl_op0) == CONST_DECL))
                return (get_variable_section (decl_op1, false)
                        != get_variable_section (decl_op0, false));

              if (TREE_CODE (decl_op1) == LABEL_DECL
                  && TREE_CODE (decl_op0) == LABEL_DECL)
                return (DECL_CONTEXT (decl_op1)
                        != DECL_CONTEXT (decl_op0));
            }

          return true;
        }
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
static bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* If MEM is in the form of [base+offset], extract the two parts
   of the address and set them to BASE and OFFSET, otherwise return
   false after clearing BASE and OFFSET.  */
static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
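/* For example, (mem (plus (reg r1) (const_int 8))) yields BASE == r1
   and OFFSET == (const_int 8), while a bare (mem (reg r1)) yields
   BASE == r1 with OFFSET == const0_rtx; any other address form, such
   as an auto-increment, clears both and returns false.  */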
/* If INSN is a load or store of an address in the form of
   [base+offset], extract the two parts and set them to BASE and
   OFFSET.  IS_LOAD is set to TRUE if it's a load.  Return TRUE if
   INSN is such an instruction, otherwise return FALSE.  */
static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
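/* E.g. an insn whose pattern is
   (set (mem (plus (reg r1) (const_int 4))) (reg r0)) is recognized as
   a store with BASE == r1, OFFSET == (const_int 4) and *IS_LOAD ==
   false.  */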
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so
   FUSION_PRI and PRI are only calculated for these instructions.
   For other instructions, FUSION_PRI and PRI are simply set to
   MAX_PRI.  In the future, other kinds of instruction fusion can be
   supported by returning different priorities.

   It's important that irrelevant instructions get the largest
   FUSION_PRI.  */
static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int) (INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
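/* For two stores [r1, #4] and [r1, #8], both receive the same
   FUSION_PRI (tmp - 2) because they share a base register, while PRI
   differs only in the offset term, so the scheduler treats them as a
   fusion pair and orders the smaller offset first.  */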
#include "gt-arm.h"