/* Output routines for GCC for ARM.
   Copyright (C) 1991-2014 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "hash-table.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "insn-codes.h"
#include "diagnostic-core.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "plugin-api.h"
#include "sched-int.h"
#include "target-def.h"
#include "langhooks.h"
#include "gimple-expr.h"
#include "tm-constrs.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_lra_p (void);
static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static const char *output_multi_immediate (rtx *, const char *, const char *,
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
static rtx arm_function_arg (cumulative_args_t, machine_mode,
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_encode_section_info (tree, rtx, int);
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
static bool arm_pass_by_reference (cumulative_args_t,
				   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
					     const unsigned char *sel);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false,  true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false,  true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false,  true,  true,  arm_handle_pcs_attribute,
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
  /* ARM/PE has three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#define TARGET_LRA_P arm_lra_p

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
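/* That is, an anchor can reach offsets in [-4088, +4095], a block of
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.  */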
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
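/* ARMv8-A deprecates IT blocks covering more than one instruction, so with
   -mrestrict-it (arm_restrict_it) an IT block is limited to a single
   conditional instruction; otherwise up to four insns may be predicated.  */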
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;
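/* TARGET_INITIALIZER (from target-def.h) expands to an initializer built from
   the current values of the TARGET_* macros, so the #undef/#define pairs
   above are what customize targetm for ARM.  */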
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
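/* Individual tunings may lower this limit through the "Max cond insns"
   field of their tune_params entry; see e.g. the StrongARM and Cortex-A5
   tables below.  */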
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
unsigned long insn_flags = 0;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */

/* Nonzero if this chip supports the ARM 6K extensions.  */

/* Nonzero if instructions present in ARMv6-M can be used.  */

/* Nonzero if this chip supports the ARM 7 extensions.  */

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */

/* Nonzero if instructions present in ARMv8 can be used.  */

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */

/* Nonzero if generating Thumb-1 instructions.  */
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 2.  */

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
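/* The codes are indexed by their architectural encoding (EQ = 0 ... NV = 15);
   inverse conditions form adjacent pairs, so for any code other than AL/NV
   the inverse is obtained by flipping the low bit (EQ <-> NE, CS <-> CC,
   and so on).  */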
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
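/* In other words, the Thumb-2 work registers are the low registers r0-r7,
   minus the Thumb frame pointer and minus the stack pointer, program counter
   and PIC register should any of those fall within r0-r7.  */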
/* Initialization code.  */

  const char *const name;
  enum processor_type core;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;

#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \

/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  1,  /* vec_unalign_load_cost.  */
  1,  /* vec_unalign_store_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"

const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (2),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (3),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A9.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (4),  /* extend.  */
  COSTS_N_INSNS (4),  /* extend_add.  */
  COSTS_N_INSNS (2),  /* load.  */
  COSTS_N_INSNS (2),  /* load_sign_extend.  */
  COSTS_N_INSNS (2),  /* ldrd.  */
  COSTS_N_INSNS (2),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),  /* loadf.  */
  COSTS_N_INSNS (5),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (2),  /* store.  */
  COSTS_N_INSNS (2),  /* strd.  */
  COSTS_N_INSNS (2),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* storef.  */
  COSTS_N_INSNS (1),  /* stored.  */
  COSTS_N_INSNS (1)   /* store_unaligned.  */
  COSTS_N_INSNS (14), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (1),  /* fpconst.  */
  COSTS_N_INSNS (1),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (24), /* div.  */
  COSTS_N_INSNS (5),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (1),  /* fpconst.  */
  COSTS_N_INSNS (1),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  0,                  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  0,                  /* log_shift_reg.  */
  0,                  /* extend_arith.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),  /* simple.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A8.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (1),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* loadf.  */
  COSTS_N_INSNS (1),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (1),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* storef.  */
  COSTS_N_INSNS (1),  /* stored.  */
  COSTS_N_INSNS (1)   /* store_unaligned.  */
  COSTS_N_INSNS (36), /* div.  */
  COSTS_N_INSNS (11), /* mult.  */
  COSTS_N_INSNS (20), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (9),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (6),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (8),  /* toint.  */
  COSTS_N_INSNS (8),  /* fromint.  */
  COSTS_N_INSNS (8)   /* roundint.  */
  COSTS_N_INSNS (64), /* div.  */
  COSTS_N_INSNS (16), /* mult.  */
  COSTS_N_INSNS (25), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (9),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (6),  /* compare.  */
  COSTS_N_INSNS (6),  /* widen.  */
  COSTS_N_INSNS (6),  /* narrow.  */
  COSTS_N_INSNS (8),  /* toint.  */
  COSTS_N_INSNS (8),  /* fromint.  */
  COSTS_N_INSNS (8)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (7)   /* idiv.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (6),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (4),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  COSTS_N_INSNS (1)   /* store_unaligned.  */
  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (3),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (30), /* div.  */
  COSTS_N_INSNS (6),  /* mult.  */
  COSTS_N_INSNS (10), /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (7)   /* idiv.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (2),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  COSTS_N_INSNS (1)   /* store_unaligned.  */
  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (3),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (30), /* div.  */
  COSTS_N_INSNS (6),  /* mult.  */
  COSTS_N_INSNS (10), /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (18)  /* idiv.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (3),  /* extend.  */
  COSTS_N_INSNS (3),  /* extend_add.  */
  COSTS_N_INSNS (3),  /* load.  */
  COSTS_N_INSNS (3),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (3),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),  /* loadf.  */
  COSTS_N_INSNS (3),  /* loadd.  */
  0,                  /* load_unaligned.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  0                   /* store_unaligned.  */
  COSTS_N_INSNS (17), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */
  COSTS_N_INSNS (31), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (2),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (18)  /* idiv.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (3),  /* extend.  */
  COSTS_N_INSNS (3),  /* extend_add.  */
  COSTS_N_INSNS (3),  /* load.  */
  COSTS_N_INSNS (3),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (4),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),  /* loadf.  */
  COSTS_N_INSNS (4),  /* loadd.  */
  0,                  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  0                   /* store_unaligned.  */
  COSTS_N_INSNS (17), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (5),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */
  COSTS_N_INSNS (31), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table v7m_extra_costs =
  0,                  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* non_exec.  */
  false               /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),  /* simple.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (3),  /* extend_add.  */
  COSTS_N_INSNS (8)   /* idiv.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* extend_add.  */
  COSTS_N_INSNS (2),  /* load.  */
  0,                  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (2),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  1,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (3),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (2),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (2),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  1,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (3),  /* stored.  */
  COSTS_N_INSNS (1)   /* store_unaligned.  */
  COSTS_N_INSNS (7),  /* div.  */
  COSTS_N_INSNS (2),  /* mult.  */
  COSTS_N_INSNS (5),  /* mult_addsub.  */
  COSTS_N_INSNS (3),  /* fma.  */
  COSTS_N_INSNS (1),  /* addsub.  */
  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (5),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (1)   /* alu.  */
const struct tune_params arm_slowmul_tune =
  arm_slowmul_rtx_costs,
  NULL,                         /* Sched adj cost.  */
  3,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                        /* Prefer LDRD/STRD.  */
  {true, true},                 /* Prefer non short circuit.  */
  &arm_default_vec_cost,        /* Vectorizer costs.  */
  false,                        /* Prefer Neon for 64-bits bitops.  */
  false, false,                 /* Prefer 32-bit encodings.  */
  false,                        /* Prefer Neon for stringops.  */
  8                             /* Maximum insns to inline memset.  */

const struct tune_params arm_fastmul_tune =
  arm_fastmul_rtx_costs,
  NULL,                         /* Sched adj cost.  */
  1,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                        /* Prefer LDRD/STRD.  */
  {true, true},                 /* Prefer non short circuit.  */
  &arm_default_vec_cost,        /* Vectorizer costs.  */
  false,                        /* Prefer Neon for 64-bits bitops.  */
  false, false,                 /* Prefer 32-bit encodings.  */
  false,                        /* Prefer Neon for stringops.  */
  8                             /* Maximum insns to inline memset.  */

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
  arm_fastmul_rtx_costs,
  NULL,                         /* Sched adj cost.  */
  1,                            /* Constant limit.  */
  3,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                        /* Prefer LDRD/STRD.  */
  {true, true},                 /* Prefer non short circuit.  */
  &arm_default_vec_cost,        /* Vectorizer costs.  */
  false,                        /* Prefer Neon for 64-bits bitops.  */
  false, false,                 /* Prefer 32-bit encodings.  */
  false,                        /* Prefer Neon for stringops.  */
  8                             /* Maximum insns to inline memset.  */

const struct tune_params arm_xscale_tune =
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                            /* Constant limit.  */
  3,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                        /* Prefer LDRD/STRD.  */
  {true, true},                 /* Prefer non short circuit.  */
  &arm_default_vec_cost,        /* Vectorizer costs.  */
  false,                        /* Prefer Neon for 64-bits bitops.  */
  false, false,                 /* Prefer 32-bit encodings.  */
  false,                        /* Prefer Neon for stringops.  */
  8                             /* Maximum insns to inline memset.  */

const struct tune_params arm_9e_tune =
  NULL,                         /* Sched adj cost.  */
  1,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                         /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                        /* Prefer LDRD/STRD.  */
  {true, true},                 /* Prefer non short circuit.  */
  &arm_default_vec_cost,        /* Vectorizer costs.  */
  false,                        /* Prefer Neon for 64-bits bitops.  */
  false, false,                 /* Prefer 32-bit encodings.  */
  false,                        /* Prefer Neon for stringops.  */
  8                             /* Maximum insns to inline memset.  */

const struct tune_params arm_v6t2_tune =
  NULL,                         /* Sched adj cost.  */
  1,                            /* Constant limit.  */
  5,                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                        /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                        /* Prefer LDRD/STRD.  */
  {true, true},                 /* Prefer non short circuit.  */
  &arm_default_vec_cost,        /* Vectorizer costs.  */
  false,                        /* Prefer Neon for 64-bits bitops.  */
  false, false,                 /* Prefer 32-bit encodings.  */
  false,                        /* Prefer Neon for stringops.  */
  8                             /* Maximum insns to inline memset.  */
1762 const struct tune_params arm_cortex_tune
=
1765 &generic_extra_costs
,
1766 NULL
, /* Sched adj cost. */
1767 1, /* Constant limit. */
1768 5, /* Max cond insns. */
1769 ARM_PREFETCH_NOT_BENEFICIAL
,
1770 false, /* Prefer constant pool. */
1771 arm_default_branch_cost
,
1772 false, /* Prefer LDRD/STRD. */
1773 {true, true}, /* Prefer non short circuit. */
1774 &arm_default_vec_cost
, /* Vectorizer costs. */
1775 false, /* Prefer Neon for 64-bits bitops. */
1776 false, false, /* Prefer 32-bit encodings. */
1777 false, /* Prefer Neon for stringops. */
1778 8 /* Maximum insns to inline memset. */
1781 const struct tune_params arm_cortex_a8_tune
=
1784 &cortexa8_extra_costs
,
1785 NULL
, /* Sched adj cost. */
1786 1, /* Constant limit. */
1787 5, /* Max cond insns. */
1788 ARM_PREFETCH_NOT_BENEFICIAL
,
1789 false, /* Prefer constant pool. */
1790 arm_default_branch_cost
,
1791 false, /* Prefer LDRD/STRD. */
1792 {true, true}, /* Prefer non short circuit. */
1793 &arm_default_vec_cost
, /* Vectorizer costs. */
1794 false, /* Prefer Neon for 64-bits bitops. */
1795 false, false, /* Prefer 32-bit encodings. */
1796 true, /* Prefer Neon for stringops. */
1797 8 /* Maximum insns to inline memset. */
1800 const struct tune_params arm_cortex_a7_tune
=
1803 &cortexa7_extra_costs
,
1805 1, /* Constant limit. */
1806 5, /* Max cond insns. */
1807 ARM_PREFETCH_NOT_BENEFICIAL
,
1808 false, /* Prefer constant pool. */
1809 arm_default_branch_cost
,
1810 false, /* Prefer LDRD/STRD. */
1811 {true, true}, /* Prefer non short circuit. */
1812 &arm_default_vec_cost
, /* Vectorizer costs. */
1813 false, /* Prefer Neon for 64-bits bitops. */
1814 false, false, /* Prefer 32-bit encodings. */
1815 true, /* Prefer Neon for stringops. */
1816 8 /* Maximum insns to inline memset. */
1819 const struct tune_params arm_cortex_a15_tune
=
1822 &cortexa15_extra_costs
,
1823 NULL
, /* Sched adj cost. */
1824 1, /* Constant limit. */
1825 2, /* Max cond insns. */
1826 ARM_PREFETCH_NOT_BENEFICIAL
,
1827 false, /* Prefer constant pool. */
1828 arm_default_branch_cost
,
1829 true, /* Prefer LDRD/STRD. */
1830 {true, true}, /* Prefer non short circuit. */
1831 &arm_default_vec_cost
, /* Vectorizer costs. */
1832 false, /* Prefer Neon for 64-bits bitops. */
1833 true, true, /* Prefer 32-bit encodings. */
1834 true, /* Prefer Neon for stringops. */
1835 8 /* Maximum insns to inline memset. */
1838 const struct tune_params arm_cortex_a53_tune
=
1841 &cortexa53_extra_costs
,
1842 NULL
, /* Scheduler cost adjustment. */
1843 1, /* Constant limit. */
1844 5, /* Max cond insns. */
1845 ARM_PREFETCH_NOT_BENEFICIAL
,
1846 false, /* Prefer constant pool. */
1847 arm_default_branch_cost
,
1848 false, /* Prefer LDRD/STRD. */
1849 {true, true}, /* Prefer non short circuit. */
1850 &arm_default_vec_cost
, /* Vectorizer costs. */
1851 false, /* Prefer Neon for 64-bits bitops. */
1852 false, false, /* Prefer 32-bit encodings. */
1853 false, /* Prefer Neon for stringops. */
1854 8 /* Maximum insns to inline memset. */
1857 const struct tune_params arm_cortex_a57_tune
=
1860 &cortexa57_extra_costs
,
1861 NULL
, /* Scheduler cost adjustment. */
1862 1, /* Constant limit. */
1863 2, /* Max cond insns. */
1864 ARM_PREFETCH_NOT_BENEFICIAL
,
1865 false, /* Prefer constant pool. */
1866 arm_default_branch_cost
,
1867 true, /* Prefer LDRD/STRD. */
1868 {true, true}, /* Prefer non short circuit. */
1869 &arm_default_vec_cost
, /* Vectorizer costs. */
1870 false, /* Prefer Neon for 64-bits bitops. */
1871 true, true, /* Prefer 32-bit encodings. */
1872 false, /* Prefer Neon for stringops. */
1873 8 /* Maximum insns to inline memset. */
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  1,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_cortex_a5_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {false, false},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,			/* Prefer Neon for 64-bits bitops.  */
  false, false,			/* Prefer 32-bit encodings.  */
  true,				/* Prefer Neon for stringops.  */
  8				/* Maximum insns to inline memset.  */
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,			/* Prefer Neon for 64-bits bitops.  */
  false, false,			/* Prefer 32-bit encodings.  */
  false,			/* Prefer Neon for stringops.  */
  8				/* Maximum insns to inline memset.  */
};

const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,				/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,			/* Prefer Neon for 64-bits bitops.  */
  false, false,			/* Prefer 32-bit encodings.  */
  true,				/* Prefer Neon for stringops.  */
  8				/* Maximum insns to inline memset.  */
};

/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  2,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_cortex_m_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {false, false},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,			/* Prefer Neon for 64-bits bitops.  */
  false, false,			/* Prefer 32-bit encodings.  */
  false,			/* Prefer Neon for stringops.  */
  8				/* Maximum insns to inline memset.  */
};

/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  NULL,				/* Sched adj cost.  */
  0,				/* Constant limit.  */
  0,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_cortex_m_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,			/* Prefer Neon for 64-bits bitops.  */
  false, false,			/* Prefer 32-bit encodings.  */
  false,			/* Prefer Neon for stringops.  */
  8				/* Maximum insns to inline memset.  */
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  NULL,				/* Sched adj cost.  */
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {false, false},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,			/* Prefer Neon for 64-bits bitops.  */
  false, false,			/* Prefer 32-bit encodings.  */
  false,			/* Prefer Neon for stringops.  */
  8				/* Maximum insns to inline memset.  */
};

const struct tune_params arm_fa726te_tune =
{
  fa726te_sched_adjust_cost,
  1,				/* Constant limit.  */
  5,				/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,			/* Prefer LDRD/STRD.  */
  {true, true},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,			/* Prefer Neon for 64-bits bitops.  */
  false, false,			/* Prefer 32-bit encodings.  */
  false,			/* Prefer Neon for stringops.  */
  8				/* Maximum insns to inline memset.  */
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
   FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     by -mtune.  */
#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};

/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */
static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
};
/* Supported TLS relocations.  */
  TLS_DESCSEQ	/* GNU scheme */

/* The maximum number of insns to be used when loading a constant.  */
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
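
/* Example: counting the bits of 0x2C (0b101100), the clear-lowest-set-bit
   step above removes one bit per iteration:
       0b101100 -> 0b101000 -> 0b100000 -> 0
   so bit_count returns 3 after three iterations.  */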

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
                             const char *funcname, const char *modename,
                             int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}

arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
                            machine_mode from, const char *funcname,
                            const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
           maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
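
/* For instance, a call such as
       arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3);
   would register the helper name "__gnu_addsq3" for signed fract addition,
   matching the __gnu_-prefixed fixed-point routines in libgcc.  The actual
   mode/name pairs come from the tables used further below.  */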
/* Set up library functions unique to ARM.  */

arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     callee-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
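
  /* As a consequence, a plain 64-bit division such as

         long long quot = num / den;

     can simply be expanded as a call to "__aeabi_ldivmod": per the AEABI the
     quotient comes back in {r0, r1} and the remainder, left in {r2, r3}, is
     ignored without any extra clobber bookkeeping.  */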
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;
    }
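
  /* For example, with the IEEE half-precision format a conversion like

         __fp16 h = f;   (f being a float)

     is typically expanded to a call to "__gnu_f2h_ieee", while all HFmode
     arithmetic and comparisons are left NULL above, so the compiler widens
     operands to SFmode before operating on them.  */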
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  const arm_fixed_mode_set fixed_arith_modes[] =

  const arm_fixed_mode_set fixed_conv_modes[] =

  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
                                   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
                                   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
                                   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
                                   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
                                   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
                                   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
                                   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
                                   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
                                   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
                                   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
                                   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
                                   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
                                   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
                                   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
                                   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
                                   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
                                   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
                                   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
                                   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
                                   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
                                   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
                                   "cmp", fixed_arith_modes[i].name, 2);
    }
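
  /* As an example, assuming fixed_arith_modes contains an entry mapping
     SAmode to the name "sa", the loop above registers "__gnu_ssaddsa3" for
     ssadd_optab and "__gnu_negsa2" for neg_optab, i.e. the __gnu_-prefixed
     fixed-point helpers provided by libgcc.  */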
  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
            || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
                && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))

        arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
                                    fixed_conv_modes[j].mode, "fract",
                                    fixed_conv_modes[i].name,
                                    fixed_conv_modes[j].name);
        arm_set_fixed_conv_libfunc (satfract_optab,
                                    fixed_conv_modes[i].mode,
                                    fixed_conv_modes[j].mode, "satfract",
                                    fixed_conv_modes[i].name,
                                    fixed_conv_modes[j].name);
        arm_set_fixed_conv_libfunc (fractuns_optab,
                                    fixed_conv_modes[i].mode,
                                    fixed_conv_modes[j].mode, "fractuns",
                                    fixed_conv_modes[i].name,
                                    fixed_conv_modes[j].name);
        arm_set_fixed_conv_libfunc (satfractuns_optab,
                                    fixed_conv_modes[i].mode,
                                    fixed_conv_modes[j].mode, "satfractuns",
                                    fixed_conv_modes[i].name,
                                    fixed_conv_modes[j].name);
      }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     The C Library ABI further reinforces this definition in \S
     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
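
/* The type built above corresponds to the AAPCS definition, roughly

       typedef struct __va_list { void *__ap; } va_list;

   with both the tag "__va_list" and the single field "__ap" mandated by the
   ABI, as the comment above explains.  */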
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
      arm_selected_tune = &all_cores[(int) arm_cpu_option];
    }

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;
      /* Now check to see if the user has specified some command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch8 = (insn_flags & FL_ARCH8) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  arm_arch_crc = (insn_flags & FL_CRC32) != 0;
  arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;

  if (arm_restrict_it == 2)
    arm_restrict_it = arm_arch8 && TARGET_THUMB2;

  if (!TARGET_THUMB2)
    arm_restrict_it = 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
                                  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];

  if (TARGET_NEON && !arm_arch7)
    error ("target CPU does not support NEON");

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;
    }

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT && TARGET_NEON)
    error ("iWMMXt and NEON are incompatible");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
               && TARGET_HARD_FLOAT
               && TARGET_VFP)
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
        arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
          && arm_structure_size_boundary != 32
          && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
        {
          if (ARM_DOUBLEWORD_ALIGN)
            warning (0,
                     "structure size boundary can only be set to 8, 32 or 64");
          else
            warning (0, "structure size boundary can only be set to 8 or 32");
          arm_structure_size_boundary
            = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
        }
    }

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }

  if (TARGET_VXWORKS_RTP
      && !global_options_set.x_arm_pic_data_is_text_relative)
    arm_pic_data_is_text_relative = 0;

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
        fix_cm3_ldrd = 1;
      else
        fix_cm3_ldrd = 0;
    }
  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */

  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
        unaligned_access = 1;
      else
        unaligned_access = 0;
    }
  else if (unaligned_access == 1
           && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }

  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2)
        max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
    }
  else
    max_insns_skipped = current_tune->max_insns_skipped;

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
                           global_options.x_param_values,
                           global_options_set.x_param_values);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have
     deemed it beneficial (signified by setting num_prefetch_slots to 1 or
     more).  */
  if (flag_prefetch_loop_arrays < 0
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not override the
     defaults unless we are tuning for a core we have researched values for.  */
  if (current_tune->num_prefetch_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
                           current_tune->num_prefetch_slots,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  if (current_tune->l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
                           current_tune->l1_cache_line_size,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  if (current_tune->l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
                           current_tune->l1_cache_size,
                           global_options.x_param_values,
                           global_options_set.x_param_values);

  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
    flag_shrink_wrap = false;
  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
          || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");

  /* Currently, for slow flash data, we just disable literal pools.  */
  if (target_slow_flash_data)
    arm_disable_literal_pool = true;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2)
    inline_asm_unified = 1;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without
     fuse-caller-save finding out about it.  Therefore, disable
     fuse-caller-save in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fuse-caller-save.  */
  if (TARGET_THUMB1)
    flag_use_caller_save = 0;

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
} isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
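
/* For example, a handler declared as

       void uart_isr (void) __attribute__ ((interrupt ("IRQ")));

   reaches this function with ARGUMENT holding the string "IRQ" and is
   classified as ARM_FT_ISR through the table above; the handler name here
   is only illustrative.  */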
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions
                   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}

/* Returns the type of the current function.  */

arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
           .word  static chain value
           .word  function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
         Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
                   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
                     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
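
/* On a 32-bit target the two words written above land at offsets 8 and 12,
   i.e. immediately after the two instructions emitted by
   arm_asm_trampoline_template, so the PC-relative loads in the template pick
   up the static chain value and the target function's address.  */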
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
                                NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
         is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
                                 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
         the default abi) ...  */
      if (!call_used_regs[3])
        return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
        return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
        {
          gcc_assert (CALL_P (sibling));

          if (find_regno_fusage (sibling, USE, 3))
            return 0;
        }

      /* ... and that there are no call-saved registers in r0-r2
         (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
        return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;

      if (flag_pic
          && arm_pic_register != INVALID_REGNUM
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
        return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */

use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
          != ((~(unsigned HOST_WIDE_INT) 0)
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
          && ((i & ~0xc000003f) == 0
              || (i & ~0xf000000f) == 0
              || (i & ~0xfc000003) == 0))
        return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
        return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
    }

  return FALSE;
}
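
/* Examples: 0x0000FF00 is accepted (0xFF shifted by an even amount), while
   0x00FF00FF is rejected in ARM mode but accepted for Thumb-2 through the
   repeated-pattern test above (0x00XY00XY with XY == 0xFF).  */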
/* Return true if I is a valid constant for the operation CODE.  */

const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
        return 1;
      else
        /* Otherwise, try mvn.  */
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
          && ((i & 0xfffff000) == 0
              || ((-i) & 0xfffff000) == 0))
        return 1;
      /* else fall through.  */

      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */

      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */

const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
              && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
                    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
          && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
         constants by pushing them into memory so we must synthesize
         them in-line, regardless of the cost.  This is only likely to
         be more costly on chips that have load delay slots and we are
         compiling without running the scheduler (so no splitting
         occurred before the final instruction emission).

         Ref:  gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
          && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
                                1, 0)
              > (arm_constant_limit (optimize_function_for_size_p (cfun))
                 + (code != SET))))
        {
          if (code == SET)
            {
              /* Currently SET is the only monadic value for CODE, all
                 the rest are diadic.  */
              if (TARGET_USE_MOVT)
                arm_emit_movpair (target, GEN_INT (val));
              else
                emit_set_insn (target, GEN_INT (val));

              return 1;
            }
          else
            {
              rtx temp = subtargets ? gen_reg_rtx (mode) : target;

              if (TARGET_USE_MOVT)
                arm_emit_movpair (temp, GEN_INT (val));
              else
                emit_set_insn (temp, GEN_INT (val));

              /* For MINUS, the value is subtracted from, since we never
                 have subtraction of a constant.  */
              if (code == MINUS)
                emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
              else
                emit_set_insn (target,
                               gen_rtx_fmt_ee (code, mode, source, temp));
              return 2;
            }
        }
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
                           1);
}
3711 ARM/THUMB2 immediates, and add up to VAL.
3712 Thr function return value gives the number of insns required. */
3714 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3715 struct four_ints
*return_sequence
)
3717 int best_consecutive_zeros
= 0;
3721 struct four_ints tmp_sequence
;
3723 /* If we aren't targeting ARM, the best place to start is always at
3724 the bottom, otherwise look more closely. */
3727 for (i
= 0; i
< 32; i
+= 2)
3729 int consecutive_zeros
= 0;
3731 if (!(val
& (3 << i
)))
3733 while ((i
< 32) && !(val
& (3 << i
)))
3735 consecutive_zeros
+= 2;
3738 if (consecutive_zeros
> best_consecutive_zeros
)
3740 best_consecutive_zeros
= consecutive_zeros
;
3741 best_start
= i
- consecutive_zeros
;
3748 /* So long as it won't require any more insns to do so, it's
3749 desirable to emit a small constant (in bits 0...9) in the last
3750 insn. This way there is more chance that it can be combined with
3751 a later addressing insn to form a pre-indexed load or store
3752 operation. Consider:
3754 *((volatile int *)0xe0000100) = 1;
3755 *((volatile int *)0xe0000110) = 2;
3757 We want this to wind up as:
3761 str rB, [rA, #0x100]
3763 str rB, [rA, #0x110]
3765 rather than having to synthesize both large constants from scratch.
3767 Therefore, we calculate how many insns would be required to emit
3768 the constant starting from `best_start', and also starting from
3769 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3770 yield a shorter sequence, we may as well use zero. */
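/* Illustrative sketch (not part of the original source): a stand-alone
   version of the best_start search above, operating on a plain 32-bit
   value.  example_best_start is a hypothetical helper used only for
   exposition and is kept disabled so it does not affect the surrounding
   function.  For 0xe0000100 it returns 10, the start of the large zero
   run just above the 0x100 byte, so that byte is left for the final insn
   as described above.  */
#if 0
static int
example_best_start (unsigned int val)
{
  int i, best_start = 0, best_consecutive_zeros = 0;

  for (i = 0; i < 32; i += 2)
    if (!(val & (3u << i)))
      {
	int consecutive_zeros = 0;

	/* Walk the 2-bit aligned run of zeros starting at bit I.  */
	while (i < 32 && !(val & (3u << i)))
	  {
	    consecutive_zeros += 2;
	    i += 2;
	  }
	if (consecutive_zeros > best_consecutive_zeros)
	  {
	    best_consecutive_zeros = consecutive_zeros;
	    best_start = i - consecutive_zeros;
	  }
      }
  return best_start;
}
#endif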
3771 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
3773 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
3775 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
3776 if (insns2
<= insns1
)
3778 *return_sequence
= tmp_sequence
;
3786 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3788 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3789 			      struct four_ints *return_sequence, int i)
3791 int remainder
= val
& 0xffffffff;
3794 /* Try and find a way of doing the job in either two or three
3797 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3798 location. We start at position I. This may be the MSB, or
3799 optimal_immediate_sequence may have positioned it at the largest block
3800 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3801 wrapping around to the top of the word when we drop off the bottom.
3802 In the worst case this code should produce no more than four insns.
3804 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3805 constants, shifted to any arbitrary location.  We should always start at zero.
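/* Illustrative sketch (not part of the original source): the ARM-mode
   immediate encoding the splitter above works around.  example_arm_immediate_p
   is a hypothetical stand-alone helper, kept disabled; the backend itself
   uses const_ok_for_arm for this test.  */
#if 0
static int
example_arm_immediate_p (unsigned int val)
{
  int rot;

  /* An ARM data-processing immediate is an 8-bit value rotated right by an
     even amount; equivalently, rotating VAL left by some even amount must
     leave a value that fits in 8 bits.  */
  for (rot = 0; rot < 32; rot += 2)
    {
      unsigned int v = (val << rot) | (rot ? (val >> (32 - rot)) : 0);
      if (v <= 0xff)
	return 1;
    }
  return 0;
}
#endif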
3810 unsigned int b1
, b2
, b3
, b4
;
3811 unsigned HOST_WIDE_INT result
;
3814 gcc_assert (insns
< 4);
3819 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3820 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3823 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3824 /* We can use addw/subw for the last 12 bits. */
3828 /* Use an 8-bit shifted/rotated immediate. */
3832 result
= remainder
& ((0x0ff << end
)
3833 | ((i
< end
) ? (0xff >> (32 - end
))
3840 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3841 arbitrary shifts. */
3842 i
-= TARGET_ARM
? 2 : 1;
3846 /* Next, see if we can do a better job with a thumb2 replicated
3849 We do it this way around to catch the cases like 0x01F001E0 where
3850 two 8-bit immediates would work, but a replicated constant would
3853 TODO: 16-bit constants that don't clear all the bits, but still win.
3854 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
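/* Illustrative sketch (not part of the original source): the Thumb-2
   replicated immediate forms referred to above.  Besides a rotated 8-bit
   value, Thumb-2 modified immediates include 0x00XY00XY, 0xXY00XY00 and
   0xXYXYXYXY.  example_thumb2_replicated_p is a hypothetical stand-alone
   helper, kept disabled.  */
#if 0
static int
example_thumb2_replicated_p (unsigned int val)
{
  unsigned int byte = val & 0xff;
  unsigned int half = val & 0xffff;

  if (val == (byte | (byte << 8) | (byte << 16) | (byte << 24)))
    return 1;			/* 0xXYXYXYXY */
  if (val == (half | (half << 16)) && (half & 0xff00) == 0)
    return 1;			/* 0x00XY00XY */
  if (val == (half | (half << 16)) && (half & 0x00ff) == 0)
    return 1;			/* 0xXY00XY00 */
  return 0;
}
#endif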
3857 b1
= (remainder
& 0xff000000) >> 24;
3858 b2
= (remainder
& 0x00ff0000) >> 16;
3859 b3
= (remainder
& 0x0000ff00) >> 8;
3860 b4
= remainder
& 0xff;
3864 /* The 8-bit immediate already found clears b1 (and maybe b2),
3865 but must leave b3 and b4 alone. */
3867 /* First try to find a 32-bit replicated constant that clears
3868 almost everything. We can assume that we can't do it in one,
3869 or else we wouldn't be here. */
3870 unsigned int tmp
= b1
& b2
& b3
& b4
;
3871 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3873 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3874 + (tmp
== b3
) + (tmp
== b4
);
3876 && (matching_bytes
>= 3
3877 || (matching_bytes
== 2
3878 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3880 /* At least 3 of the bytes match, and the fourth has at
3881 least as many bits set, or two of the bytes match
3882 and it will only require one more insn to finish. */
3890 /* Second, try to find a 16-bit replicated constant that can
3891 leave three of the bytes clear. If b2 or b4 is already
3892 zero, then we can. If the 8-bit from above would not
3893 clear b2 anyway, then we still win. */
3894 else if (b1
== b3
&& (!b2
|| !b4
3895 || (remainder
& 0x00ff0000 & ~result
)))
3897 result
= remainder
& 0xff00ff00;
3903 /* The 8-bit immediate already found clears b2 (and maybe b3)
3904 and we don't get here unless b1 is already clear, but it will
3905 leave b4 unchanged. */
3907 /* If we can clear b2 and b4 at once, then we win, since the
3908 8-bits couldn't possibly reach that far. */
3911 result
= remainder
& 0x00ff00ff;
3917 return_sequence
->i
[insns
++] = result
;
3918 remainder
&= ~result
;
3920 if (code
== SET
|| code
== MINUS
)
3928 /* Emit an instruction with the indicated PATTERN. If COND is
3929 non-NULL, conditionalize the execution of the instruction on COND
3933 emit_constant_insn (rtx cond
, rtx pattern
)
3936 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
3937 emit_insn (pattern
);
3940 /* As above, but extra parameter GENERATE which, if clear, suppresses
3944 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3945 		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
		  int generate)
3950 int final_invert
= 0;
3952 int set_sign_bit_copies
= 0;
3953 int clear_sign_bit_copies
= 0;
3954 int clear_zero_bit_copies
= 0;
3955 int set_zero_bit_copies
= 0;
3956 int insns
= 0, neg_insns
, inv_insns
;
3957 unsigned HOST_WIDE_INT temp1
, temp2
;
3958 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3959 struct four_ints
*immediates
;
3960 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
3962 /* Find out which operations are safe for a given CODE. Also do a quick
3963 check for degenerate cases; these can occur when DImode operations
3976 if (remainder
== 0xffffffff)
3979 emit_constant_insn (cond
,
3980 gen_rtx_SET (VOIDmode
, target
,
3981 GEN_INT (ARM_SIGN_EXTEND (val
))));
3987 if (reload_completed
&& rtx_equal_p (target
, source
))
3991 emit_constant_insn (cond
,
3992 gen_rtx_SET (VOIDmode
, target
, source
));
4001 emit_constant_insn (cond
,
4002 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
4005 if (remainder
== 0xffffffff)
4007 if (reload_completed
&& rtx_equal_p (target
, source
))
4010 emit_constant_insn (cond
,
4011 gen_rtx_SET (VOIDmode
, target
, source
));
4020 if (reload_completed
&& rtx_equal_p (target
, source
))
4023 emit_constant_insn (cond
,
4024 gen_rtx_SET (VOIDmode
, target
, source
));
4028 if (remainder
== 0xffffffff)
4031 emit_constant_insn (cond
,
4032 gen_rtx_SET (VOIDmode
, target
,
4033 gen_rtx_NOT (mode
, source
)));
4040 /* We treat MINUS as (val - source), since (source - val) is always
4041 passed as (source + (-val)). */
4045 emit_constant_insn (cond
,
4046 gen_rtx_SET (VOIDmode
, target
,
4047 gen_rtx_NEG (mode
, source
)));
4050 if (const_ok_for_arm (val
))
4053 emit_constant_insn (cond
,
4054 gen_rtx_SET (VOIDmode
, target
,
4055 gen_rtx_MINUS (mode
, GEN_INT (val
),
4066 /* If we can do it in one insn get out quickly. */
4067 if (const_ok_for_op (val
, code
))
4070 emit_constant_insn (cond
,
4071 gen_rtx_SET (VOIDmode
, target
,
4073 ? gen_rtx_fmt_ee (code
, mode
, source
,
4079 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4081 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4082 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4086 if (mode
== SImode
&& i
== 16)
4087 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4089 emit_constant_insn (cond
,
4090 gen_zero_extendhisi2
4091 (target
, gen_lowpart (HImode
, source
)));
4093 /* Extz only supports SImode, but we can coerce the operands
4095 emit_constant_insn (cond
,
4096 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4097 gen_lowpart (SImode
, source
),
4098 GEN_INT (i
), const0_rtx
));
4104 /* Calculate a few attributes that may be useful for specific
4106 /* Count number of leading zeros. */
4107 for (i
= 31; i
>= 0; i
--)
4109 if ((remainder
& (1 << i
)) == 0)
4110 clear_sign_bit_copies
++;
4115 /* Count number of leading 1's. */
4116 for (i
= 31; i
>= 0; i
--)
4118 if ((remainder
& (1 << i
)) != 0)
4119 set_sign_bit_copies
++;
4124 /* Count number of trailing zero's. */
4125 for (i
= 0; i
<= 31; i
++)
4127 if ((remainder
& (1 << i
)) == 0)
4128 clear_zero_bit_copies
++;
4133 /* Count number of trailing 1's. */
4134 for (i
= 0; i
<= 31; i
++)
4136 if ((remainder
& (1 << i
)) != 0)
4137 set_zero_bit_copies
++;
4145 /* See if we can do this by sign_extending a constant that is known
4146 to be negative.  This is a good way of doing it, since the shift
4147 may well merge into a subsequent insn. */
4148 if (set_sign_bit_copies
> 1)
4150 if (const_ok_for_arm
4151 (temp1
= ARM_SIGN_EXTEND (remainder
4152 << (set_sign_bit_copies
- 1))))
4156 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4157 emit_constant_insn (cond
,
4158 gen_rtx_SET (VOIDmode
, new_src
,
4160 emit_constant_insn (cond
,
4161 gen_ashrsi3 (target
, new_src
,
4162 GEN_INT (set_sign_bit_copies
- 1)));
4166 /* For an inverted constant, we will need to set the low bits,
4167 these will be shifted out of harm's way. */
4168 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4169 if (const_ok_for_arm (~temp1
))
4173 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4174 emit_constant_insn (cond
,
4175 gen_rtx_SET (VOIDmode
, new_src
,
4177 emit_constant_insn (cond
,
4178 gen_ashrsi3 (target
, new_src
,
4179 GEN_INT (set_sign_bit_copies
- 1)));
4185 /* See if we can calculate the value as the difference between two
4186 valid immediates. */
4187 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4189 int topshift
= clear_sign_bit_copies
& ~1;
4191 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4192 & (0xff000000 >> topshift
));
4194 /* If temp1 is zero, then that means the 9 most significant
4195 bits of remainder were 1 and we've caused it to overflow.
4196 When topshift is 0 we don't need to do anything since we
4197 can borrow from 'bit 32'. */
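/* Worked example (illustrative, not from the original source): for
   remainder = 0x00ffff00 the rounding above gives temp1 = 0 and, after the
   adjustment below, temp1 = 0x01000000 and temp2 = temp1 - remainder =
   0x100.  Both are valid ARM immediates, so the value is built as
   0x01000000 - 0x100 in two insns instead of being synthesized a byte at
   a time.  */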
4198 if (temp1
== 0 && topshift
!= 0)
4199 temp1
= 0x80000000 >> (topshift
- 1);
4201 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4203 if (const_ok_for_arm (temp2
))
4207 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4208 emit_constant_insn (cond
,
4209 gen_rtx_SET (VOIDmode
, new_src
,
4211 emit_constant_insn (cond
,
4212 gen_addsi3 (target
, new_src
,
4220 /* See if we can generate this by setting the bottom (or the top)
4221 16 bits, and then shifting these into the other half of the
4222 word. We only look for the simplest cases, to do more would cost
4223 too much. Be careful, however, not to generate this when the
4224 alternative would take fewer insns. */
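/* Worked example (illustrative, not from the original source): on a core
   without movw/movt, val = 0x01010101 has temp2 = 0x0101, which is not a
   valid immediate, but temp2 | (temp2 << 16) reproduces the whole value.
   So 0x0101 is built first (two insns) and one ORR of that register with
   itself shifted left by 16 finishes the job: three insns instead of four
   byte-sized ORRs.  */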
4225 if (val
& 0xffff0000)
4227 temp1
= remainder
& 0xffff0000;
4228 temp2
= remainder
& 0x0000ffff;
4230 /* Overlaps outside this range are best done using other methods. */
4231 for (i
= 9; i
< 24; i
++)
4233 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4234 && !const_ok_for_arm (temp2
))
4236 rtx new_src
= (subtargets
4237 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4239 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4240 source
, subtargets
, generate
);
4248 gen_rtx_ASHIFT (mode
, source
,
4255 /* Don't duplicate cases already considered. */
4256 for (i
= 17; i
< 24; i
++)
4258 if (((temp1
| (temp1
>> i
)) == remainder
)
4259 && !const_ok_for_arm (temp1
))
4261 rtx new_src
= (subtargets
4262 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4264 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4265 source
, subtargets
, generate
);
4270 gen_rtx_SET (VOIDmode
, target
,
4273 gen_rtx_LSHIFTRT (mode
, source
,
4284 /* If we have IOR or XOR, and the constant can be loaded in a
4285 single instruction, and we can find a temporary to put it in,
4286 then this can be done in two instructions instead of 3-4. */
4288 /* TARGET can't be NULL if SUBTARGETS is 0 */
4289 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4291 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4295 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4297 emit_constant_insn (cond
,
4298 gen_rtx_SET (VOIDmode
, sub
,
4300 emit_constant_insn (cond
,
4301 gen_rtx_SET (VOIDmode
, target
,
4302 gen_rtx_fmt_ee (code
, mode
,
4313 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4314 and the remainder 0s, e.g. 0xfff00000)
4315 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4317 This can be done in 2 instructions by using shifts with mov or mvn.
For example, x = x | 0xfff00000 is generated as
mvn r0, r0, asl #12
4322 mvn r0, r0, lsr #12 */
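/* Worked bit-level check (illustrative, not from the original source):
   with set_sign_bit_copies == 12, the first MVN computes ~(y << 12), whose
   low 12 bits are all ones; the second computes ~(~(y << 12) >> 12), which
   restores the low 20 bits of y and forces bits 20-31 to one, i.e.
   y | 0xfff00000, without loading any constant.  */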
4323 if (set_sign_bit_copies
> 8
4324 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
4328 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4329 rtx shift
= GEN_INT (set_sign_bit_copies
);
4333 gen_rtx_SET (VOIDmode
, sub
,
4335 gen_rtx_ASHIFT (mode
,
4340 gen_rtx_SET (VOIDmode
, target
,
4342 gen_rtx_LSHIFTRT (mode
, sub
,
4349 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4351 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4353 For example, r0 = r0 | 0xfff is generated as
mvn r0, r0, lsr #12
mvn r0, r0, asl #12 */
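/* Worked bit-level check (illustrative, not from the original source):
   the first MVN computes ~(y >> 12), whose high 12 bits are all ones; the
   second computes ~(~(y >> 12) << 12), which restores bits 12-31 of y and
   forces the low 12 bits to one, i.e. y | 0xfff.  */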
4358 if (set_zero_bit_copies
> 8
4359 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4363 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4364 rtx shift
= GEN_INT (set_zero_bit_copies
);
4368 gen_rtx_SET (VOIDmode
, sub
,
4370 gen_rtx_LSHIFTRT (mode
,
4375 gen_rtx_SET (VOIDmode
, target
,
4377 gen_rtx_ASHIFT (mode
, sub
,
4383 /* This will never be reached for Thumb2 because orn is a valid
4384 instruction. This is for Thumb1 and the ARM 32 bit cases.
4386 x = y | constant (such that ~constant is a valid constant)
4388 x = ~(~y & ~constant).
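/* Worked example (illustrative, not from the original source): for
   x = y | 0xfffff00f the constant is not encodable, but its complement
   0x0ff0 is, so when the two-insn MVN+ORR form above is not available this
   emits MVN, AND #0xff0, MVN; by De Morgan ~(~y & 0x0ff0) == y | 0xfffff00f,
   three insns with no constant pool entry.  */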
4390 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4394 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4395 emit_constant_insn (cond
,
4396 gen_rtx_SET (VOIDmode
, sub
,
4397 gen_rtx_NOT (mode
, source
)));
4400 sub
= gen_reg_rtx (mode
);
4401 emit_constant_insn (cond
,
4402 gen_rtx_SET (VOIDmode
, sub
,
4403 gen_rtx_AND (mode
, source
,
4405 emit_constant_insn (cond
,
4406 gen_rtx_SET (VOIDmode
, target
,
4407 gen_rtx_NOT (mode
, sub
)));
4414 /* See if two shifts will do 2 or more insn's worth of work. */
4415 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4417 HOST_WIDE_INT shift_mask
= ((0xffffffff
4418 << (32 - clear_sign_bit_copies
))
4421 if ((remainder
| shift_mask
) != 0xffffffff)
4425 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4426 insns
= arm_gen_constant (AND
, mode
, cond
,
4427 remainder
| shift_mask
,
4428 new_src
, source
, subtargets
, 1);
4433 rtx targ
= subtargets
? NULL_RTX
: target
;
4434 insns
= arm_gen_constant (AND
, mode
, cond
,
4435 remainder
| shift_mask
,
4436 targ
, source
, subtargets
, 0);
4442 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4443 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4445 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4446 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4452 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4454 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4456 if ((remainder
| shift_mask
) != 0xffffffff)
4460 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4462 insns
= arm_gen_constant (AND
, mode
, cond
,
4463 remainder
| shift_mask
,
4464 new_src
, source
, subtargets
, 1);
4469 rtx targ
= subtargets
? NULL_RTX
: target
;
4471 insns
= arm_gen_constant (AND
, mode
, cond
,
4472 remainder
| shift_mask
,
4473 targ
, source
, subtargets
, 0);
4479 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4480 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4482 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4483 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4495 /* Calculate what the instruction sequences would be if we generated it
4496 normally, negated, or inverted. */
4498 /* AND cannot be split into multiple insns, so invert and use BIC. */
4501 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4504 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4509 if (can_invert
|| final_invert
)
4510 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4515 immediates
= &pos_immediates
;
4517 /* Is the negated immediate sequence more efficient? */
4518 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4521 immediates
= &neg_immediates
;
4526 /* Is the inverted immediate sequence more efficient?
4527 We must allow for an extra NOT instruction for XOR operations, although
4528 there is some chance that the final 'mvn' will get optimized later. */
4529 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4532 immediates
= &inv_immediates
;
4540 /* Now output the chosen sequence as instructions. */
4543 for (i
= 0; i
< insns
; i
++)
4545 rtx new_src
, temp1_rtx
;
4547 temp1
= immediates
->i
[i
];
4549 if (code
== SET
|| code
== MINUS
)
4550 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4551 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4552 new_src
= gen_reg_rtx (mode
);
4558 else if (can_negate
)
4561 temp1
= trunc_int_for_mode (temp1
, mode
);
4562 temp1_rtx
= GEN_INT (temp1
);
4566 else if (code
== MINUS
)
4567 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4569 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4571 emit_constant_insn (cond
,
4572 gen_rtx_SET (VOIDmode
, new_src
,
4578 can_negate
= can_invert
;
4582 else if (code
== MINUS
)
4590 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
4591 gen_rtx_NOT (mode
, source
)));
4598 /* Canonicalize a comparison so that we are more likely to recognize it.
4599 This can be done for a few constant compares, where we can make the
4600 immediate value easier to load. */
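/* Worked example (illustrative, not from the original source): "x > 0xfff"
   cannot use an immediate compare, since neither 0xfff nor -0xfff is
   encodable, but for integers x > 0xfff is the same as x >= 0x1000 and
   0x1000 is a valid immediate, so GT/#0xfff is rewritten below as
   GE/#0x1000.  */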
4603 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4604 			     bool op0_preserve_value)
4607 unsigned HOST_WIDE_INT i
, maxval
;
4609 mode
= GET_MODE (*op0
);
4610 if (mode
== VOIDmode
)
4611 mode
= GET_MODE (*op1
);
4613 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4615 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4616 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4617 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4618 for GTU/LEU in Thumb mode. */
4622 if (*code
== GT
|| *code
== LE
4623 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4625 /* Missing comparison. First try to use an available
4627 if (CONST_INT_P (*op1
))
4635 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4637 *op1
= GEN_INT (i
+ 1);
4638 *code
= *code
== GT
? GE
: LT
;
4644 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4645 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4647 *op1
= GEN_INT (i
+ 1);
4648 *code
= *code
== GTU
? GEU
: LTU
;
4657 /* If that did not work, reverse the condition. */
4658 if (!op0_preserve_value
)
4660 std::swap (*op0
, *op1
);
4661 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4667 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4668 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4669 to facilitate possible combining with a cmp into 'ands'. */
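/* Worked example (illustrative, not from the original source): for
   "if ((unsigned char) x == 0)" the operand (zero_extend:SI (subreg:QI x 0))
   becomes (and:SI x 255), so the compare against zero can later be combined
   into a single ANDS/TST with #255 instead of a separate extend followed by
   a compare.  */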
4671 && GET_CODE (*op0
) == ZERO_EXTEND
4672 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4673 && GET_MODE (XEXP (*op0
, 0)) == QImode
4674 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4675 && subreg_lowpart_p (XEXP (*op0
, 0))
4676 && *op1
== const0_rtx
)
4677 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4680 /* Comparisons smaller than DImode. Only adjust comparisons against
4681 an out-of-range constant. */
4682 if (!CONST_INT_P (*op1
)
4683 || const_ok_for_arm (INTVAL (*op1
))
4684 || const_ok_for_arm (- INTVAL (*op1
)))
4698 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4700 *op1
= GEN_INT (i
+ 1);
4701 *code
= *code
== GT
? GE
: LT
;
4709 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4711 *op1
= GEN_INT (i
- 1);
4712 *code
= *code
== GE
? GT
: LE
;
4719 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4720 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4722 *op1
= GEN_INT (i
+ 1);
4723 *code
= *code
== GTU
? GEU
: LTU
;
4731 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4733 *op1
= GEN_INT (i
- 1);
4734 *code
= *code
== GEU
? GTU
: LEU
;
4745 /* Define how to find the value returned by a function. */
4748 arm_function_value (const_tree type, const_tree func,
4749 		    bool outgoing ATTRIBUTE_UNUSED)
4752 int unsignedp ATTRIBUTE_UNUSED
;
4753 rtx r ATTRIBUTE_UNUSED
;
4755 mode
= TYPE_MODE (type
);
4757 if (TARGET_AAPCS_BASED
)
4758 return aapcs_allocate_return_reg (mode
, type
, func
);
4760 /* Promote integer types. */
4761 if (INTEGRAL_TYPE_P (type
))
4762 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
4764 /* Promotes small structs returned in a register to full-word size
4765 for big-endian AAPCS. */
4766 if (arm_return_in_msb (type
))
4768 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4769 if (size
% UNITS_PER_WORD
!= 0)
4771 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4772 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4776 return arm_libcall_value_1 (mode
);
4779 /* libcall hashtable helpers. */
4781 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
4783 typedef rtx_def value_type
;
4784 typedef rtx_def compare_type
;
4785 static inline hashval_t
hash (const value_type
*);
4786 static inline bool equal (const value_type
*, const compare_type
*);
4787 static inline void remove (value_type
*);
4791 libcall_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
4793 return rtx_equal_p (p1
, p2
);
4797 libcall_hasher::hash (const value_type
*p1
)
4799 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
4802 typedef hash_table
<libcall_hasher
> libcall_table_type
;
4805 add_libcall (libcall_table_type
*htab
, rtx libcall
)
4807 *htab
->find_slot (libcall
, INSERT
) = libcall
;
4811 arm_libcall_uses_aapcs_base (const_rtx libcall
)
4813 static bool init_done
= false;
4814 static libcall_table_type
*libcall_htab
= NULL
;
4820 libcall_htab
= new libcall_table_type (31);
4821 add_libcall (libcall_htab
,
4822 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
4823 add_libcall (libcall_htab
,
4824 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
4825 add_libcall (libcall_htab
,
4826 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
4827 add_libcall (libcall_htab
,
4828 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
4830 add_libcall (libcall_htab
,
4831 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
4832 add_libcall (libcall_htab
,
4833 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
4834 add_libcall (libcall_htab
,
4835 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
4836 add_libcall (libcall_htab
,
4837 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
4839 add_libcall (libcall_htab
,
4840 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
4841 add_libcall (libcall_htab
,
4842 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
4843 add_libcall (libcall_htab
,
4844 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
4845 add_libcall (libcall_htab
,
4846 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
4847 add_libcall (libcall_htab
,
4848 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
4849 add_libcall (libcall_htab
,
4850 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
4851 add_libcall (libcall_htab
,
4852 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
4853 add_libcall (libcall_htab
,
4854 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
4856 /* Values from double-precision helper functions are returned in core
4857 registers if the selected core only supports single-precision
4858 arithmetic, even if we are using the hard-float ABI. The same is
4859 true for single-precision helpers, but we will never be using the
4860 hard-float ABI on a CPU which doesn't support single-precision
4861 operations in hardware. */
4862 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
4863 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
4864 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
4865 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
4866 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
4867 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
4868 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
4869 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
4870 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
4871 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
4872 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
4873 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
4875 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
4879 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
4883 arm_libcall_value_1 (machine_mode mode
)
4885 if (TARGET_AAPCS_BASED
)
4886 return aapcs_libcall_value (mode
);
4887 else if (TARGET_IWMMXT_ABI
4888 && arm_vector_mode_supported_p (mode
))
4889 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
4891 return gen_rtx_REG (mode
, ARG_REGISTER (1));
4894 /* Define how to find the value returned by a library function
4895 assuming the value has mode MODE. */
4898 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
4900 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
4901 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4903 /* The following libcalls return their result in integer registers,
4904 even though they return a floating point value. */
4905 if (arm_libcall_uses_aapcs_base (libcall
))
4906 return gen_rtx_REG (mode
, ARG_REGISTER(1));
4910 return arm_libcall_value_1 (mode
);
4913 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4916 arm_function_value_regno_p (const unsigned int regno
)
4918 if (regno
== ARG_REGISTER (1)
4920 && TARGET_AAPCS_BASED
4922 && TARGET_HARD_FLOAT
4923 && regno
== FIRST_VFP_REGNUM
)
4924 || (TARGET_IWMMXT_ABI
4925 && regno
== FIRST_IWMMXT_REGNUM
))
4931 /* Determine the amount of memory needed to store the possible return
4932 registers of an untyped call. */
4934 arm_apply_result_size (void)
4940 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
4942 if (TARGET_IWMMXT_ABI
)
4949 /* Decide whether TYPE should be returned in memory (true)
4950 or in a register (false). FNTYPE is the type of the function making
4953 arm_return_in_memory (const_tree type
, const_tree fntype
)
4957 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
4959 if (TARGET_AAPCS_BASED
)
4961 /* Simple, non-aggregate types (ie not including vectors and
4962 complex) are always returned in a register (or registers).
4963 We don't care about which register here, so we can short-cut
4964 some of the detail. */
4965 if (!AGGREGATE_TYPE_P (type
)
4966 && TREE_CODE (type
) != VECTOR_TYPE
4967 && TREE_CODE (type
) != COMPLEX_TYPE
)
4970 /* Any return value that is no larger than one word can be
4972 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
4975 /* Check any available co-processors to see if they accept the
4976 type as a register candidate (VFP, for example, can return
4977 some aggregates in consecutive registers). These aren't
4978 available if the call is variadic. */
4979 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
4982 /* Vector values should be returned using ARM registers, not
4983 memory (unless they're over 16 bytes, which will break since
4984 we only have four call-clobbered registers to play with). */
4985 if (TREE_CODE (type
) == VECTOR_TYPE
)
4986 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4988 /* The rest go in memory. */
4992 if (TREE_CODE (type
) == VECTOR_TYPE
)
4993 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4995 if (!AGGREGATE_TYPE_P (type
) &&
4996 (TREE_CODE (type
) != VECTOR_TYPE
))
4997 /* All simple types are returned in registers. */
5000 if (arm_abi
!= ARM_ABI_APCS
)
5002 /* ATPCS and later return aggregate types in memory only if they are
5003 larger than a word (or are variable size). */
5004 return (size
< 0 || size
> UNITS_PER_WORD
);
5007 /* For the arm-wince targets we choose to be compatible with Microsoft's
5008 ARM and Thumb compilers, which always return aggregates in memory. */
5010 /* All structures/unions bigger than one word are returned in memory.
5011 Also catch the case where int_size_in_bytes returns -1. In this case
5012 the aggregate is either huge or of variable size, and in either case
5013 we will want to return it via memory and not in a register. */
5014 if (size
< 0 || size
> UNITS_PER_WORD
)
5017 if (TREE_CODE (type
) == RECORD_TYPE
)
5021 /* For a struct the APCS says that we only return in a register
5022 if the type is 'integer like' and every addressable element
5023 has an offset of zero. For practical purposes this means
5024 that the structure can have at most one non bit-field element
5025 and that this element must be the first one in the structure. */
5027 /* Find the first field, ignoring non FIELD_DECL things which will
5028 have been created by C++. */
5029 for (field
= TYPE_FIELDS (type
);
5030 field
&& TREE_CODE (field
) != FIELD_DECL
;
5031 field
= DECL_CHAIN (field
))
5035 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5037 /* Check that the first field is valid for returning in a register. */
5039 /* ... Floats are not allowed */
5040 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5043 /* ... Aggregates that are not themselves valid for returning in
5044 a register are not allowed. */
5045 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5048 /* Now check the remaining fields, if any. Only bitfields are allowed,
5049 since they are not addressable. */
5050 for (field
= DECL_CHAIN (field
);
5052 field
= DECL_CHAIN (field
))
5054 if (TREE_CODE (field
) != FIELD_DECL
)
5057 if (!DECL_BIT_FIELD_TYPE (field
))
5064 if (TREE_CODE (type
) == UNION_TYPE
)
5068 /* Unions can be returned in registers if every element is
5069 integral, or can be returned in an integer register. */
5070 for (field
= TYPE_FIELDS (type
);
5072 field
= DECL_CHAIN (field
))
5074 if (TREE_CODE (field
) != FIELD_DECL
)
5077 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5080 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5086 #endif /* not ARM_WINCE */
5088 /* Return all other types in memory. */
5092 const struct pcs_attribute_arg
5096 } pcs_attribute_args
[] =
5098 {"aapcs", ARM_PCS_AAPCS
},
5099 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5101 /* We could recognize these, but changes would be needed elsewhere
5102 * to implement them. */
5103 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5104 {"atpcs", ARM_PCS_ATPCS
},
5105 {"apcs", ARM_PCS_APCS
},
5107 {NULL
, ARM_PCS_UNKNOWN
}
5111 arm_pcs_from_attribute (tree attr
)
5113 const struct pcs_attribute_arg
*ptr
;
5116 /* Get the value of the argument. */
5117 if (TREE_VALUE (attr
) == NULL_TREE
5118 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5119 return ARM_PCS_UNKNOWN
;
5121 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5123 /* Check it against the list of known arguments. */
5124 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5125 if (streq (arg
, ptr
->arg
))
5128 /* An unrecognized PCS variant. */
5129 return ARM_PCS_UNKNOWN
;
5132 /* Get the PCS variant to use for this call. TYPE is the function's type
5133 specification, DECL is the specific declaration.  DECL may be null if
5134 the call could be indirect or if this is a library call. */
5136 arm_get_pcs_model (const_tree type
, const_tree decl
)
5138 bool user_convention
= false;
5139 enum arm_pcs user_pcs
= arm_pcs_default
;
5144 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5147 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5148 user_convention
= true;
5151 if (TARGET_AAPCS_BASED
)
5153 /* Detect varargs functions. These always use the base rules
5154 (no argument is ever a candidate for a co-processor
5156 bool base_rules
= stdarg_p (type
);
5158 if (user_convention
)
5160 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5161 sorry ("non-AAPCS derived PCS variant");
5162 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5163 error ("variadic functions must use the base AAPCS variant");
5167 return ARM_PCS_AAPCS
;
5168 else if (user_convention
)
5170 else if (decl
&& flag_unit_at_a_time
)
5172 /* Local functions never leak outside this compilation unit,
5173 so we are free to use whatever conventions are
5175 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5176 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5178 return ARM_PCS_AAPCS_LOCAL
;
5181 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5182 sorry ("PCS variant");
5184 /* For everything else we use the target's default. */
5185 return arm_pcs_default
;
5190 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5191 const_tree fntype ATTRIBUTE_UNUSED
,
5192 rtx libcall ATTRIBUTE_UNUSED
,
5193 const_tree fndecl ATTRIBUTE_UNUSED
)
5195 /* Record the unallocated VFP registers. */
5196 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5197 pcum
->aapcs_vfp_reg_alloc
= 0;
5200 /* Walk down the type tree of TYPE counting consecutive base elements.
5201 If *MODEP is VOIDmode, then set it to the first valid floating point
5202 type. If a non-floating point type is found, or if a floating point
5203 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5204 otherwise return the count in the sub-tree. */
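/* Illustrative example (not from the original source): under the AAPCS VFP
   variant a type such as

     struct example_point3 { float x, y, z; };

   walks to *modep == SFmode with a count of 3, so it is a homogeneous
   floating-point aggregate and can use s0-s2; a struct that mixes float and
   int members makes this function return -1 and falls back to core
   registers or the stack.  */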
5206 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5211 switch (TREE_CODE (type
))
5214 mode
= TYPE_MODE (type
);
5215 if (mode
!= DFmode
&& mode
!= SFmode
)
5218 if (*modep
== VOIDmode
)
5227 mode
= TYPE_MODE (TREE_TYPE (type
));
5228 if (mode
!= DFmode
&& mode
!= SFmode
)
5231 if (*modep
== VOIDmode
)
5240 /* Use V2SImode and V4SImode as representatives of all 64-bit
5241 and 128-bit vector types, whether or not those modes are
5242 supported with the present options. */
5243 size
= int_size_in_bytes (type
);
5256 if (*modep
== VOIDmode
)
5259 /* Vector modes are considered to be opaque: two vectors are
5260 equivalent for the purposes of being homogeneous aggregates
5261 if they are the same size. */
5270 tree index
= TYPE_DOMAIN (type
);
5272 /* Can't handle incomplete types nor sizes that are not
5274 if (!COMPLETE_TYPE_P (type
)
5275 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5278 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5281 || !TYPE_MAX_VALUE (index
)
5282 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5283 || !TYPE_MIN_VALUE (index
)
5284 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5288 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5289 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5291 /* There must be no padding. */
5292 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5304 /* Can't handle incomplete types nor sizes that are not
5306 if (!COMPLETE_TYPE_P (type
)
5307 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5310 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5312 if (TREE_CODE (field
) != FIELD_DECL
)
5315 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5321 /* There must be no padding. */
5322 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5329 case QUAL_UNION_TYPE
:
5331 /* These aren't very interesting except in a degenerate case. */
5336 /* Can't handle incomplete types nor sizes that are not
5338 if (!COMPLETE_TYPE_P (type
)
5339 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5342 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5344 if (TREE_CODE (field
) != FIELD_DECL
)
5347 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5350 count
= count
> sub_count
? count
: sub_count
;
5353 /* There must be no padding. */
5354 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5367 /* Return true if PCS_VARIANT should use VFP registers. */
5369 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5371 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5373 static bool seen_thumb1_vfp
= false;
5375 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5377 sorry ("Thumb-1 hard-float VFP ABI");
5378 /* sorry() is not immediately fatal, so only display this once. */
5379 seen_thumb1_vfp
= true;
5385 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5388 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5389 (TARGET_VFP_DOUBLE
|| !is_double
));
5392 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5393 suitable for passing or returning in VFP registers for the PCS
5394 variant selected. If it is, then *BASE_MODE is updated to contain
5395 a machine mode describing each element of the argument's type and
5396 *COUNT to hold the number of such elements. */
5398 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5399 machine_mode mode
, const_tree type
,
5400 machine_mode
*base_mode
, int *count
)
5402 machine_mode new_mode
= VOIDmode
;
5404 /* If we have the type information, prefer that to working things
5405 out from the mode. */
5408 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5410 if (ag_count
> 0 && ag_count
<= 4)
5415 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5416 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5417 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5422 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5425 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5431 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5434 *base_mode
= new_mode
;
5439 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5440 machine_mode mode
, const_tree type
)
5442 int count ATTRIBUTE_UNUSED
;
5443 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5445 if (!use_vfp_abi (pcs_variant
, false))
5447 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5452 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5455 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5458 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5459 &pcum
->aapcs_vfp_rmode
,
5460 &pcum
->aapcs_vfp_rcount
);
5464 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5465 const_tree type ATTRIBUTE_UNUSED
)
5467 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5468 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5471 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5472 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5474 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5476 || (mode
== TImode
&& ! TARGET_NEON
)
5477 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5480 int rcount
= pcum
->aapcs_vfp_rcount
;
5482 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5486 /* Avoid using unsupported vector modes. */
5487 if (rmode
== V2SImode
)
5489 else if (rmode
== V4SImode
)
5496 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5497 for (i
= 0; i
< rcount
; i
++)
5499 rtx tmp
= gen_rtx_REG (rmode
,
5500 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5501 tmp
= gen_rtx_EXPR_LIST
5503 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5504 XVECEXP (par
, 0, i
) = tmp
;
5507 pcum
->aapcs_reg
= par
;
5510 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5517 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5519 const_tree type ATTRIBUTE_UNUSED
)
5521 if (!use_vfp_abi (pcs_variant
, false))
5524 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5527 machine_mode ag_mode
;
5532 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5537 if (ag_mode
== V2SImode
)
5539 else if (ag_mode
== V4SImode
)
5545 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5546 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5547 for (i
= 0; i
< count
; i
++)
5549 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5550 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5551 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5552 XVECEXP (par
, 0, i
) = tmp
;
5558 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5562 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5563 machine_mode mode ATTRIBUTE_UNUSED
,
5564 const_tree type ATTRIBUTE_UNUSED
)
5566 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5567 pcum
->aapcs_vfp_reg_alloc
= 0;
5571 #define AAPCS_CP(X) \
5573 aapcs_ ## X ## _cum_init, \
5574 aapcs_ ## X ## _is_call_candidate, \
5575 aapcs_ ## X ## _allocate, \
5576 aapcs_ ## X ## _is_return_candidate, \
5577 aapcs_ ## X ## _allocate_return_reg, \
5578 aapcs_ ## X ## _advance \
5581 /* Table of co-processors that can be used to pass arguments in
5582 registers.  Ideally no argument should be a candidate for more than
5583 one co-processor table entry, but the table is processed in order
5584 and stops after the first match. If that entry then fails to put
5585 the argument into a co-processor register, the argument will go on
5589 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5590 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5592 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5593 BLKmode) is a candidate for this co-processor's registers; this
5594 function should ignore any position-dependent state in
5595 CUMULATIVE_ARGS and only use call-type dependent information. */
5596 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5598 /* Return true if the argument does get a co-processor register; it
5599 should set aapcs_reg to an RTX of the register allocated as is
5600 required for a return from FUNCTION_ARG. */
5601 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5603 /* Return true if a result of mode MODE (or type TYPE if MODE is
5604 BLKmode) can be returned in this co-processor's registers. */
5605 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
5607 /* Allocate and return an RTX element to hold the return type of a
5608 call, this routine must not fail and will only be called if
5609 is_return_candidate returned true with the same parameters. */
5610 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
5612 /* Finish processing this argument and prepare to start processing
5614 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5615 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5623 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5628 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5629 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5636 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5638 /* We aren't passed a decl, so we can't check that a call is local.
5639 However, it isn't clear that that would be a win anyway, since it
5640 might limit some tail-calling opportunities. */
5641 enum arm_pcs pcs_variant
;
5645 const_tree fndecl
= NULL_TREE
;
5647 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5650 fntype
= TREE_TYPE (fntype
);
5653 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5656 pcs_variant
= arm_pcs_default
;
5658 if (pcs_variant
!= ARM_PCS_AAPCS
)
5662 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5663 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
5672 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
5675 /* We aren't passed a decl, so we can't check that a call is local.
5676 However, it isn't clear that that would be a win anyway, since it
5677 might limit some tail-calling opportunities. */
5678 enum arm_pcs pcs_variant
;
5679 int unsignedp ATTRIBUTE_UNUSED
;
5683 const_tree fndecl
= NULL_TREE
;
5685 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5688 fntype
= TREE_TYPE (fntype
);
5691 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5694 pcs_variant
= arm_pcs_default
;
5696 /* Promote integer types. */
5697 if (type
&& INTEGRAL_TYPE_P (type
))
5698 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5700 if (pcs_variant
!= ARM_PCS_AAPCS
)
5704 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5705 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5707 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5711 /* Promotes small structs returned in a register to full-word size
5712 for big-endian AAPCS. */
5713 if (type
&& arm_return_in_msb (type
))
5715 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5716 if (size
% UNITS_PER_WORD
!= 0)
5718 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5719 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5723 return gen_rtx_REG (mode
, R0_REGNUM
);
5727 aapcs_libcall_value (machine_mode mode
)
5729 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5730 && GET_MODE_SIZE (mode
) <= 4)
5733 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
5736 /* Lay out a function argument using the AAPCS rules. The rule
5737 numbers referred to here are those in the AAPCS. */
5739 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5740 		  const_tree type, bool named)
5745 /* We only need to do this once per argument. */
5746 if (pcum
->aapcs_arg_processed
)
5749 pcum
->aapcs_arg_processed
= true;
5751 /* Special case: if named is false then we are handling an incoming
5752 anonymous argument which is on the stack. */
5756 /* Is this a potential co-processor register candidate? */
5757 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5759 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
5760 pcum
->aapcs_cprc_slot
= slot
;
5762 /* We don't have to apply any of the rules from part B of the
5763 preparation phase, these are handled elsewhere in the
5768 /* A Co-processor register candidate goes either in its own
5769 class of registers or on the stack. */
5770 if (!pcum
->aapcs_cprc_failed
[slot
])
5772 /* C1.cp - Try to allocate the argument to co-processor
5774 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
5777 /* C2.cp - Put the argument on the stack and note that we
5778 can't assign any more candidates in this slot. We also
5779 need to note that we have allocated stack space, so that
5780 we won't later try to split a non-cprc candidate between
5781 core registers and the stack. */
5782 pcum
->aapcs_cprc_failed
[slot
] = true;
5783 pcum
->can_split
= false;
5786 /* We didn't get a register, so this argument goes on the
5788 gcc_assert (pcum
->can_split
== false);
5793 /* C3 - For double-word aligned arguments, round the NCRN up to the
5794 next even number. */
5795 ncrn
= pcum
->aapcs_ncrn
;
5796 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
5799 nregs
= ARM_NUM_REGS2(mode
, type
);
5801 /* Sigh, this test should really assert that nregs > 0, but a GCC
5802 extension allows empty structs and then gives them empty size; it
5803 then allows such a structure to be passed by value. For some of
5804 the code below we have to pretend that such an argument has
5805 non-zero size so that we 'locate' it correctly either in
5806 registers or on the stack. */
5807 gcc_assert (nregs
>= 0);
5809 nregs2
= nregs
? nregs
: 1;
5811 /* C4 - Argument fits entirely in core registers. */
5812 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
5814 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5815 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
5819 /* C5 - Some core registers left and there are no arguments already
5820 on the stack: split this argument between the remaining core
5821 registers and the stack. */
5822 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
5824 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5825 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5826 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
5830 /* C6 - NCRN is set to 4. */
5831 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5833 /* C7,C8 - argument goes on the stack.  We have nothing to do here; see the worked example below. */
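/* Worked example (illustrative, not from the original source), using the
   base (non-VFP) rules above for f (int a, double b, int c): a takes r0;
   b needs doubleword alignment, so C3 rounds the NCRN up from 1 to 2 and b
   occupies r2/r3 under C4; c then finds no core registers left, so C6 sets
   the NCRN to 4 and C7/C8 place it on the stack.  */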
5837 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5838 for a call to a function whose data type is FNTYPE.
5839 For a library call, FNTYPE is NULL. */
5841 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
5843 tree fndecl ATTRIBUTE_UNUSED
)
5845 /* Long call handling. */
5847 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5849 pcum
->pcs_variant
= arm_pcs_default
;
5851 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5853 if (arm_libcall_uses_aapcs_base (libname
))
5854 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
5856 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
5857 pcum
->aapcs_reg
= NULL_RTX
;
5858 pcum
->aapcs_partial
= 0;
5859 pcum
->aapcs_arg_processed
= false;
5860 pcum
->aapcs_cprc_slot
= -1;
5861 pcum
->can_split
= true;
5863 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5867 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5869 pcum
->aapcs_cprc_failed
[i
] = false;
5870 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
5878 /* On the ARM, the offset starts at 0. */
5880 pcum
->iwmmxt_nregs
= 0;
5881 pcum
->can_split
= true;
5883 /* Varargs vectors are treated the same as long long.
5884 named_count avoids having to change the way arm handles 'named' */
5885 pcum
->named_count
= 0;
5888 if (TARGET_REALLY_IWMMXT
&& fntype
)
5892 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
5894 fn_arg
= TREE_CHAIN (fn_arg
))
5895 pcum
->named_count
+= 1;
5897 if (! pcum
->named_count
)
5898 pcum
->named_count
= INT_MAX
;
5902 /* Return true if we use LRA instead of reload pass. */
5906 return arm_lra_flag
;
5909 /* Return true if mode/type need doubleword alignment. */
5911 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
5913 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
5914 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
5918 /* Determine where to put an argument to a function.
5919 Value is zero to push the argument on the stack,
5920 or a hard register in which to store the argument.
5922 MODE is the argument's machine mode.
5923 TYPE is the data type of the argument (as a tree).
5924 This is null for libcalls where that information may
5926 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5927 the preceding args and about the function being called.
5928 NAMED is nonzero if this argument is a named parameter
5929 (otherwise it is an extra parameter matching an ellipsis).
5931 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5932 other arguments are passed on the stack. If (NAMED == 0) (which happens
5933 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5934 defined), say it is passed in the stack (function_prologue will
5935 indeed make it pass in the stack if necessary). */
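/* Worked example (illustrative, not from the original source): with the
   AAPCS base rules, for g (int a, <16-byte struct> s) the int takes r0 and
   the struct starts at r1; only r1-r3 remain, so its first 12 bytes are
   passed in registers and the last 4 bytes on the stack (the split case
   handled in aapcs_layout_arg above), after which any further argument
   goes on the stack.  */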
5938 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5939 		  const_tree type, bool named)
5941 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5944 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5945 a call insn (op3 of a call_value insn). */
5946 if (mode
== VOIDmode
)
5949 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5951 aapcs_layout_arg (pcum
, mode
, type
, named
);
5952 return pcum
->aapcs_reg
;
5955 /* Varargs vectors are treated the same as long long.
5956 named_count avoids having to change the way arm handles 'named' */
5957 if (TARGET_IWMMXT_ABI
5958 && arm_vector_mode_supported_p (mode
)
5959 && pcum
->named_count
> pcum
->nargs
+ 1)
5961 if (pcum
->iwmmxt_nregs
<= 9)
5962 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
5965 pcum
->can_split
= false;
5970 /* Put doubleword aligned quantities in even register pairs. */
5972 && ARM_DOUBLEWORD_ALIGN
5973 && arm_needs_doubleword_align (mode
, type
))
5976 /* Only allow splitting an arg between regs and memory if all preceding
5977 args were allocated to regs. For args passed by reference we only count
5978 the reference pointer. */
5979 if (pcum
->can_split
)
5982 nregs
= ARM_NUM_REGS2 (mode
, type
);
5984 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
5987 return gen_rtx_REG (mode
, pcum
->nregs
);
5991 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
5993 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
5994 ? DOUBLEWORD_ALIGNMENT
5999 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6000 tree type
, bool named
)
6002 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6003 int nregs
= pcum
->nregs
;
6005 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6007 aapcs_layout_arg (pcum
, mode
, type
, named
);
6008 return pcum
->aapcs_partial
;
6011 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6014 if (NUM_ARG_REGS
> nregs
6015 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6017 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6022 /* Update the data in PCUM to advance over an argument
6023 of mode MODE and data type TYPE.
6024 (TYPE is null for libcalls where that information may not be available.) */
6027 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6028 const_tree type
, bool named
)
6030 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6032 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6034 aapcs_layout_arg (pcum
, mode
, type
, named
);
6036 if (pcum
->aapcs_cprc_slot
>= 0)
6038 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6040 pcum
->aapcs_cprc_slot
= -1;
6043 /* Generic stuff. */
6044 pcum
->aapcs_arg_processed
= false;
6045 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6046 pcum
->aapcs_reg
= NULL_RTX
;
6047 pcum
->aapcs_partial
= 0;
6052 if (arm_vector_mode_supported_p (mode
)
6053 && pcum
->named_count
> pcum
->nargs
6054 && TARGET_IWMMXT_ABI
)
6055 pcum
->iwmmxt_nregs
+= 1;
6057 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6061 /* Variable sized types are passed by reference. This is a GCC
6062 extension to the ARM ABI. */
6065 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6066 machine_mode mode ATTRIBUTE_UNUSED
,
6067 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6069 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6072 /* Encode the current state of the #pragma [no_]long_calls. */
6075 OFF
, /* No #pragma [no_]long_calls is in effect. */
6076 LONG
, /* #pragma long_calls is in effect. */
6077 SHORT
/* #pragma no_long_calls is in effect. */
6080 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6083 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6085 arm_pragma_long_calls
= LONG
;
6089 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6091 arm_pragma_long_calls
= SHORT
;
6095 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6097 arm_pragma_long_calls
= OFF
;
6100 /* Handle an attribute requiring a FUNCTION_DECL;
6101 arguments as in struct attribute_spec.handler. */
6103 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6104 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6106 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6108 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6110 *no_add_attrs
= true;
6116 /* Handle an "interrupt" or "isr" attribute;
6117 arguments as in struct attribute_spec.handler. */
6119 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6124 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6126 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6128 *no_add_attrs
= true;
6130 /* FIXME: the argument if any is checked for type attributes;
6131 should it be checked for decl ones? */
6135 if (TREE_CODE (*node
) == FUNCTION_TYPE
6136 || TREE_CODE (*node
) == METHOD_TYPE
)
6138 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6140 warning (OPT_Wattributes
, "%qE attribute ignored",
6142 *no_add_attrs
= true;
6145 else if (TREE_CODE (*node
) == POINTER_TYPE
6146 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6147 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6148 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6150 *node
= build_variant_type_copy (*node
);
6151 TREE_TYPE (*node
) = build_type_attribute_variant
6153 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6154 *no_add_attrs
= true;
6158 /* Possibly pass this attribute on from the type to a decl. */
6159 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6160 | (int) ATTR_FLAG_FUNCTION_NEXT
6161 | (int) ATTR_FLAG_ARRAY_NEXT
))
6163 *no_add_attrs
= true;
6164 return tree_cons (name
, args
, NULL_TREE
);
6168 warning (OPT_Wattributes
, "%qE attribute ignored",
6177 /* Handle a "pcs" attribute; arguments as in struct
6178 attribute_spec.handler. */
6180 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6181 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6183 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6185 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6186 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */

static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__((long call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
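/* Illustrative walk-through (not part of the original source) of rules
   a-f above: even with -mlong-calls in effect (rule c), a call to a
   function declared __attribute__((short_call)) is emitted as a short
   call (rule d wins), and a call to a function already known to live in
   the same section as the caller is also short (rule f), because the
   branch is guaranteed to be in range at link time.  */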
/* Return nonzero if it is ok to make a tail-call to DECL.  */

static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
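/* Worked example (not part of the original source): under -fPIC the
   operand (const (plus (symbol_ref "x") (const_int 8))) is rejected
   here, since its SYMBOL_REF needs a GOT-relative load first;
   legitimize_pic_address below rewrites it into a load through the PIC
   register plus the constant offset.  A plain (const_int 8) or a
   label-difference constant is accepted.  */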
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      insert_insn_on_edge (seq,
				   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	    }
	}
    }
}
6496 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
6498 if (GET_CODE (orig
) == SYMBOL_REF
6499 || GET_CODE (orig
) == LABEL_REF
)
6505 gcc_assert (can_create_pseudo_p ());
6506 reg
= gen_reg_rtx (Pmode
);
6509 /* VxWorks does not impose a fixed gap between segments; the run-time
6510 gap can be different from the object-file gap. We therefore can't
6511 use GOTOFF unless we are absolutely sure that the symbol is in the
6512 same segment as the GOT. Unfortunately, the flexibility of linker
6513 scripts means that we can't be sure of that in general, so assume
6514 that GOTOFF is never valid on VxWorks. */
6515 if ((GET_CODE (orig
) == LABEL_REF
6516 || (GET_CODE (orig
) == SYMBOL_REF
&&
6517 SYMBOL_REF_LOCAL_P (orig
)))
6519 && arm_pic_data_is_text_relative
)
6520 insn
= arm_pic_static_addr (orig
, reg
);
6526 /* If this function doesn't have a pic register, create one now. */
6527 require_pic_register ();
6529 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6531 /* Make the MEM as close to a constant as possible. */
6532 mem
= SET_SRC (pat
);
6533 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6534 MEM_READONLY_P (mem
) = 1;
6535 MEM_NOTRAP_P (mem
) = 1;
6537 insn
= emit_insn (pat
);
6540 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6542 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6546 else if (GET_CODE (orig
) == CONST
)
6550 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6551 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6554 /* Handle the case where we have: const (UNSPEC_TLS). */
6555 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6556 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6559 /* Handle the case where we have:
6560 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6562 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6563 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6564 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6566 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6572 gcc_assert (can_create_pseudo_p ());
6573 reg
= gen_reg_rtx (Pmode
);
6576 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6578 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6579 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6580 base
== reg
? 0 : reg
);
6582 if (CONST_INT_P (offset
))
6584 /* The base register doesn't really matter, we only want to
6585 test the index for the appropriate mode. */
6586 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6588 gcc_assert (can_create_pseudo_p ());
6589 offset
= force_reg (Pmode
, offset
);
6592 if (CONST_INT_P (offset
))
6593 return plus_constant (Pmode
, base
, INTVAL (offset
));
6596 if (GET_MODE_SIZE (mode
) > 4
6597 && (GET_MODE_CLASS (mode
) == MODE_INT
6598 || TARGET_SOFT_FLOAT
))
6600 emit_insn (gen_addsi3 (reg
, base
, offset
));
6604 return gen_rtx_PLUS (Pmode
, base
, offset
);
6611 /* Find a spare register to use during the prolog of a function. */
6614 thumb_find_work_register (unsigned long pushed_regs_mask
)
6618 /* Check the argument registers first as these are call-used. The
6619 register allocation order means that sometimes r3 might be used
6620 but earlier argument registers might not, so check them all. */
6621 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6622 if (!df_regs_ever_live_p (reg
))
6625 /* Before going on to check the call-saved registers we can try a couple
6626 more ways of deducing that r3 is available. The first is when we are
6627 pushing anonymous arguments onto the stack and we have less than 4
6628 registers worth of fixed arguments(*). In this case r3 will be part of
6629 the variable argument list and so we can be sure that it will be
6630 pushed right at the start of the function. Hence it will be available
6631 for the rest of the prologue.
6632 (*): ie crtl->args.pretend_args_size is greater than 0. */
6633 if (cfun
->machine
->uses_anonymous_args
6634 && crtl
->args
.pretend_args_size
> 0)
6635 return LAST_ARG_REGNUM
;
6637 /* The other case is when we have fixed arguments but less than 4 registers
6638 worth. In this case r3 might be used in the body of the function, but
6639 it is not being used to convey an argument into the function. In theory
6640 we could just check crtl->args.size to see how many bytes are
6641 being passed in argument registers, but it seems that it is unreliable.
6642 Sometimes it will have the value 0 when in fact arguments are being
6643 passed. (See testcase execute/20021111-1.c for an example). So we also
6644 check the args_info.nregs field as well. The problem with this field is
6645 that it makes no allowances for arguments that are passed to the
6646 function but which are not used. Hence we could miss an opportunity
6647 when a function has an unused argument in r3. But it is better to be
6648 safe than to be sorry. */
6649 if (! cfun
->machine
->uses_anonymous_args
6650 && crtl
->args
.size
>= 0
6651 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6652 && (TARGET_AAPCS_BASED
6653 ? crtl
->args
.info
.aapcs_ncrn
< 4
6654 : crtl
->args
.info
.nregs
< 4))
6655 return LAST_ARG_REGNUM
;
6657 /* Otherwise look for a call-saved register that is going to be pushed. */
6658 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6659 if (pushed_regs_mask
& (1 << reg
))
6664 /* Thumb-2 can use high regs. */
6665 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6666 if (pushed_regs_mask
& (1 << reg
))
6669 /* Something went wrong - thumb_compute_save_reg_mask()
6670 should have arranged for a suitable register to be pushed. */
static GTY(()) int pic_labelno;
6676 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6680 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6682 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6684 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6687 gcc_assert (flag_pic
);
6689 pic_reg
= cfun
->machine
->pic_reg
;
6690 if (TARGET_VXWORKS_RTP
)
6692 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6693 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6694 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6696 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6698 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6699 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6703 /* We use an UNSPEC rather than a LABEL_REF because this label
6704 never appears in the code stream. */
6706 labelno
= GEN_INT (pic_labelno
++);
6707 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6708 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6710 /* On the ARM the PC register contains 'dot + 8' at the time of the
6711 addition, on the Thumb it is 'dot + 4'. */
6712 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6713 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6715 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6719 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6721 else /* TARGET_THUMB1 */
6723 if (arm_pic_register
!= INVALID_REGNUM
6724 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6726 /* We will have pushed the pic register, so we should always be
6727 able to find a work register. */
6728 pic_tmp
= gen_rtx_REG (SImode
,
6729 thumb_find_work_register (saved_regs
));
6730 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6731 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6732 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6734 else if (arm_pic_register
!= INVALID_REGNUM
6735 && arm_pic_register
> LAST_LO_REGNUM
6736 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
6738 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6739 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
6740 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
6743 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6747 /* Need to emit this whether or not we obey regdecls,
6748 since setjmp/longjmp can cause life info to screw up. */
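/* Illustrative sketch (not part of the original source) of the kind of
   sequence arm_load_pic_register builds for the non-VxWorks case, shown
   for ARM state where reading the PC yields 'dot + 8':

	ldr	rPIC, .LCPIC            @ rPIC = GOT base - (.LPIC0 + 8)
   .LPIC0:
	add	rPIC, pc, rPIC          @ rPIC now points at the GOT base

   The register and label names are invented for the example.  The 8
   (4 in Thumb state) added via plus_constant compensates for the
   architectural PC read-ahead; the label itself only exists inside the
   UNSPEC_PIC_LABEL wrapper and never appears in the insn stream.  */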
6752 /* Generate code to load the address of a static var when flag_pic is set. */
6754 arm_pic_static_addr (rtx orig
, rtx reg
)
6756 rtx l1
, labelno
, offset_rtx
, insn
;
6758 gcc_assert (flag_pic
);
6760 /* We use an UNSPEC rather than a LABEL_REF because this label
6761 never appears in the code stream. */
6762 labelno
= GEN_INT (pic_labelno
++);
6763 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6764 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6766 /* On the ARM the PC register contains 'dot + 8' at the time of the
6767 addition, on the Thumb it is 'dot + 4'. */
6768 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6769 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
6770 UNSPEC_SYMBOL_OFFSET
);
6771 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
6773 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
6777 /* Return nonzero if X is valid as an ARM state addressing register. */
6779 arm_address_register_rtx_p (rtx x
, int strict_p
)
6789 return ARM_REGNO_OK_FOR_BASE_P (regno
);
6791 return (regno
<= LAST_ARM_REGNUM
6792 || regno
>= FIRST_PSEUDO_REGISTER
6793 || regno
== FRAME_POINTER_REGNUM
6794 || regno
== ARG_POINTER_REGNUM
);
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
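/* Illustrative example (not part of the original source): an expression
   such as (minus (symbol_ref "x") (label_ref .L2)), with both ends in the
   current translation unit, resolves to a fixed PC-relative distance, so
   the PIC code can keep it as-is instead of routing "x" through the
   GOT.  */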
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
6819 /* Return nonzero if X is a valid ARM state address operand. */
6821 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
6825 enum rtx_code code
= GET_CODE (x
);
6827 if (arm_address_register_rtx_p (x
, strict_p
))
6830 use_ldrd
= (TARGET_LDRD
6832 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6834 if (code
== POST_INC
|| code
== PRE_DEC
6835 || ((code
== PRE_INC
|| code
== POST_DEC
)
6836 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6837 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6839 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6840 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6841 && GET_CODE (XEXP (x
, 1)) == PLUS
6842 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6844 rtx addend
= XEXP (XEXP (x
, 1), 1);
6846 /* Don't allow ldrd post increment by register because it's hard
6847 to fixup invalid register choices. */
6849 && GET_CODE (x
) == POST_MODIFY
6853 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
6854 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
6857 /* After reload constants split into minipools will have addresses
6858 from a LABEL_REF. */
6859 else if (reload_completed
6860 && (code
== LABEL_REF
6862 && GET_CODE (XEXP (x
, 0)) == PLUS
6863 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6864 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6867 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6870 else if (code
== PLUS
)
6872 rtx xop0
= XEXP (x
, 0);
6873 rtx xop1
= XEXP (x
, 1);
6875 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6876 && ((CONST_INT_P (xop1
)
6877 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
6878 || (!strict_p
&& will_be_in_index_register (xop1
))))
6879 || (arm_address_register_rtx_p (xop1
, strict_p
)
6880 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
6884 /* Reload currently can't handle MINUS, so disable this for now */
6885 else if (GET_CODE (x
) == MINUS
)
6887 rtx xop0
= XEXP (x
, 0);
6888 rtx xop1
= XEXP (x
, 1);
6890 return (arm_address_register_rtx_p (xop0
, strict_p
)
6891 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
6895 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6896 && code
== SYMBOL_REF
6897 && CONSTANT_POOL_ADDRESS_P (x
)
6899 && symbol_mentioned_p (get_pool_constant (x
))
6900 && ! pcrel_constant_p (get_pool_constant (x
))))
6906 /* Return nonzero if X is a valid Thumb-2 address operand. */
6908 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
6911 enum rtx_code code
= GET_CODE (x
);
6913 if (arm_address_register_rtx_p (x
, strict_p
))
6916 use_ldrd
= (TARGET_LDRD
6918 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6920 if (code
== POST_INC
|| code
== PRE_DEC
6921 || ((code
== PRE_INC
|| code
== POST_DEC
)
6922 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6923 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6925 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6926 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6927 && GET_CODE (XEXP (x
, 1)) == PLUS
6928 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6930 /* Thumb-2 only has autoincrement by constant. */
6931 rtx addend
= XEXP (XEXP (x
, 1), 1);
6932 HOST_WIDE_INT offset
;
6934 if (!CONST_INT_P (addend
))
6937 offset
= INTVAL(addend
);
6938 if (GET_MODE_SIZE (mode
) <= 4)
6939 return (offset
> -256 && offset
< 256);
6941 return (use_ldrd
&& offset
> -1024 && offset
< 1024
6942 && (offset
& 3) == 0);
6945 /* After reload constants split into minipools will have addresses
6946 from a LABEL_REF. */
6947 else if (reload_completed
6948 && (code
== LABEL_REF
6950 && GET_CODE (XEXP (x
, 0)) == PLUS
6951 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6952 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6955 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6958 else if (code
== PLUS
)
6960 rtx xop0
= XEXP (x
, 0);
6961 rtx xop1
= XEXP (x
, 1);
6963 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6964 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
6965 || (!strict_p
&& will_be_in_index_register (xop1
))))
6966 || (arm_address_register_rtx_p (xop1
, strict_p
)
6967 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
6970 /* Normally we can assign constant values to target registers without
6971 the help of constant pool. But there are cases we have to use constant
6973 1) assign a label to register.
6974 2) sign-extend a 8bit value to 32bit and then assign to register.
6976 Constant pool access in format:
6977 (set (reg r0) (mem (symbol_ref (".LC0"))))
6978 will cause the use of literal pool (later in function arm_reorg).
6979 So here we mark such format as an invalid format, then the compiler
6980 will adjust it into:
6981 (set (reg r0) (symbol_ref (".LC0")))
6982 (set (reg r0) (mem (reg r0))).
6983 No extra register is required, and (mem (reg r0)) won't cause the use
6984 of literal pools. */
6985 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
6986 && CONSTANT_POOL_ADDRESS_P (x
))
6989 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6990 && code
== SYMBOL_REF
6991 && CONSTANT_POOL_ADDRESS_P (x
)
6993 && symbol_mentioned_p (get_pool_constant (x
))
6994 && ! pcrel_constant_p (get_pool_constant (x
))))
7000 /* Return nonzero if INDEX is valid for an address index operand in
7003 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7006 HOST_WIDE_INT range
;
7007 enum rtx_code code
= GET_CODE (index
);
7009 /* Standard coprocessor addressing modes. */
7010 if (TARGET_HARD_FLOAT
7012 && (mode
== SFmode
|| mode
== DFmode
))
7013 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7014 && INTVAL (index
) > -1024
7015 && (INTVAL (index
) & 3) == 0);
7017 /* For quad modes, we restrict the constant offset to be slightly less
7018 than what the instruction format permits. We do this because for
7019 quad mode moves, we will actually decompose them into two separate
7020 double-mode reads or writes. INDEX must therefore be a valid
7021 (double-mode) offset and so should INDEX+8. */
7022 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7023 return (code
== CONST_INT
7024 && INTVAL (index
) < 1016
7025 && INTVAL (index
) > -1024
7026 && (INTVAL (index
) & 3) == 0);
7028 /* We have no such constraint on double mode offsets, so we permit the
7029 full range of the instruction format. */
7030 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7031 return (code
== CONST_INT
7032 && INTVAL (index
) < 1024
7033 && INTVAL (index
) > -1024
7034 && (INTVAL (index
) & 3) == 0);
7036 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7037 return (code
== CONST_INT
7038 && INTVAL (index
) < 1024
7039 && INTVAL (index
) > -1024
7040 && (INTVAL (index
) & 3) == 0);
7042 if (arm_address_register_rtx_p (index
, strict_p
)
7043 && (GET_MODE_SIZE (mode
) <= 4))
7046 if (mode
== DImode
|| mode
== DFmode
)
7048 if (code
== CONST_INT
)
7050 HOST_WIDE_INT val
= INTVAL (index
);
7053 return val
> -256 && val
< 256;
7055 return val
> -4096 && val
< 4092;
7058 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7061 if (GET_MODE_SIZE (mode
) <= 4
7065 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7069 rtx xiop0
= XEXP (index
, 0);
7070 rtx xiop1
= XEXP (index
, 1);
7072 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7073 && power_of_two_operand (xiop1
, SImode
))
7074 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7075 && power_of_two_operand (xiop0
, SImode
)));
7077 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7078 || code
== ASHIFT
|| code
== ROTATERT
)
7080 rtx op
= XEXP (index
, 1);
7082 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7085 && INTVAL (op
) <= 31);
7089 /* For ARM v4 we may be doing a sign-extend operation during the
7095 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7101 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7103 return (code
== CONST_INT
7104 && INTVAL (index
) < range
7105 && INTVAL (index
) > -range
);
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
7122 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7124 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7126 enum rtx_code code
= GET_CODE (index
);
7128 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7129 /* Standard coprocessor addressing modes. */
7130 if (TARGET_HARD_FLOAT
7132 && (mode
== SFmode
|| mode
== DFmode
))
7133 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7134 /* Thumb-2 allows only > -256 index range for it's core register
7135 load/stores. Since we allow SF/DF in core registers, we have
7136 to use the intersection between -256~4096 (core) and -1024~1024
7138 && INTVAL (index
) > -256
7139 && (INTVAL (index
) & 3) == 0);
7141 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7143 /* For DImode assume values will usually live in core regs
7144 and only allow LDRD addressing modes. */
7145 if (!TARGET_LDRD
|| mode
!= DImode
)
7146 return (code
== CONST_INT
7147 && INTVAL (index
) < 1024
7148 && INTVAL (index
) > -1024
7149 && (INTVAL (index
) & 3) == 0);
7152 /* For quad modes, we restrict the constant offset to be slightly less
7153 than what the instruction format permits. We do this because for
7154 quad mode moves, we will actually decompose them into two separate
7155 double-mode reads or writes. INDEX must therefore be a valid
7156 (double-mode) offset and so should INDEX+8. */
7157 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7158 return (code
== CONST_INT
7159 && INTVAL (index
) < 1016
7160 && INTVAL (index
) > -1024
7161 && (INTVAL (index
) & 3) == 0);
7163 /* We have no such constraint on double mode offsets, so we permit the
7164 full range of the instruction format. */
7165 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7166 return (code
== CONST_INT
7167 && INTVAL (index
) < 1024
7168 && INTVAL (index
) > -1024
7169 && (INTVAL (index
) & 3) == 0);
7171 if (arm_address_register_rtx_p (index
, strict_p
)
7172 && (GET_MODE_SIZE (mode
) <= 4))
7175 if (mode
== DImode
|| mode
== DFmode
)
7177 if (code
== CONST_INT
)
7179 HOST_WIDE_INT val
= INTVAL (index
);
7180 /* ??? Can we assume ldrd for thumb2? */
7181 /* Thumb-2 ldrd only has reg+const addressing modes. */
7182 /* ldrd supports offsets of +-1020.
7183 However the ldr fallback does not. */
7184 return val
> -256 && val
< 256 && (val
& 3) == 0;
7192 rtx xiop0
= XEXP (index
, 0);
7193 rtx xiop1
= XEXP (index
, 1);
7195 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7196 && thumb2_index_mul_operand (xiop1
))
7197 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7198 && thumb2_index_mul_operand (xiop0
)));
7200 else if (code
== ASHIFT
)
7202 rtx op
= XEXP (index
, 1);
7204 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7207 && INTVAL (op
) <= 3);
7210 return (code
== CONST_INT
7211 && INTVAL (index
) < 4096
7212 && INTVAL (index
) > -256);
7215 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7217 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
7227 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7229 return (regno
<= LAST_LO_REGNUM
7230 || regno
> LAST_VIRTUAL_REGISTER
7231 || regno
== FRAME_POINTER_REGNUM
7232 || (GET_MODE_SIZE (mode
) >= 4
7233 && (regno
== STACK_POINTER_REGNUM
7234 || regno
>= FIRST_PSEUDO_REGISTER
7235 || x
== hard_frame_pointer_rtx
7236 || x
== arg_pointer_rtx
)));
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
7247 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7249 The AP may be eliminated to either the SP or the FP, so we use the
7250 least common denominator, e.g. SImode, and offsets from 0 to 64.
7252 ??? Verify whether the above is the right approach.
7254 ??? Also, the FP may be eliminated to the SP, so perhaps that
7255 needs special handling also.
7257 ??? Look at how the mips16 port solves this problem. It probably uses
7258 better ways to solve some of these problems.
7260 Although it is not incorrect, we don't accept QImode and HImode
7261 addresses based on the frame pointer or arg pointer until the
7262 reload pass starts. This is so that eliminating such addresses
7263 into stack based ones won't produce impossible code. */
7265 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7267 /* ??? Not clear if this is right. Experiment. */
7268 if (GET_MODE_SIZE (mode
) < 4
7269 && !(reload_in_progress
|| reload_completed
)
7270 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7271 || reg_mentioned_p (arg_pointer_rtx
, x
)
7272 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7273 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7274 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7275 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7278 /* Accept any base register. SP only in SImode or larger. */
7279 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7282 /* This is PC relative data before arm_reorg runs. */
7283 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7284 && GET_CODE (x
) == SYMBOL_REF
7285 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7288 /* This is PC relative data after arm_reorg runs. */
7289 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7291 && (GET_CODE (x
) == LABEL_REF
7292 || (GET_CODE (x
) == CONST
7293 && GET_CODE (XEXP (x
, 0)) == PLUS
7294 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7295 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7298 /* Post-inc indexing only supported for SImode and larger. */
7299 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7300 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7303 else if (GET_CODE (x
) == PLUS
)
7305 /* REG+REG address can be any two index registers. */
7306 /* We disallow FRAME+REG addressing since we know that FRAME
7307 will be replaced with STACK, and SP relative addressing only
7308 permits SP+OFFSET. */
7309 if (GET_MODE_SIZE (mode
) <= 4
7310 && XEXP (x
, 0) != frame_pointer_rtx
7311 && XEXP (x
, 1) != frame_pointer_rtx
7312 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7313 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7314 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7317 /* REG+const has 5-7 bit offset for non-SP registers. */
7318 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7319 || XEXP (x
, 0) == arg_pointer_rtx
)
7320 && CONST_INT_P (XEXP (x
, 1))
7321 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7324 /* REG+const has 10-bit offset for SP, but only SImode and
7325 larger is supported. */
7326 /* ??? Should probably check for DI/DFmode overflow here
7327 just like GO_IF_LEGITIMATE_OFFSET does. */
7328 else if (REG_P (XEXP (x
, 0))
7329 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7330 && GET_MODE_SIZE (mode
) >= 4
7331 && CONST_INT_P (XEXP (x
, 1))
7332 && INTVAL (XEXP (x
, 1)) >= 0
7333 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7334 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7337 else if (REG_P (XEXP (x
, 0))
7338 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7339 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7340 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7341 && REGNO (XEXP (x
, 0))
7342 <= LAST_VIRTUAL_POINTER_REGISTER
))
7343 && GET_MODE_SIZE (mode
) >= 4
7344 && CONST_INT_P (XEXP (x
, 1))
7345 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7349 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7350 && GET_MODE_SIZE (mode
) == 4
7351 && GET_CODE (x
) == SYMBOL_REF
7352 && CONSTANT_POOL_ADDRESS_P (x
)
7354 && symbol_mentioned_p (get_pool_constant (x
))
7355 && ! pcrel_constant_p (get_pool_constant (x
))))
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
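/* Summary of the ranges accepted above (derived from the code, added for
   reference): byte accesses allow offsets 0..31, halfword accesses 0..62
   in steps of 2, and accesses of 4 bytes or more allow multiples of 4
   with val + size <= 128 (0..124 for SImode) -- matching the 5-bit scaled
   immediate of the 16-bit Thumb load/store encodings.  */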
bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */
      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, 0);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
7468 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7470 rtx insns
, label
, labelno
, sum
;
7472 gcc_assert (reloc
!= TLS_DESCSEQ
);
7475 labelno
= GEN_INT (pic_labelno
++);
7476 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7477 label
= gen_rtx_CONST (VOIDmode
, label
);
7479 sum
= gen_rtx_UNSPEC (Pmode
,
7480 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7481 GEN_INT (TARGET_ARM
? 8 : 4)),
7483 reg
= load_tls_operand (sum
, reg
);
7486 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7488 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7490 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7491 LCT_PURE
, /* LCT_CONST? */
7492 Pmode
, 1, reg
, Pmode
);
7494 insns
= get_insns ();
7501 arm_tls_descseq_addr (rtx x
, rtx reg
)
7503 rtx labelno
= GEN_INT (pic_labelno
++);
7504 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7505 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7506 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7507 gen_rtx_CONST (VOIDmode
, label
),
7508 GEN_INT (!TARGET_ARM
)),
7510 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
7512 emit_insn (gen_tlscall (x
, labelno
));
7514 reg
= gen_reg_rtx (SImode
);
7516 gcc_assert (REGNO (reg
) != 0);
7518 emit_move_insn (reg
, reg0
);
7524 legitimize_tls_address (rtx x
, rtx reg
)
7526 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7527 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7531 case TLS_MODEL_GLOBAL_DYNAMIC
:
7532 if (TARGET_GNU2_TLS
)
7534 reg
= arm_tls_descseq_addr (x
, reg
);
7536 tp
= arm_load_tp (NULL_RTX
);
7538 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7542 /* Original scheme */
7543 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7544 dest
= gen_reg_rtx (Pmode
);
7545 emit_libcall_block (insns
, dest
, ret
, x
);
7549 case TLS_MODEL_LOCAL_DYNAMIC
:
7550 if (TARGET_GNU2_TLS
)
7552 reg
= arm_tls_descseq_addr (x
, reg
);
7554 tp
= arm_load_tp (NULL_RTX
);
7556 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7560 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7562 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7563 share the LDM result with other LD model accesses. */
7564 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7566 dest
= gen_reg_rtx (Pmode
);
7567 emit_libcall_block (insns
, dest
, ret
, eqv
);
7569 /* Load the addend. */
7570 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7571 GEN_INT (TLS_LDO32
)),
7573 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7574 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7578 case TLS_MODEL_INITIAL_EXEC
:
7579 labelno
= GEN_INT (pic_labelno
++);
7580 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7581 label
= gen_rtx_CONST (VOIDmode
, label
);
7582 sum
= gen_rtx_UNSPEC (Pmode
,
7583 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7584 GEN_INT (TARGET_ARM
? 8 : 4)),
7586 reg
= load_tls_operand (sum
, reg
);
7589 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7590 else if (TARGET_THUMB2
)
7591 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7594 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7595 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7598 tp
= arm_load_tp (NULL_RTX
);
7600 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7602 case TLS_MODEL_LOCAL_EXEC
:
7603 tp
= arm_load_tp (NULL_RTX
);
7605 reg
= gen_rtx_UNSPEC (Pmode
,
7606 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7608 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7610 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7617 /* Try machine-dependent ways of modifying an illegitimate address
7618 to be legitimate. If we find one, return the new, valid address. */
7620 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
7622 if (arm_tls_referenced_p (x
))
7626 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
7628 addend
= XEXP (XEXP (x
, 0), 1);
7629 x
= XEXP (XEXP (x
, 0), 0);
7632 if (GET_CODE (x
) != SYMBOL_REF
)
7635 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
7637 x
= legitimize_tls_address (x
, NULL_RTX
);
7641 x
= gen_rtx_PLUS (SImode
, x
, addend
);
7650 /* TODO: legitimize_address for Thumb2. */
7653 return thumb_legitimize_address (x
, orig_x
, mode
);
7656 if (GET_CODE (x
) == PLUS
)
7658 rtx xop0
= XEXP (x
, 0);
7659 rtx xop1
= XEXP (x
, 1);
7661 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7662 xop0
= force_reg (SImode
, xop0
);
7664 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7665 && !symbol_mentioned_p (xop1
))
7666 xop1
= force_reg (SImode
, xop1
);
7668 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7669 && CONST_INT_P (xop1
))
7671 HOST_WIDE_INT n
, low_n
;
7675 /* VFP addressing modes actually allow greater offsets, but for
7676 now we just stick with the lowest common denominator. */
7678 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7690 low_n
= ((mode
) == TImode
? 0
7691 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7695 base_reg
= gen_reg_rtx (SImode
);
7696 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7697 emit_move_insn (base_reg
, val
);
7698 x
= plus_constant (Pmode
, base_reg
, low_n
);
7700 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7701 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7704 /* XXX We don't allow MINUS any more -- see comment in
7705 arm_legitimate_address_outer_p (). */
7706 else if (GET_CODE (x
) == MINUS
)
7708 rtx xop0
= XEXP (x
, 0);
7709 rtx xop1
= XEXP (x
, 1);
7711 if (CONSTANT_P (xop0
))
7712 xop0
= force_reg (SImode
, xop0
);
7714 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
7715 xop1
= force_reg (SImode
, xop1
);
7717 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7718 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
7721 /* Make sure to take full advantage of the pre-indexed addressing mode
7722 with absolute addresses which often allows for the base register to
7723 be factorized for multiple adjacent memory references, and it might
7724 even allows for the mini pool to be avoided entirely. */
7725 else if (CONST_INT_P (x
) && optimize
> 0)
7728 HOST_WIDE_INT mask
, base
, index
;
7731 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7732 use a 8-bit index. So let's use a 12-bit index for SImode only and
7733 hope that arm_gen_constant will enable ldrb to use more bits. */
7734 bits
= (mode
== SImode
) ? 12 : 8;
7735 mask
= (1 << bits
) - 1;
7736 base
= INTVAL (x
) & ~mask
;
7737 index
= INTVAL (x
) & mask
;
7738 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
7740 /* It'll most probably be more efficient to generate the base
7741 with more bits set and use a negative index instead. */
7745 base_reg
= force_reg (SImode
, GEN_INT (base
));
7746 x
= plus_constant (Pmode
, base_reg
, index
);
7751 /* We need to find and carefully transform any SYMBOL and LABEL
7752 references; so go back to the original address expression. */
7753 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7755 if (new_x
!= orig_x
)
7763 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7764 to be legitimate. If we find one, return the new, valid address. */
7766 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
7768 if (GET_CODE (x
) == PLUS
7769 && CONST_INT_P (XEXP (x
, 1))
7770 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
7771 || INTVAL (XEXP (x
, 1)) < 0))
7773 rtx xop0
= XEXP (x
, 0);
7774 rtx xop1
= XEXP (x
, 1);
7775 HOST_WIDE_INT offset
= INTVAL (xop1
);
7777 /* Try and fold the offset into a biasing of the base register and
7778 then offsetting that. Don't do this when optimizing for space
7779 since it can cause too many CSEs. */
7780 if (optimize_size
&& offset
>= 0
7781 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
7783 HOST_WIDE_INT delta
;
7786 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
7787 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
7788 delta
= 31 * GET_MODE_SIZE (mode
);
7790 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
7792 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
7794 x
= plus_constant (Pmode
, xop0
, delta
);
7796 else if (offset
< 0 && offset
> -256)
7797 /* Small negative offsets are best done with a subtract before the
7798 dereference, forcing these into a register normally takes two
7800 x
= force_operand (x
, NULL_RTX
);
7803 /* For the remaining cases, force the constant into a register. */
7804 xop1
= force_reg (SImode
, xop1
);
7805 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7808 else if (GET_CODE (x
) == PLUS
7809 && s_register_operand (XEXP (x
, 1), SImode
)
7810 && !s_register_operand (XEXP (x
, 0), SImode
))
7812 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
7814 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
7819 /* We need to find and carefully transform any SYMBOL and LABEL
7820 references; so go back to the original address expression. */
7821 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7823 if (new_x
!= orig_x
)
7831 arm_legitimize_reload_address (rtx
*p
,
7833 int opnum
, int type
,
7834 int ind_levels ATTRIBUTE_UNUSED
)
7836 /* We must recognize output that we have already generated ourselves. */
7837 if (GET_CODE (*p
) == PLUS
7838 && GET_CODE (XEXP (*p
, 0)) == PLUS
7839 && REG_P (XEXP (XEXP (*p
, 0), 0))
7840 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
7841 && CONST_INT_P (XEXP (*p
, 1)))
7843 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7844 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7845 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7849 if (GET_CODE (*p
) == PLUS
7850 && REG_P (XEXP (*p
, 0))
7851 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
7852 /* If the base register is equivalent to a constant, let the generic
7853 code handle it. Otherwise we will run into problems if a future
7854 reload pass decides to rematerialize the constant. */
7855 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
7856 && CONST_INT_P (XEXP (*p
, 1)))
7858 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
7859 HOST_WIDE_INT low
, high
;
7861 /* Detect coprocessor load/stores. */
7862 bool coproc_p
= ((TARGET_HARD_FLOAT
7864 && (mode
== SFmode
|| mode
== DFmode
))
7865 || (TARGET_REALLY_IWMMXT
7866 && VALID_IWMMXT_REG_MODE (mode
))
7868 && (VALID_NEON_DREG_MODE (mode
)
7869 || VALID_NEON_QREG_MODE (mode
))));
7871 /* For some conditions, bail out when lower two bits are unaligned. */
7872 if ((val
& 0x3) != 0
7873 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7875 /* For DI, and DF under soft-float: */
7876 || ((mode
== DImode
|| mode
== DFmode
)
7877 /* Without ldrd, we use stm/ldm, which does not
7878 fair well with unaligned bits. */
7880 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7881 || TARGET_THUMB2
))))
7884 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7885 of which the (reg+high) gets turned into a reload add insn,
7886 we try to decompose the index into high/low values that can often
7887 also lead to better reload CSE.
7889 ldr r0, [r2, #4100] // Offset too large
7890 ldr r1, [r2, #4104] // Offset too large
7892 is best reloaded as:
7898 which post-reload CSE can simplify in most cases to eliminate the
7899 second add instruction:
7904 The idea here is that we want to split out the bits of the constant
7905 as a mask, rather than as subtracting the maximum offset that the
7906 respective type of load/store used can handle.
7908 When encountering negative offsets, we can still utilize it even if
7909 the overall offset is positive; sometimes this may lead to an immediate
7910 that can be constructed with fewer instructions.
7912 ldr r0, [r2, #0x3FFFFC]
7914 This is best reloaded as:
7915 add t1, r2, #0x400000
7918 The trick for spotting this for a load insn with N bits of offset
7919 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
7920 negative offset that is going to make bit N and all the bits below
7921 it become zero in the remainder part.
7923 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7924 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7925 used in most cases of ARM load/store instructions. */
#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)					\
  (((VAL) & ((1 << (N)) - 1))						\
   ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))	\
   : 0)
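/* Worked example (not part of the original source), using the 0x3FFFFC
   case from the comment above with N = 12:
     VAL & 0xfff        = 0xffc   (nonzero, so take the second arm)
     VAL & 0x1fff       = 0x1ffc
     0x1ffc ^ 0x1000    = 0xffc
     0xffc  - 0x1000    = -4
   so low = -4 and high = VAL - low = 0x400000, giving the
   "add t1, r2, #0x400000" shown above followed by a load at offset -4
   from t1.  */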
7934 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
7936 /* NEON quad-word load/stores are made of two double-word accesses,
7937 so the valid index range is reduced by 8. Treat as 9-bit range if
7939 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
7940 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
7942 else if (GET_MODE_SIZE (mode
) == 8)
7945 low
= (TARGET_THUMB2
7946 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
7947 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
7949 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7950 to access doublewords. The supported load/store offsets are
7951 -8, -4, and 4, which we try to produce here. */
7952 low
= ((val
& 0xf) ^ 0x8) - 0x8;
7954 else if (GET_MODE_SIZE (mode
) < 8)
7956 /* NEON element load/stores do not have an offset. */
7957 if (TARGET_NEON_FP16
&& mode
== HFmode
)
7962 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7963 Try the wider 12-bit range first, and re-try if the result
7965 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7967 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7971 if (mode
== HImode
|| mode
== HFmode
)
7974 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7977 /* The storehi/movhi_bytes fallbacks can use only
7978 [-4094,+4094] of the full ldrb/strb index range. */
7979 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7980 if (low
== 4095 || low
== -4095)
7985 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7991 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
7992 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
7993 - (unsigned HOST_WIDE_INT
) 0x80000000);
7994 /* Check for overflow or zero */
7995 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
7998 /* Reload the high part into a base reg; leave the low part
8000 Note that replacing this gen_rtx_PLUS with plus_constant is
8001 wrong in this case because we rely on the
8002 (plus (plus reg c1) c2) structure being preserved so that
8003 XEXP (*p, 0) in push_reload below uses the correct term. */
8004 *p
= gen_rtx_PLUS (GET_MODE (*p
),
8005 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
8008 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
8009 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
8010 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
8018 thumb_legitimize_reload_address (rtx
*x_p
,
8020 int opnum
, int type
,
8021 int ind_levels ATTRIBUTE_UNUSED
)
8025 if (GET_CODE (x
) == PLUS
8026 && GET_MODE_SIZE (mode
) < 4
8027 && REG_P (XEXP (x
, 0))
8028 && XEXP (x
, 0) == stack_pointer_rtx
8029 && CONST_INT_P (XEXP (x
, 1))
8030 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8035 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
8036 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
8040 /* If both registers are hi-regs, then it's better to reload the
8041 entire expression rather than each register individually. That
8042 only requires one reload register rather than two. */
8043 if (GET_CODE (x
) == PLUS
8044 && REG_P (XEXP (x
, 0))
8045 && REG_P (XEXP (x
, 1))
8046 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
8047 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
8052 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
8053 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	return true;

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
{
  /* At present, we have no support for Neon structure constants, so forbid
     them here.  It might be possible to handle simple cases like 0 and -1
     in future.  */
  if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
    return false;

  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}

#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
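/* Illustrative note (not part of the original source): REG_OR_SUBREG_REG
   accepts both (reg:SI 0) and (subreg:SI (reg:DI 1) 0), and for those
   REG_OR_SUBREG_RTX yields the underlying (reg ...) rtx; the cost
   functions below use this to look through subregs when pricing
   operands.  */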
8146 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8148 machine_mode mode
= GET_MODE (x
);
8157 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8164 return COSTS_N_INSNS (1);
8167 if (CONST_INT_P (XEXP (x
, 1)))
8170 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8177 return COSTS_N_INSNS (2) + cycles
;
8179 return COSTS_N_INSNS (1) + 16;
8182 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8184 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8185 return (COSTS_N_INSNS (words
)
8186 + 4 * ((MEM_P (SET_SRC (x
)))
8187 + MEM_P (SET_DEST (x
))));
8192 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8194 if (thumb_shiftable_const (INTVAL (x
)))
8195 return COSTS_N_INSNS (2);
8196 return COSTS_N_INSNS (3);
8198 else if ((outer
== PLUS
|| outer
== COMPARE
)
8199 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8201 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8202 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8203 return COSTS_N_INSNS (1);
8204 else if (outer
== AND
)
8207 /* This duplicates the tests in the andsi3 expander. */
8208 for (i
= 9; i
<= 31; i
++)
8209 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8210 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8211 return COSTS_N_INSNS (2);
8213 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8214 || outer
== LSHIFTRT
)
8216 return COSTS_N_INSNS (2);
8222 return COSTS_N_INSNS (3);
8240 /* XXX another guess. */
8241 /* Memory costs quite a lot for the first word, but subsequent words
8242 load at the equivalent of a single insn each. */
8243 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8244 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8249 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8255 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8256 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8262 return total
+ COSTS_N_INSNS (1);
8264 /* Assume a two-shift sequence. Increase the cost slightly so
8265 we prefer actual shifts over an extend operation. */
8266 return total
+ 1 + COSTS_N_INSNS (2);
8274 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8276 machine_mode mode
= GET_MODE (x
);
8277 enum rtx_code subcode
;
8279 enum rtx_code code
= GET_CODE (x
);
8285 /* Memory costs quite a lot for the first word, but subsequent words
8286 load at the equivalent of a single insn each. */
8287 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8294 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8295 *total
= COSTS_N_INSNS (2);
8296 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8297 *total
= COSTS_N_INSNS (4);
8299 *total
= COSTS_N_INSNS (20);
8303 if (REG_P (XEXP (x
, 1)))
8304 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8305 else if (!CONST_INT_P (XEXP (x
, 1)))
8306 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8312 *total
+= COSTS_N_INSNS (4);
8317 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8318 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8321 *total
+= COSTS_N_INSNS (3);
8325 *total
+= COSTS_N_INSNS (1);
8326 /* Increase the cost of complex shifts because they aren't any faster,
8327 and reduce dual issue opportunities. */
8328 if (arm_tune_cortex_a9
8329 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8337 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8338 if (CONST_INT_P (XEXP (x
, 0))
8339 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8341 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8345 if (CONST_INT_P (XEXP (x
, 1))
8346 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8348 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8355 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8357 if (TARGET_HARD_FLOAT
8359 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8361 *total
= COSTS_N_INSNS (1);
8362 if (CONST_DOUBLE_P (XEXP (x
, 0))
8363 && arm_const_double_rtx (XEXP (x
, 0)))
8365 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8369 if (CONST_DOUBLE_P (XEXP (x
, 1))
8370 && arm_const_double_rtx (XEXP (x
, 1)))
8372 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8378 *total
= COSTS_N_INSNS (20);
8382 *total
= COSTS_N_INSNS (1);
8383 if (CONST_INT_P (XEXP (x
, 0))
8384 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8386 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8390 subcode
= GET_CODE (XEXP (x
, 1));
8391 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8392 || subcode
== LSHIFTRT
8393 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8395 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8396 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8400 /* A shift as a part of RSB costs no more than RSB itself. */
8401 if (GET_CODE (XEXP (x
, 0)) == MULT
8402 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8404 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8405 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8410 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8412 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8413 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8417 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8418 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8420 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8421 if (REG_P (XEXP (XEXP (x
, 1), 0))
8422 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8423 *total
+= COSTS_N_INSNS (1);
8431 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8432 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8433 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8435 *total
= COSTS_N_INSNS (1);
8436 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8438 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8442 /* MLA: All arguments must be registers. We filter out
8443 multiplication by a power of two, so that we fall down into
8445 if (GET_CODE (XEXP (x
, 0)) == MULT
8446 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8448 /* The cost comes from the cost of the multiply. */
8452 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8454 if (TARGET_HARD_FLOAT
8456 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8458 *total
= COSTS_N_INSNS (1);
8459 if (CONST_DOUBLE_P (XEXP (x
, 1))
8460 && arm_const_double_rtx (XEXP (x
, 1)))
8462 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8469 *total
= COSTS_N_INSNS (20);
8473 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8474 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8476 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8477 if (REG_P (XEXP (XEXP (x
, 0), 0))
8478 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8479 *total
+= COSTS_N_INSNS (1);
8485 case AND
: case XOR
: case IOR
:
8487 /* Normally the frame registers will be spilt into reg+const during
8488 reload, so it is a bad idea to combine them with other instructions,
8489 since then they might not be moved outside of loops. As a compromise
8490 we allow integration with ops that have a constant as their second
8492 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8493 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8494 && !CONST_INT_P (XEXP (x
, 1)))
8495 *total
= COSTS_N_INSNS (1);
8499 *total
+= COSTS_N_INSNS (2);
8500 if (CONST_INT_P (XEXP (x
, 1))
8501 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8503 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8510 *total
+= COSTS_N_INSNS (1);
8511 if (CONST_INT_P (XEXP (x
, 1))
8512 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8514 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8517 subcode
= GET_CODE (XEXP (x
, 0));
8518 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8519 || subcode
== LSHIFTRT
8520 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8522 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8523 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8528 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8530 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8531 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8535 if (subcode
== UMIN
|| subcode
== UMAX
8536 || subcode
== SMIN
|| subcode
== SMAX
)
8538 *total
= COSTS_N_INSNS (3);
8545 /* This should have been handled by the CPU specific routines. */
8549 if (arm_arch3m
&& mode
== SImode
8550 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8551 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8552 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8553 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8554 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8555 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8557 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8560 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8564 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8566 if (TARGET_HARD_FLOAT
8568 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8570 *total
= COSTS_N_INSNS (1);
8573 *total
= COSTS_N_INSNS (2);
8579 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8580 if (mode
== SImode
&& code
== NOT
)
8582 subcode
= GET_CODE (XEXP (x
, 0));
8583 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8584 || subcode
== LSHIFTRT
8585 || subcode
== ROTATE
|| subcode
== ROTATERT
8587 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8589 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8590 /* Register shifts cost an extra cycle. */
8591 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8592 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8601 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8603 *total
= COSTS_N_INSNS (4);
8607 operand
= XEXP (x
, 0);
8609 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8610 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8611 && REG_P (XEXP (operand
, 0))
8612 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8613 *total
+= COSTS_N_INSNS (1);
8614 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8615 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8619 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8621 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8627 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8628 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8630 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8636 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8637 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8639 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
      /* SCC insns.  In the case where the comparison has already been
	 performed, then they cost 2 instructions.  Otherwise they need
	 an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
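      /* Illustrative only (not from the original source): when the flags are
	 already set, a typical store-flag sequence is
	     MOV   Rd, #0
	     MOVcc Rd, #1
	 i.e. the two insns modelled above; otherwise a compare must be
	 issued first.  */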
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)

      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8676 *total
+= COSTS_N_INSNS (1);
8677 if (CONST_INT_P (XEXP (x
, 1))
8678 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8680 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8684 subcode
= GET_CODE (XEXP (x
, 0));
8685 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8686 || subcode
== LSHIFTRT
8687 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8689 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8690 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8695 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8697 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8698 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8708 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8709 if (!CONST_INT_P (XEXP (x
, 1))
8710 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8711 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8715 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8717 if (TARGET_HARD_FLOAT
8719 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8721 *total
= COSTS_N_INSNS (1);
8724 *total
= COSTS_N_INSNS (20);
8727 *total
= COSTS_N_INSNS (1);
8729 *total
+= COSTS_N_INSNS (3);
8735 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8737 rtx op
= XEXP (x
, 0);
8738 machine_mode opmode
= GET_MODE (op
);
8741 *total
+= COSTS_N_INSNS (1);
8743 if (opmode
!= SImode
)
8747 /* If !arm_arch4, we use one of the extendhisi2_mem
8748 or movhi_bytes patterns for HImode. For a QImode
8749 sign extension, we first zero-extend from memory
8750 and then perform a shift sequence. */
8751 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8752 *total
+= COSTS_N_INSNS (2);
8755 *total
+= COSTS_N_INSNS (1);
8757 /* We don't have the necessary insn, so we need to perform some
8759 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8760 /* An and with constant 255. */
8761 *total
+= COSTS_N_INSNS (1);
8763 /* A shift sequence. Increase costs slightly to avoid
8764 combining two shifts into an extend operation. */
8765 *total
+= COSTS_N_INSNS (2) + 1;
8771 switch (GET_MODE (XEXP (x
, 0)))
8778 *total
= COSTS_N_INSNS (1);
8788 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8792 if (const_ok_for_arm (INTVAL (x
))
8793 || const_ok_for_arm (~INTVAL (x
)))
8794 *total
= COSTS_N_INSNS (1);
8796 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8797 INTVAL (x
), NULL_RTX
,
8804 *total
= COSTS_N_INSNS (3);
8808 *total
= COSTS_N_INSNS (1);
8812 *total
= COSTS_N_INSNS (1);
8813 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8817 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8818 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8819 *total
= COSTS_N_INSNS (1);
8821 *total
= COSTS_N_INSNS (4);
8825 /* The vec_extract patterns accept memory operands that require an
8826 address reload. Account for the cost of that reload to give the
8827 auto-inc-dec pass an incentive to try to replace them. */
8828 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8829 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8831 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8832 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8833 *total
+= COSTS_N_INSNS (1);
8836 /* Likewise for the vec_set patterns. */
8837 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8838 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8839 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8841 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8842 *total
= rtx_cost (mem
, code
, 0, speed
);
8843 if (!neon_vector_mem_operand (mem
, 2, true))
8844 *total
+= COSTS_N_INSNS (1);
8850 /* We cost this as high as our memory costs to allow this to
8851 be hoisted from loops. */
8852 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8854 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8860 && TARGET_HARD_FLOAT
8862 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8863 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8864 *total
= COSTS_N_INSNS (1);
8866 *total
= COSTS_N_INSNS (4);
8870 *total
= COSTS_N_INSNS (4);
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */

thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)

  machine_mode mode = GET_MODE (x);

      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* On purpose fall through for normal RTX.  */

      return COSTS_N_INSNS (1);

      if (CONST_INT_P (XEXP (x, 1)))

	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);

	    return COSTS_N_INSNS (1) + const_size;

      return COSTS_N_INSNS (1);

      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return COSTS_N_INSNS (words)
	     + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
				    || satisfies_constraint_K (SET_SRC (x))
				    /* thumb1_movdi_insn.  */
				    || ((words > 1) && MEM_P (SET_SRC (x))));
8937 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8938 return COSTS_N_INSNS (1);
8939 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8940 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8941 return COSTS_N_INSNS (2);
8942 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8943 if (thumb_shiftable_const (INTVAL (x
)))
8944 return COSTS_N_INSNS (2);
8945 return COSTS_N_INSNS (3);
8947 else if ((outer
== PLUS
|| outer
== COMPARE
)
8948 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8950 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8951 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8952 return COSTS_N_INSNS (1);
8953 else if (outer
== AND
)
8956 /* This duplicates the tests in the andsi3 expander. */
8957 for (i
= 9; i
<= 31; i
++)
8958 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8959 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8960 return COSTS_N_INSNS (2);
8962 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8963 || outer
== LSHIFTRT
)
8965 return COSTS_N_INSNS (2);
8971 return COSTS_N_INSNS (3);
8985 return COSTS_N_INSNS (1);
8988 return (COSTS_N_INSNS (1)
8990 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8991 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8992 ? COSTS_N_INSNS (1) : 0));
8996 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9001 /* XXX still guessing. */
9002 switch (GET_MODE (XEXP (x
, 0)))
9005 return (1 + (mode
== DImode
? 4 : 0)
9006 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9009 return (4 + (mode
== DImode
? 4 : 0)
9010 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9013 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
/* RTX costs when optimizing for size.  */

arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,

  machine_mode mode = GET_MODE (x);

      *total = thumb1_size_rtx_costs (x, code, outer_code);

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */

      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*total = COSTS_N_INSNS (1);

	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*total = COSTS_N_INSNS (2);

	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9058 /* Needs a libcall, so it costs about this. */
9059 *total
= COSTS_N_INSNS (2);
9063 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9065 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9073 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9075 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9078 else if (mode
== SImode
)
9080 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9081 /* Slightly disparage register shifts, but not by much. */
9082 if (!CONST_INT_P (XEXP (x
, 1)))
9083 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
9087 /* Needs a libcall. */
9088 *total
= COSTS_N_INSNS (2);
9092 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9093 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9095 *total
= COSTS_N_INSNS (1);
9101 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
9102 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
9104 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
9105 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
9106 || subcode1
== ROTATE
|| subcode1
== ROTATERT
9107 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
9108 || subcode1
== ASHIFTRT
)
9110 /* It's just the cost of the two operands. */
9115 *total
= COSTS_N_INSNS (1);
9119 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9123 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9124 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9126 *total
= COSTS_N_INSNS (1);
9130 /* A shift as a part of ADD costs nothing. */
9131 if (GET_CODE (XEXP (x
, 0)) == MULT
9132 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
9134 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
9135 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
9136 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
9141 case AND
: case XOR
: case IOR
:
9144 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9146 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9147 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9148 || (code
== AND
&& subcode
== NOT
))
9150 /* It's just the cost of the two operands. */
9156 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9160 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9164 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9165 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9167 *total
= COSTS_N_INSNS (1);
9173 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9182 if (cc_register (XEXP (x
, 0), VOIDmode
))
9185 *total
= COSTS_N_INSNS (1);
9189 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9190 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9191 *total
= COSTS_N_INSNS (1);
9193 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9198 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9201 if (const_ok_for_arm (INTVAL (x
)))
9202 /* A multiplication by a constant requires another instruction
9203 to load the constant to a register. */
9204 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9206 else if (const_ok_for_arm (~INTVAL (x
)))
9207 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9208 else if (const_ok_for_arm (-INTVAL (x
)))
9210 if (outer_code
== COMPARE
|| outer_code
== PLUS
9211 || outer_code
== MINUS
)
9214 *total
= COSTS_N_INSNS (1);
9217 *total
= COSTS_N_INSNS (2);
9223 *total
= COSTS_N_INSNS (2);
9227 *total
= COSTS_N_INSNS (4);
9232 && TARGET_HARD_FLOAT
9233 && outer_code
== SET
9234 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9235 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9236 *total
= COSTS_N_INSNS (1);
9238 *total
= COSTS_N_INSNS (4);
9243 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9244 cost of these slightly. */
9245 *total
= COSTS_N_INSNS (1) + 1;
9252 if (mode
!= VOIDmode
)
9253 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9255 *total
= COSTS_N_INSNS (4); /* How knows? */
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */

shifter_op_p (rtx op, rtx *shift_reg)

  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)

      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
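/* A minimal usage sketch (not part of the original file); it mirrors the
   pattern the cost cases later in this file use, and assumes x, cost,
   extra_cost and speed_p are in scope as they are in arm_new_rtx_costs:

     rtx shift_reg = NULL;
     rtx shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
     if (shift_op != NULL)
       {
	 if (speed_p)
	   {
	     if (shift_reg != NULL)	// shift amount held in a register
	       *cost += extra_cost->alu.arith_shift_reg;
	     else			// shift by an immediate
	       *cost += extra_cost->alu.arith_shift;
	   }
	 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
       }
   */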
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)

  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  gcc_assert (GET_CODE (x) == UNSPEC);

  switch (XINT (x, 1))

    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));

	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

	*cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				   ADDR_SPACE_GENERIC, speed_p);

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));

	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);

	*cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				   ADDR_SPACE_GENERIC, speed_p);

      *cost = COSTS_N_INSNS (1);

	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      *cost = COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
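/* For example, LIBCALL_COST (2) expands to COSTS_N_INSNS (2 + 18) when
   optimizing for speed and to COSTS_N_INSNS (2 + 2) at -Os: one insn per
   argument plus the assumed overhead of the call itself and of handling
   the result.  */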
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\

	  shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);		\
	  if (shift_op != NULL						\
	      && arm_rtx_shift_left_p (XEXP (x, IDX)))			\

		  *cost += extra_cost->alu.arith_shift_reg;		\
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);	\

		*cost += extra_cost->alu.arith_shift;			\

	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)		\
			+ rtx_cost (XEXP (x, 1 - IDX),			\
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */

arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		   const struct cpu_cost_table *extra_cost,
		   int *cost, bool speed_p)

  machine_mode mode = GET_MODE (x);
9389 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9391 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9399 /* SET RTXs don't have a mode so we get it from the destination. */
9400 mode
= GET_MODE (SET_DEST (x
));
9402 if (REG_P (SET_SRC (x
))
9403 && REG_P (SET_DEST (x
)))
9405 /* Assume that most copies can be done with a single insn,
9406 unless we don't have HW FP, in which case everything
9407 larger than word mode will require two insns. */
9408 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9409 && GET_MODE_SIZE (mode
) > 4)
9412 /* Conditional register moves can be encoded
9413 in 16 bits in Thumb mode. */
9414 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9420 if (CONST_INT_P (SET_SRC (x
)))
9422 /* Handle CONST_INT here, since the value doesn't have a mode
9423 and we would otherwise be unable to work out the true cost. */
9424 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9426 /* Slightly lower the cost of setting a core reg to a constant.
9427 This helps break up chains and allows for better scheduling. */
9428 if (REG_P (SET_DEST (x
))
9429 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9432 /* Immediate moves with an immediate in the range [0, 255] can be
9433 encoded in 16 bits in Thumb mode. */
9434 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9435 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9437 goto const_int_cost
;
9443 /* A memory access costs 1 insn if the mode is small, or the address is
9444 a single register, otherwise it costs one insn per word. */
9445 if (REG_P (XEXP (x
, 0)))
9446 *cost
= COSTS_N_INSNS (1);
9448 && GET_CODE (XEXP (x
, 0)) == PLUS
9449 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9450 /* This will be split into two instructions.
9451 See arm.md:calculate_pic_address. */
9452 *cost
= COSTS_N_INSNS (2);
9454 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9456 /* For speed optimizations, add the costs of the address and
9457 accessing memory. */
9460 *cost
+= (extra_cost
->ldst
.load
9461 + arm_address_cost (XEXP (x
, 0), mode
,
9462 ADDR_SPACE_GENERIC
, speed_p
));
9464 *cost
+= extra_cost
->ldst
.load
;
  /* Calculations of LDM costs are complex.  We assume an initial cost
     (ldm_1st) which will load the number of registers mentioned in
     ldm_regs_per_insn_1st registers; then each additional
     ldm_regs_per_insn_subsequent registers cost one more insn.  The
     formula for N regs is thus:

       ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				 + ldm_regs_per_insn_subsequent - 1)
				/ ldm_regs_per_insn_subsequent).

     Additional costs may also be added for addressing.  A similar
     formula is used for STM.  */
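  /* Worked example (illustrative numbers, not taken from any cost table):
     with ldm_regs_per_insn_1st == 4 and ldm_regs_per_insn_subsequent == 2,
     an 8-register LDM is costed at
	 ldm_1st + COSTS_N_INSNS ((8 - 4 + 2 - 1) / 2)
       = ldm_1st + COSTS_N_INSNS (2).  */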
    bool is_ldm = load_multiple_operation (x, SImode);
    bool is_stm = store_multiple_operation (x, SImode);

    *cost = COSTS_N_INSNS (1);

    if (is_ldm || is_stm)

	HOST_WIDE_INT nregs = XVECLEN (x, 0);
	HOST_WIDE_INT regs_per_insn_1st = is_ldm
			  ? extra_cost->ldst.ldm_regs_per_insn_1st
			  : extra_cost->ldst.stm_regs_per_insn_1st;
	HOST_WIDE_INT regs_per_insn_sub = is_ldm
			  ? extra_cost->ldst.ldm_regs_per_insn_subsequent
			  : extra_cost->ldst.stm_regs_per_insn_subsequent;

	*cost += regs_per_insn_1st
		 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
				   + regs_per_insn_sub - 1)
				  / regs_per_insn_sub);
9512 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9513 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9514 *cost
= COSTS_N_INSNS (speed_p
9515 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9516 else if (mode
== SImode
&& TARGET_IDIV
)
9517 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9519 *cost
= LIBCALL_COST (2);
9520 return false; /* All arguments must be in registers. */
9524 *cost
= LIBCALL_COST (2);
9525 return false; /* All arguments must be in registers. */
9528 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9530 *cost
= (COSTS_N_INSNS (2)
9531 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9533 *cost
+= extra_cost
->alu
.shift_reg
;
9541 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9543 *cost
= (COSTS_N_INSNS (3)
9544 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9546 *cost
+= 2 * extra_cost
->alu
.shift
;
9549 else if (mode
== SImode
)
9551 *cost
= (COSTS_N_INSNS (1)
9552 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9553 /* Slightly disparage register shifts at -Os, but not by much. */
9554 if (!CONST_INT_P (XEXP (x
, 1)))
9555 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9556 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9559 else if (GET_MODE_CLASS (mode
) == MODE_INT
9560 && GET_MODE_SIZE (mode
) < 4)
9564 *cost
= (COSTS_N_INSNS (1)
9565 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9566 /* Slightly disparage register shifts at -Os, but not by
9568 if (!CONST_INT_P (XEXP (x
, 1)))
9569 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9570 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9572 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9574 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9576 /* Can use SBFX/UBFX. */
9577 *cost
= COSTS_N_INSNS (1);
9579 *cost
+= extra_cost
->alu
.bfx
;
9580 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9584 *cost
= COSTS_N_INSNS (2);
9585 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9588 if (CONST_INT_P (XEXP (x
, 1)))
9589 *cost
+= 2 * extra_cost
->alu
.shift
;
9591 *cost
+= (extra_cost
->alu
.shift
9592 + extra_cost
->alu
.shift_reg
);
9595 /* Slightly disparage register shifts. */
9596 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9601 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9602 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9605 if (CONST_INT_P (XEXP (x
, 1)))
9606 *cost
+= (2 * extra_cost
->alu
.shift
9607 + extra_cost
->alu
.log_shift
);
9609 *cost
+= (extra_cost
->alu
.shift
9610 + extra_cost
->alu
.shift_reg
9611 + extra_cost
->alu
.log_shift_reg
);
9617 *cost
= LIBCALL_COST (2);
9625 *cost
= COSTS_N_INSNS (1);
9627 *cost
+= extra_cost
->alu
.rev
;
9634 /* No rev instruction available. Look at arm_legacy_rev
9635 and thumb_legacy_rev for the form of RTL used then. */
9638 *cost
= COSTS_N_INSNS (10);
9642 *cost
+= 6 * extra_cost
->alu
.shift
;
9643 *cost
+= 3 * extra_cost
->alu
.logical
;
9648 *cost
= COSTS_N_INSNS (5);
9652 *cost
+= 2 * extra_cost
->alu
.shift
;
9653 *cost
+= extra_cost
->alu
.arith_shift
;
9654 *cost
+= 2 * extra_cost
->alu
.logical
;
9662 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9663 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9665 *cost
= COSTS_N_INSNS (1);
9666 if (GET_CODE (XEXP (x
, 0)) == MULT
9667 || GET_CODE (XEXP (x
, 1)) == MULT
)
9669 rtx mul_op0
, mul_op1
, sub_op
;
9672 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9674 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9676 mul_op0
= XEXP (XEXP (x
, 0), 0);
9677 mul_op1
= XEXP (XEXP (x
, 0), 1);
9678 sub_op
= XEXP (x
, 1);
9682 mul_op0
= XEXP (XEXP (x
, 1), 0);
9683 mul_op1
= XEXP (XEXP (x
, 1), 1);
9684 sub_op
= XEXP (x
, 0);
9687 /* The first operand of the multiply may be optionally
9689 if (GET_CODE (mul_op0
) == NEG
)
9690 mul_op0
= XEXP (mul_op0
, 0);
9692 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9693 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9694 + rtx_cost (sub_op
, code
, 0, speed_p
));
9700 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9706 rtx shift_by_reg
= NULL
;
9710 *cost
= COSTS_N_INSNS (1);
9712 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9713 if (shift_op
== NULL
)
9715 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9716 non_shift_op
= XEXP (x
, 0);
9719 non_shift_op
= XEXP (x
, 1);
9721 if (shift_op
!= NULL
)
9723 if (shift_by_reg
!= NULL
)
9726 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9727 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9730 *cost
+= extra_cost
->alu
.arith_shift
;
9732 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9733 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9738 && GET_CODE (XEXP (x
, 1)) == MULT
)
9742 *cost
+= extra_cost
->mult
[0].add
;
9743 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9744 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9745 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9749 if (CONST_INT_P (XEXP (x
, 0)))
9751 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9752 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9754 *cost
= COSTS_N_INSNS (insns
);
9756 *cost
+= insns
* extra_cost
->alu
.arith
;
9757 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9761 *cost
+= extra_cost
->alu
.arith
;
9766 if (GET_MODE_CLASS (mode
) == MODE_INT
9767 && GET_MODE_SIZE (mode
) < 4)
9769 rtx shift_op
, shift_reg
;
9772 /* We check both sides of the MINUS for shifter operands since,
9773 unlike PLUS, it's not commutative. */
9775 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9776 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9778 /* Slightly disparage, as we might need to widen the result. */
9779 *cost
= 1 + COSTS_N_INSNS (1);
9781 *cost
+= extra_cost
->alu
.arith
;
9783 if (CONST_INT_P (XEXP (x
, 0)))
9785 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9794 *cost
= COSTS_N_INSNS (2);
9796 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9798 rtx op1
= XEXP (x
, 1);
9801 *cost
+= 2 * extra_cost
->alu
.arith
;
9803 if (GET_CODE (op1
) == ZERO_EXTEND
)
9804 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9806 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9807 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9811 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9814 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9815 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9817 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9820 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9821 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9824 *cost
+= (extra_cost
->alu
.arith
9825 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9826 ? extra_cost
->alu
.arith
9827 : extra_cost
->alu
.arith_shift
));
9828 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9829 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9830 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9835 *cost
+= 2 * extra_cost
->alu
.arith
;
9841 *cost
= LIBCALL_COST (2);
9845 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9846 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9848 *cost
= COSTS_N_INSNS (1);
9849 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9851 rtx mul_op0
, mul_op1
, add_op
;
9854 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9856 mul_op0
= XEXP (XEXP (x
, 0), 0);
9857 mul_op1
= XEXP (XEXP (x
, 0), 1);
9858 add_op
= XEXP (x
, 1);
9860 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9861 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9862 + rtx_cost (add_op
, code
, 0, speed_p
));
9868 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9871 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9873 *cost
= LIBCALL_COST (2);
9877 /* Narrow modes can be synthesized in SImode, but the range
9878 of useful sub-operations is limited. Check for shift operations
9879 on one of the operands. Only left shifts can be used in the
9881 if (GET_MODE_CLASS (mode
) == MODE_INT
9882 && GET_MODE_SIZE (mode
) < 4)
9884 rtx shift_op
, shift_reg
;
9887 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9889 if (CONST_INT_P (XEXP (x
, 1)))
9891 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9892 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9894 *cost
= COSTS_N_INSNS (insns
);
9896 *cost
+= insns
* extra_cost
->alu
.arith
;
9897 /* Slightly penalize a narrow operation as the result may
9899 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9903 /* Slightly penalize a narrow operation as the result may
9905 *cost
= 1 + COSTS_N_INSNS (1);
9907 *cost
+= extra_cost
->alu
.arith
;
9914 rtx shift_op
, shift_reg
;
9916 *cost
= COSTS_N_INSNS (1);
9918 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9919 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9921 /* UXTA[BH] or SXTA[BH]. */
9923 *cost
+= extra_cost
->alu
.extend_arith
;
9924 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9926 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9931 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9932 if (shift_op
!= NULL
)
9937 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9938 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9941 *cost
+= extra_cost
->alu
.arith_shift
;
9943 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9944 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9947 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9949 rtx mul_op
= XEXP (x
, 0);
9951 *cost
= COSTS_N_INSNS (1);
9953 if (TARGET_DSP_MULTIPLY
9954 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9955 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9956 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9957 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9958 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9959 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9960 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9961 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9962 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9963 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9964 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9965 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9970 *cost
+= extra_cost
->mult
[0].extend_add
;
9971 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9972 SIGN_EXTEND
, 0, speed_p
)
9973 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9974 SIGN_EXTEND
, 0, speed_p
)
9975 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9980 *cost
+= extra_cost
->mult
[0].add
;
9981 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
9982 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
9983 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9986 if (CONST_INT_P (XEXP (x
, 1)))
9988 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9989 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9991 *cost
= COSTS_N_INSNS (insns
);
9993 *cost
+= insns
* extra_cost
->alu
.arith
;
9994 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9998 *cost
+= extra_cost
->alu
.arith
;
10003 if (mode
== DImode
)
10006 && GET_CODE (XEXP (x
, 0)) == MULT
10007 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10008 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10009 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10010 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10012 *cost
= COSTS_N_INSNS (1);
10014 *cost
+= extra_cost
->mult
[1].extend_add
;
10015 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
10016 ZERO_EXTEND
, 0, speed_p
)
10017 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
10018 ZERO_EXTEND
, 0, speed_p
)
10019 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10023 *cost
= COSTS_N_INSNS (2);
10025 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10026 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10029 *cost
+= (extra_cost
->alu
.arith
10030 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10031 ? extra_cost
->alu
.arith
10032 : extra_cost
->alu
.arith_shift
));
10034 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
10036 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10041 *cost
+= 2 * extra_cost
->alu
.arith
;
10046 *cost
= LIBCALL_COST (2);
10049 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10051 *cost
= COSTS_N_INSNS (1);
10053 *cost
+= extra_cost
->alu
.rev
;
10057 /* Fall through. */
10058 case AND
: case XOR
:
10059 if (mode
== SImode
)
10061 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10062 rtx op0
= XEXP (x
, 0);
10063 rtx shift_op
, shift_reg
;
10065 *cost
= COSTS_N_INSNS (1);
10069 || (code
== IOR
&& TARGET_THUMB2
)))
10070 op0
= XEXP (op0
, 0);
10073 shift_op
= shifter_op_p (op0
, &shift_reg
);
10074 if (shift_op
!= NULL
)
10079 *cost
+= extra_cost
->alu
.log_shift_reg
;
10080 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10083 *cost
+= extra_cost
->alu
.log_shift
;
10085 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10086 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10090 if (CONST_INT_P (XEXP (x
, 1)))
10092 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10093 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10096 *cost
= COSTS_N_INSNS (insns
);
10098 *cost
+= insns
* extra_cost
->alu
.logical
;
10099 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
10104 *cost
+= extra_cost
->alu
.logical
;
10105 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
10106 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10110 if (mode
== DImode
)
10112 rtx op0
= XEXP (x
, 0);
10113 enum rtx_code subcode
= GET_CODE (op0
);
10115 *cost
= COSTS_N_INSNS (2);
10119 || (code
== IOR
&& TARGET_THUMB2
)))
10120 op0
= XEXP (op0
, 0);
10122 if (GET_CODE (op0
) == ZERO_EXTEND
)
10125 *cost
+= 2 * extra_cost
->alu
.logical
;
10127 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
10128 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10131 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10134 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10136 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
10137 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10142 *cost
+= 2 * extra_cost
->alu
.logical
;
10148 *cost
= LIBCALL_COST (2);
10152 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10153 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10155 rtx op0
= XEXP (x
, 0);
10157 *cost
= COSTS_N_INSNS (1);
10159 if (GET_CODE (op0
) == NEG
)
10160 op0
= XEXP (op0
, 0);
10163 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10165 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
10166 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
10169 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10171 *cost
= LIBCALL_COST (2);
10175 if (mode
== SImode
)
10177 *cost
= COSTS_N_INSNS (1);
10178 if (TARGET_DSP_MULTIPLY
10179 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10180 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10181 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10182 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10183 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10184 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10185 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10186 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10187 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10188 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10189 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10190 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10193 /* SMUL[TB][TB]. */
10195 *cost
+= extra_cost
->mult
[0].extend
;
10196 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
10197 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
10201 *cost
+= extra_cost
->mult
[0].simple
;
10205 if (mode
== DImode
)
10208 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10209 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10210 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10211 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10213 *cost
= COSTS_N_INSNS (1);
10215 *cost
+= extra_cost
->mult
[1].extend
;
10216 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
10217 ZERO_EXTEND
, 0, speed_p
)
10218 + rtx_cost (XEXP (XEXP (x
, 1), 0),
10219 ZERO_EXTEND
, 0, speed_p
));
10223 *cost
= LIBCALL_COST (2);
10228 *cost
= LIBCALL_COST (2);
10232 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10233 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10235 *cost
= COSTS_N_INSNS (1);
10237 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10241 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10243 *cost
= LIBCALL_COST (1);
10247 if (mode
== SImode
)
10249 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10251 *cost
= COSTS_N_INSNS (2);
10252 /* Assume the non-flag-changing variant. */
10254 *cost
+= (extra_cost
->alu
.log_shift
10255 + extra_cost
->alu
.arith_shift
);
10256 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
10260 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10261 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10263 *cost
= COSTS_N_INSNS (2);
10264 /* No extra cost for MOV imm and MVN imm. */
10265 /* If the comparison op is using the flags, there's no further
10266 cost, otherwise we need to add the cost of the comparison. */
10267 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10268 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10269 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10271 *cost
+= (COSTS_N_INSNS (1)
10272 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
10274 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
10277 *cost
+= extra_cost
->alu
.arith
;
10281 *cost
= COSTS_N_INSNS (1);
10283 *cost
+= extra_cost
->alu
.arith
;
10287 if (GET_MODE_CLASS (mode
) == MODE_INT
10288 && GET_MODE_SIZE (mode
) < 4)
10290 /* Slightly disparage, as we might need an extend operation. */
10291 *cost
= 1 + COSTS_N_INSNS (1);
10293 *cost
+= extra_cost
->alu
.arith
;
10297 if (mode
== DImode
)
10299 *cost
= COSTS_N_INSNS (2);
10301 *cost
+= 2 * extra_cost
->alu
.arith
;
10306 *cost
= LIBCALL_COST (1);
10310 if (mode
== SImode
)
10313 rtx shift_reg
= NULL
;
10315 *cost
= COSTS_N_INSNS (1);
10316 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10320 if (shift_reg
!= NULL
)
10323 *cost
+= extra_cost
->alu
.log_shift_reg
;
10324 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10327 *cost
+= extra_cost
->alu
.log_shift
;
10328 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
10333 *cost
+= extra_cost
->alu
.logical
;
10336 if (mode
== DImode
)
10338 *cost
= COSTS_N_INSNS (2);
10344 *cost
+= LIBCALL_COST (1);
10349 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10351 *cost
= COSTS_N_INSNS (4);
10354 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
10355 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
10357 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
10358 /* Assume that if one arm of the if_then_else is a register,
10359 that it will be tied with the result and eliminate the
10360 conditional insn. */
10361 if (REG_P (XEXP (x
, 1)))
10363 else if (REG_P (XEXP (x
, 2)))
10369 if (extra_cost
->alu
.non_exec_costs_exec
)
10370 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10372 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10375 *cost
+= op1cost
+ op2cost
;
10381 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10385 machine_mode op0mode
;
10386 /* We'll mostly assume that the cost of a compare is the cost of the
10387 LHS. However, there are some notable exceptions. */
10389 /* Floating point compares are never done as side-effects. */
10390 op0mode
= GET_MODE (XEXP (x
, 0));
10391 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10392 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10394 *cost
= COSTS_N_INSNS (1);
10396 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10398 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10400 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10406 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10408 *cost
= LIBCALL_COST (2);
10412 /* DImode compares normally take two insns. */
10413 if (op0mode
== DImode
)
10415 *cost
= COSTS_N_INSNS (2);
10417 *cost
+= 2 * extra_cost
->alu
.arith
;
10421 if (op0mode
== SImode
)
10426 if (XEXP (x
, 1) == const0_rtx
10427 && !(REG_P (XEXP (x
, 0))
10428 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10429 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10431 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10433 /* Multiply operations that set the flags are often
10434 significantly more expensive. */
10436 && GET_CODE (XEXP (x
, 0)) == MULT
10437 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10438 *cost
+= extra_cost
->mult
[0].flag_setting
;
10441 && GET_CODE (XEXP (x
, 0)) == PLUS
10442 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10443 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10445 *cost
+= extra_cost
->mult
[0].flag_setting
;
10450 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10451 if (shift_op
!= NULL
)
10453 *cost
= COSTS_N_INSNS (1);
10454 if (shift_reg
!= NULL
)
10456 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10458 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10461 *cost
+= extra_cost
->alu
.arith_shift
;
10462 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10463 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10467 *cost
= COSTS_N_INSNS (1);
10469 *cost
+= extra_cost
->alu
.arith
;
10470 if (CONST_INT_P (XEXP (x
, 1))
10471 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10473 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10481 *cost
= LIBCALL_COST (2);
10504 if (outer_code
== SET
)
10506 /* Is it a store-flag operation? */
10507 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10508 && XEXP (x
, 1) == const0_rtx
)
10510 /* Thumb also needs an IT insn. */
10511 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10514 if (XEXP (x
, 1) == const0_rtx
)
10519 /* LSR Rd, Rn, #31. */
10520 *cost
= COSTS_N_INSNS (1);
10522 *cost
+= extra_cost
->alu
.shift
;
10532 *cost
= COSTS_N_INSNS (2);
10536 /* RSBS T1, Rn, Rn, LSR #31
10538 *cost
= COSTS_N_INSNS (2);
10540 *cost
+= extra_cost
->alu
.arith_shift
;
10544 /* RSB Rd, Rn, Rn, ASR #1
10545 LSR Rd, Rd, #31. */
10546 *cost
= COSTS_N_INSNS (2);
10548 *cost
+= (extra_cost
->alu
.arith_shift
10549 + extra_cost
->alu
.shift
);
10555 *cost
= COSTS_N_INSNS (2);
10557 *cost
+= extra_cost
->alu
.shift
;
10561 /* Remaining cases are either meaningless or would take
10562 three insns anyway. */
10563 *cost
= COSTS_N_INSNS (3);
10566 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10571 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10572 if (CONST_INT_P (XEXP (x
, 1))
10573 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10575 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10582 /* Not directly inside a set. If it involves the condition code
10583 register it must be the condition for a branch, cond_exec or
10584 I_T_E operation. Since the comparison is performed elsewhere
10585 this is just the control part which has no additional
10587 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10588 && XEXP (x
, 1) == const0_rtx
)
10596 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10597 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10599 *cost
= COSTS_N_INSNS (1);
10601 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10605 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10607 *cost
= LIBCALL_COST (1);
10611 if (mode
== SImode
)
10613 *cost
= COSTS_N_INSNS (1);
10615 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10619 *cost
= LIBCALL_COST (1);
10623 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10624 && MEM_P (XEXP (x
, 0)))
10626 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10628 if (mode
== DImode
)
10629 *cost
+= COSTS_N_INSNS (1);
10634 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10635 *cost
+= extra_cost
->ldst
.load
;
10637 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10639 if (mode
== DImode
)
10640 *cost
+= extra_cost
->alu
.shift
;
10645 /* Widening from less than 32-bits requires an extend operation. */
10646 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10648 /* We have SXTB/SXTH. */
10649 *cost
= COSTS_N_INSNS (1);
10650 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10652 *cost
+= extra_cost
->alu
.extend
;
10654 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10656 /* Needs two shifts. */
10657 *cost
= COSTS_N_INSNS (2);
10658 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10660 *cost
+= 2 * extra_cost
->alu
.shift
;
10663 /* Widening beyond 32-bits requires one more insn. */
10664 if (mode
== DImode
)
10666 *cost
+= COSTS_N_INSNS (1);
10668 *cost
+= extra_cost
->alu
.shift
;
10675 || GET_MODE (XEXP (x
, 0)) == SImode
10676 || GET_MODE (XEXP (x
, 0)) == QImode
)
10677 && MEM_P (XEXP (x
, 0)))
10679 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10681 if (mode
== DImode
)
10682 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10687 /* Widening from less than 32-bits requires an extend operation. */
10688 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10690 /* UXTB can be a shorter instruction in Thumb2, but it might
10691 be slower than the AND Rd, Rn, #255 alternative. When
10692 optimizing for speed it should never be slower to use
10693 AND, and we don't really model 16-bit vs 32-bit insns
10695 *cost
= COSTS_N_INSNS (1);
10697 *cost
+= extra_cost
->alu
.logical
;
10699 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10701 /* We have UXTB/UXTH. */
10702 *cost
= COSTS_N_INSNS (1);
10703 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10705 *cost
+= extra_cost
->alu
.extend
;
10707 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10709 /* Needs two shifts. It's marginally preferable to use
10710 shifts rather than two BIC instructions as the second
10711 shift may merge with a subsequent insn as a shifter
10713 *cost
= COSTS_N_INSNS (2);
10714 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10716 *cost
+= 2 * extra_cost
->alu
.shift
;
10718 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10719 *cost
= COSTS_N_INSNS (1);
10721 /* Widening beyond 32-bits requires one more insn. */
10722 if (mode
== DImode
)
10724 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10731 /* CONST_INT has no mode, so we cannot tell for sure how many
10732 insns are really going to be needed. The best we can do is
10733 look at the value passed. If it fits in SImode, then assume
10734 that's the mode it will be used for. Otherwise assume it
10735 will be used in DImode. */
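      /* For example (illustrative, not from the original source): the value
	 0x1ffffffff does not survive trunc_int_for_mode (..., SImode) and so
	 is treated as a DImode constant, whereas 0xff does and is costed as
	 an SImode constant.  */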
10736 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10741 /* Avoid blowing up in arm_gen_constant (). */
10742 if (!(outer_code
== PLUS
10743 || outer_code
== AND
10744 || outer_code
== IOR
10745 || outer_code
== XOR
10746 || outer_code
== MINUS
))
10750 if (mode
== SImode
)
10752 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10753 INTVAL (x
), NULL
, NULL
,
10759 *cost
+= COSTS_N_INSNS (arm_gen_constant
10760 (outer_code
, SImode
, NULL
,
10761 trunc_int_for_mode (INTVAL (x
), SImode
),
10763 + arm_gen_constant (outer_code
, SImode
, NULL
,
10764 INTVAL (x
) >> 32, NULL
,
10776 if (arm_arch_thumb2
&& !flag_pic
)
10777 *cost
= COSTS_N_INSNS (2);
10779 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10782 *cost
= COSTS_N_INSNS (2);
10786 *cost
+= COSTS_N_INSNS (1);
10788 *cost
+= extra_cost
->alu
.arith
;
10794 *cost
= COSTS_N_INSNS (4);
10799 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10800 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10802 if (vfp3_const_double_rtx (x
))
10804 *cost
= COSTS_N_INSNS (1);
10806 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10812 *cost
= COSTS_N_INSNS (1);
10813 if (mode
== DFmode
)
10814 *cost
+= extra_cost
->ldst
.loadd
;
10816 *cost
+= extra_cost
->ldst
.loadf
;
10819 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10823 *cost
= COSTS_N_INSNS (4);
10829 && TARGET_HARD_FLOAT
10830 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10831 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10832 *cost
= COSTS_N_INSNS (1);
10834 *cost
= COSTS_N_INSNS (4);
10839 *cost
= COSTS_N_INSNS (1);
10840 /* When optimizing for size, we prefer constant pool entries to
10841 MOVW/MOVT pairs, so bump the cost of these slightly. */
10847 *cost
= COSTS_N_INSNS (1);
10849 *cost
+= extra_cost
->alu
.clz
;
10853 if (XEXP (x
, 1) == const0_rtx
)
10855 *cost
= COSTS_N_INSNS (1);
10857 *cost
+= extra_cost
->alu
.log_shift
;
10858 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10861 /* Fall through. */
10865 *cost
= COSTS_N_INSNS (2);
10869 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10870 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10871 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10872 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10873 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10874 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10875 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10876 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10879 *cost
= COSTS_N_INSNS (1);
10881 *cost
+= extra_cost
->mult
[1].extend
;
10882 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10884 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10888 *cost
= LIBCALL_COST (1);
10892 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10895 /* Reading the PC is like reading any other register. Writing it
10896 is more expensive, but we take that into account elsewhere. */
10901 /* TODO: Simple zero_extract of bottom bits using AND. */
10902 /* Fall through. */
10906 && CONST_INT_P (XEXP (x
, 1))
10907 && CONST_INT_P (XEXP (x
, 2)))
10909 *cost
= COSTS_N_INSNS (1);
10911 *cost
+= extra_cost
->alu
.bfx
;
10912 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10915 /* Without UBFX/SBFX, need to resort to shift operations. */
10916 *cost
= COSTS_N_INSNS (2);
10918 *cost
+= 2 * extra_cost
->alu
.shift
;
10919 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10923 if (TARGET_HARD_FLOAT
)
10925 *cost
= COSTS_N_INSNS (1);
10927 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10928 if (!TARGET_FPU_ARMV8
10929 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10931 /* Pre v8, widening HF->DF is a two-step process, first
10932 widening to SFmode. */
10933 *cost
+= COSTS_N_INSNS (1);
10935 *cost
+= extra_cost
->fp
[0].widen
;
10937 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10941 *cost
= LIBCALL_COST (1);
10944 case FLOAT_TRUNCATE
:
10945 if (TARGET_HARD_FLOAT
)
10947 *cost
= COSTS_N_INSNS (1);
10949 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10950 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10952 /* Vector modes? */
10954 *cost
= LIBCALL_COST (1);
10958 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10960 rtx op0
= XEXP (x
, 0);
10961 rtx op1
= XEXP (x
, 1);
10962 rtx op2
= XEXP (x
, 2);
10964 *cost
= COSTS_N_INSNS (1);
10966 /* vfms or vfnma. */
10967 if (GET_CODE (op0
) == NEG
)
10968 op0
= XEXP (op0
, 0);
10970 /* vfnms or vfnma. */
10971 if (GET_CODE (op2
) == NEG
)
10972 op2
= XEXP (op2
, 0);
10974 *cost
+= rtx_cost (op0
, FMA
, 0, speed_p
);
10975 *cost
+= rtx_cost (op1
, FMA
, 1, speed_p
);
10976 *cost
+= rtx_cost (op2
, FMA
, 2, speed_p
);
10979 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10984 *cost
= LIBCALL_COST (3);
10989 if (TARGET_HARD_FLOAT
)
10991 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10993 *cost
= COSTS_N_INSNS (1);
10995 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
	  /* Strip off the 'cost' of rounding towards zero.  */
10997 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10998 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
11000 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
11001 /* ??? Increase the cost to deal with transferring from
11002 FP -> CORE registers? */
11005 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11006 && TARGET_FPU_ARMV8
)
11008 *cost
= COSTS_N_INSNS (1);
11010 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11013 /* Vector costs? */
11015 *cost
= LIBCALL_COST (1);
11019 case UNSIGNED_FLOAT
:
11020 if (TARGET_HARD_FLOAT
)
11022 /* ??? Increase the cost to deal with transferring from CORE
11023 -> FP registers? */
11024 *cost
= COSTS_N_INSNS (1);
11026 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11029 *cost
= LIBCALL_COST (1);
11033 *cost
= COSTS_N_INSNS (1);
      /* Just a guess.  Guess number of instructions in the asm
	 plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	 though (see PR60663).  */
11041 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11042 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11044 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11048 if (mode
!= VOIDmode
)
11049 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11051 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11056 #undef HANDLE_NARROW_SHIFT_ARITH
11058 /* RTX costs when optimizing for size. */
11060 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
11061 int *total
, bool speed
)
11065 if (TARGET_OLD_RTX_COSTS
11066 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
11068 /* Old way. (Deprecated.) */
11070 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
11071 (enum rtx_code
) outer_code
, total
);
11073 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
11074 (enum rtx_code
) outer_code
, total
,
11080 if (current_tune
->insn_extra_cost
)
11081 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11082 (enum rtx_code
) outer_code
,
11083 current_tune
->insn_extra_cost
,
11085 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11086 && current_tune->insn_extra_cost != NULL */
11088 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11089 (enum rtx_code
) outer_code
,
11090 &generic_extra_costs
, total
, speed
);
11093 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
11095 print_rtl_single (dump_file
, x
);
11096 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11097 *total
, result
? "final" : "partial");
/* RTX costs when optimizing for size.  RTX costs for cores with a slow MUL
   implementation.  Thumb-2 is not supported on any "slowmul" cores, so it
   can be ignored.  */
11106 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11107 int *total
, bool speed
)
11109 machine_mode mode
= GET_MODE (x
);
11113 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11120 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11123 *total
= COSTS_N_INSNS (20);
11127 if (CONST_INT_P (XEXP (x
, 1)))
11129 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11130 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11131 int cost
, const_ok
= const_ok_for_arm (i
);
11132 int j
, booth_unit_size
;
11134 /* Tune as appropriate. */
11135 cost
= const_ok
? 4 : 8;
11136 booth_unit_size
= 2;
11137 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11139 i
>>= booth_unit_size
;
11143 *total
= COSTS_N_INSNS (cost
);
11144 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
11148 *total
= COSTS_N_INSNS (20);
  return arm_rtx_costs_1 (x, outer_code, total, speed);
11157 /* RTX cost for cores with a fast multiply unit (M variants). */
11160 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11161 int *total
, bool speed
)
11163 machine_mode mode
= GET_MODE (x
);
11167 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11171 /* ??? should thumb2 use different costs? */
11175 /* There is no point basing this on the tuning, since it is always the
11176 fast variant if it exists at all. */
11178 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11179 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11180 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11182 *total
= COSTS_N_INSNS(2);
11187 if (mode
== DImode
)
11189 *total
= COSTS_N_INSNS (5);
11193 if (CONST_INT_P (XEXP (x
, 1)))
11195 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11196 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11197 int cost
, const_ok
= const_ok_for_arm (i
);
11198 int j
, booth_unit_size
;
11200 /* Tune as appropriate. */
11201 cost
= const_ok
? 4 : 8;
11202 booth_unit_size
= 8;
11203 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11205 i
>>= booth_unit_size
;
11209 *total
= COSTS_N_INSNS(cost
);
11213 if (mode
== SImode
)
11215 *total
= COSTS_N_INSNS (4);
11219 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11221 if (TARGET_HARD_FLOAT
11223 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11225 *total
= COSTS_N_INSNS (1);
11230 /* Requires a lib call */
11231 *total
= COSTS_N_INSNS (20);
11235 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */
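/* Illustrative sketch only, not used by the cost hooks: the multiplier
   early-termination behaviour described in the comment inside
   arm_xscale_rtx_costs below.  The multiplier retires 15 bits in the
   first cycle and a further 12 in the second, with a general overhead
   of one cycle.  The masks mirror the ones used below, but the handling
   of negative constants and the exact cycle counts here are assumptions
   made for illustration.  */
static int
xscale_mul_const_cycles_sketch (unsigned int i)
{
  int cycles = 1;			/* General overhead.  */
  if (i & 0x80000000u)
    i = ~i;				/* Assumed: cost the inverse of a negative constant.  */
  if ((i & 0xffff8000u) != 0)		/* Bits beyond the first 15.  */
    cycles++;
  if ((i & 0xf8000000u) != 0)		/* Bits beyond the first 27 (15 + 12).  */
    cycles++;
  return cycles;
}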
11244 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11245 int *total
, bool speed
)
11247 machine_mode mode
= GET_MODE (x
);
11251 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11258 if (GET_CODE (XEXP (x
, 0)) != MULT
)
11259 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
      /* A COMPARE of a MULT is slow on XScale; the muls instruction
	 will stall until the multiplication is complete.  */
11263 *total
= COSTS_N_INSNS (3);
11267 /* There is no point basing this on the tuning, since it is always the
11268 fast variant if it exists at all. */
11270 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11271 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11272 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11274 *total
= COSTS_N_INSNS (2);
11279 if (mode
== DImode
)
11281 *total
= COSTS_N_INSNS (5);
11285 if (CONST_INT_P (XEXP (x
, 1)))
	  /* If operand 1 is a constant we can more accurately
	     calculate the cost of the multiply.  The multiplier can
	     retire 15 bits on the first cycle and a further 12 on the
	     second.  We do, of course, have to load the constant into
	     a register first.  */
11292 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
11293 /* There's a general overhead of one cycle. */
11295 unsigned HOST_WIDE_INT masked_const
;
11297 if (i
& 0x80000000)
11300 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
11302 masked_const
= i
& 0xffff8000;
11303 if (masked_const
!= 0)
11306 masked_const
= i
& 0xf8000000;
11307 if (masked_const
!= 0)
11310 *total
= COSTS_N_INSNS (cost
);
11314 if (mode
== SImode
)
11316 *total
= COSTS_N_INSNS (3);
11320 /* Requires a lib call */
11321 *total
= COSTS_N_INSNS (20);
11325 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11330 /* RTX costs for 9e (and later) cores. */
11333 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11334 int *total
, bool speed
)
11336 machine_mode mode
= GET_MODE (x
);
11343 /* Small multiply: 32 cycles for an integer multiply inst. */
11344 if (arm_arch6m
&& arm_m_profile_small_mul
)
11345 *total
= COSTS_N_INSNS (32);
11347 *total
= COSTS_N_INSNS (3);
11351 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11359 /* There is no point basing this on the tuning, since it is always the
11360 fast variant if it exists at all. */
11362 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11363 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11364 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11366 *total
= COSTS_N_INSNS (2);
11371 if (mode
== DImode
)
11373 *total
= COSTS_N_INSNS (5);
11377 if (mode
== SImode
)
11379 *total
= COSTS_N_INSNS (2);
11383 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11385 if (TARGET_HARD_FLOAT
11387 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11389 *total
= COSTS_N_INSNS (1);
11394 *total
= COSTS_N_INSNS (20);
11398 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
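/* Illustrative sketch only: the preference order in the comment above
   expressed as a simple rank (lower = more preferred).  The hooks below
   return actual cost numbers; those numbers are not derived from this
   toy scale.  */
static int
arm_address_kind_rank_sketch (enum rtx_code c, bool has_const_index)
{
  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;				/* Auto-modify addressing.  */
  if (c == PLUS)
    return has_const_index ? 2 : 1;	/* reg+reg/shift before reg+imm.  */
  if (c == REG)
    return 3;
  return 4;				/* MEM, LABEL_REF, SYMBOL_REF.  */
}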
11406 arm_arm_address_cost (rtx x
)
11408 enum rtx_code c
= GET_CODE (x
);
11410 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11412 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11417 if (CONST_INT_P (XEXP (x
, 1)))
11420 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11430 arm_thumb_address_cost (rtx x
)
11432 enum rtx_code c
= GET_CODE (x
);
11437 && REG_P (XEXP (x
, 0))
11438 && CONST_INT_P (XEXP (x
, 1)))
11445 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11446 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11448 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11451 /* Adjust cost hook for XScale. */
11453 xscale_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11455 /* Some true dependencies can have a higher cost depending
11456 on precisely how certain input operands are used. */
11457 if (REG_NOTE_KIND(link
) == 0
11458 && recog_memoized (insn
) >= 0
11459 && recog_memoized (dep
) >= 0)
11461 int shift_opnum
= get_attr_shift (insn
);
11462 enum attr_type attr_type
= get_attr_type (dep
);
11464 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11465 operand for INSN. If we have a shifted input operand and the
11466 instruction we depend on is another ALU instruction, then we may
11467 have to account for an additional stall. */
11468 if (shift_opnum
!= 0
11469 && (attr_type
== TYPE_ALU_SHIFT_IMM
11470 || attr_type
== TYPE_ALUS_SHIFT_IMM
11471 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11472 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11473 || attr_type
== TYPE_ALU_SHIFT_REG
11474 || attr_type
== TYPE_ALUS_SHIFT_REG
11475 || attr_type
== TYPE_LOGIC_SHIFT_REG
11476 || attr_type
== TYPE_LOGICS_SHIFT_REG
11477 || attr_type
== TYPE_MOV_SHIFT
11478 || attr_type
== TYPE_MVN_SHIFT
11479 || attr_type
== TYPE_MOV_SHIFT_REG
11480 || attr_type
== TYPE_MVN_SHIFT_REG
))
11482 rtx shifted_operand
;
11485 /* Get the shifted operand. */
11486 extract_insn (insn
);
11487 shifted_operand
= recog_data
.operand
[shift_opnum
];
	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
11492 extract_insn (dep
);
11493 preprocess_constraints (dep
);
11494 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11496 /* We can ignore strict inputs. */
11497 if (recog_data
.operand_type
[opno
] == OP_IN
)
11500 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11512 /* Adjust cost hook for Cortex A9. */
11514 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11516 switch (REG_NOTE_KIND (link
))
11523 case REG_DEP_OUTPUT
:
11524 if (recog_memoized (insn
) >= 0
11525 && recog_memoized (dep
) >= 0)
11527 if (GET_CODE (PATTERN (insn
)) == SET
)
11530 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11532 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11534 enum attr_type attr_type_insn
= get_attr_type (insn
);
11535 enum attr_type attr_type_dep
= get_attr_type (dep
);
	      /* By default all dependencies of the form
		 have an extra latency of 1 cycle because
		 of the input and output dependency in this
		 case.  However this gets modeled as a true
		 dependency and hence all these checks.  */
11544 if (REG_P (SET_DEST (PATTERN (insn
)))
11545 && REG_P (SET_DEST (PATTERN (dep
)))
11546 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11547 SET_DEST (PATTERN (dep
))))
		  /* FMACS is a special case where the dependent
		     instruction can be issued 3 cycles before
		     the normal latency in case of an output
		     dependency.  */
11553 if ((attr_type_insn
== TYPE_FMACS
11554 || attr_type_insn
== TYPE_FMACD
)
11555 && (attr_type_dep
== TYPE_FMACS
11556 || attr_type_dep
== TYPE_FMACD
))
11558 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11559 *cost
= insn_default_latency (dep
) - 3;
11561 *cost
= insn_default_latency (dep
);
11566 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11567 *cost
= insn_default_latency (dep
) + 1;
11569 *cost
= insn_default_latency (dep
);
11579 gcc_unreachable ();
11585 /* Adjust cost hook for FA726TE. */
11587 fa726te_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
  /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
     followed by a predicated one) has a penalty of 3.  */
11591 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11592 && recog_memoized (insn
) >= 0
11593 && recog_memoized (dep
) >= 0
11594 && get_attr_conds (dep
) == CONDS_SET
)
11596 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11597 if (get_attr_conds (insn
) == CONDS_USE
11598 && get_attr_type (insn
) != TYPE_BRANCH
)
11604 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11605 || get_attr_conds (insn
) == CONDS_USE
)
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */
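/* Illustrative sketch only: the relationship the comment above asks for.
   If VFP_TO_CORE is the cost of a VFP<->core register move and MEM_COST
   the cost of one memory access, keeping

     vfp_to_core < 2 * mem_cost

   ensures that a float-to-integer conversion prefers a direct register
   transfer over a store/reload through the stack.  Parameter names here
   are hypothetical.  */
static bool
vfp_move_beats_memory_sketch (int vfp_to_core, int mem_cost)
{
  return vfp_to_core < 2 * mem_cost;
}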
11623 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11624 reg_class_t from
, reg_class_t to
)
11628 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11629 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11631 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11632 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11634 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11641 if (from
== HI_REGS
|| to
== HI_REGS
)
11648 /* Implement TARGET_MEMORY_MOVE_COST. */
11651 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11652 bool in ATTRIBUTE_UNUSED
)
11658 if (GET_MODE_SIZE (mode
) < 4)
11661 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11665 /* Vectorizer cost model implementation. */
11667 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11669 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11671 int misalign ATTRIBUTE_UNUSED
)
11675 switch (type_of_cost
)
11678 return current_tune
->vec_costs
->scalar_stmt_cost
;
11681 return current_tune
->vec_costs
->scalar_load_cost
;
11684 return current_tune
->vec_costs
->scalar_store_cost
;
11687 return current_tune
->vec_costs
->vec_stmt_cost
;
11690 return current_tune
->vec_costs
->vec_align_load_cost
;
11693 return current_tune
->vec_costs
->vec_store_cost
;
11695 case vec_to_scalar
:
11696 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11698 case scalar_to_vec
:
11699 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11701 case unaligned_load
:
11702 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11704 case unaligned_store
:
11705 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11707 case cond_branch_taken
:
11708 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11710 case cond_branch_not_taken
:
11711 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11714 case vec_promote_demote
:
11715 return current_tune
->vec_costs
->vec_stmt_cost
;
11717 case vec_construct
:
11718 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11719 return elements
/ 2 + 1;
11722 gcc_unreachable ();
11726 /* Implement targetm.vectorize.add_stmt_cost. */
11729 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11730 struct _stmt_vec_info
*stmt_info
, int misalign
,
11731 enum vect_cost_model_location where
)
11733 unsigned *cost
= (unsigned *) data
;
11734 unsigned retval
= 0;
11736 if (flag_vect_cost_model
)
11738 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11739 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11741 /* Statements in an inner loop relative to the loop being
11742 vectorized are weighted more heavily. The value here is
11743 arbitrary and could potentially be improved with analysis. */
11744 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11745 count
*= 50; /* FIXME. */
11747 retval
= (unsigned) (count
* stmt_cost
);
11748 cost
[where
] += retval
;
11754 /* Return true if and only if this insn can dual-issue only as older. */
11756 cortexa7_older_only (rtx_insn
*insn
)
11758 if (recog_memoized (insn
) < 0)
11761 switch (get_attr_type (insn
))
11763 case TYPE_ALU_DSP_REG
:
11764 case TYPE_ALU_SREG
:
11765 case TYPE_ALUS_SREG
:
11766 case TYPE_LOGIC_REG
:
11767 case TYPE_LOGICS_REG
:
11769 case TYPE_ADCS_REG
:
11774 case TYPE_SHIFT_IMM
:
11775 case TYPE_SHIFT_REG
:
11776 case TYPE_LOAD_BYTE
:
11779 case TYPE_FFARITHS
:
11781 case TYPE_FFARITHD
:
11799 case TYPE_F_STORES
:
11806 /* Return true if and only if this insn can dual-issue as younger. */
11808 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11810 if (recog_memoized (insn
) < 0)
11813 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11817 switch (get_attr_type (insn
))
11820 case TYPE_ALUS_IMM
:
11821 case TYPE_LOGIC_IMM
:
11822 case TYPE_LOGICS_IMM
:
11827 case TYPE_MOV_SHIFT
:
11828 case TYPE_MOV_SHIFT_REG
:
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
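/* Illustrative sketch only: the array manipulation performed at the end
   of cortexa7_sched_reorder below, on a plain int array.  The element at
   index FROM is moved to index TO (FROM < TO), shifting the elements in
   between down one slot so their relative order is preserved.  In the
   real ready list a higher index means "issued earlier".  */
static void
move_element_later_sketch (int *a, int from, int to)
{
  int saved = a[from];
  for (int i = from; i < to; i++)
    a[i] = a[i + 1];
  a[to] = saved;
}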
11846 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11847 int *n_readyp
, int clock
)
11850 int first_older_only
= -1, first_younger
= -1;
11854 ";; sched_reorder for cycle %d with %d insns in ready list\n",
  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
11862 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11864 rtx_insn
*insn
= ready
[i
];
11865 if (cortexa7_older_only (insn
))
11867 first_older_only
= i
;
11869 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11872 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11876 /* Nothing to reorder because either no younger insn found or insn
11877 that can dual-issue only as older appears before any insn that
11878 can dual-issue as younger. */
11879 if (first_younger
== -1)
11882 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11886 /* Nothing to reorder because no older-only insn in the ready list. */
11887 if (first_older_only
== -1)
11890 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11894 /* Move first_older_only insn before first_younger. */
11896 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11897 INSN_UID(ready
[first_older_only
]),
11898 INSN_UID(ready
[first_younger
]));
11899 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11900 for (i
= first_older_only
; i
< first_younger
; i
++)
11902 ready
[i
] = ready
[i
+1];
11905 ready
[i
] = first_older_only_insn
;
11909 /* Implement TARGET_SCHED_REORDER. */
11911 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11917 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11920 /* Do nothing for other cores. */
11924 return arm_issue_rate ();
11927 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11928 It corrects the value of COST based on the relationship between
11929 INSN and DEP through the dependence LINK. It returns the new
11930 value. There is a per-core adjust_cost hook to adjust scheduler costs
11931 and the per-core hook can choose to completely override the generic
11932 adjust_cost function. Only put bits of code into arm_adjust_cost that
11933 are common across all cores. */
11935 arm_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int cost
)
11939 /* When generating Thumb-1 code, we want to place flag-setting operations
11940 close to a conditional branch which depends on them, so that we can
11941 omit the comparison. */
11943 && REG_NOTE_KIND (link
) == 0
11944 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11945 && recog_memoized (dep
) >= 0
11946 && get_attr_conds (dep
) == CONDS_SET
)
11949 if (current_tune
->sched_adjust_cost
!= NULL
)
11951 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
11955 /* XXX Is this strictly true? */
11956 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
11957 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11960 /* Call insns don't incur a stall, even if they follow a load. */
11961 if (REG_NOTE_KIND (link
) == 0
11965 if ((i_pat
= single_set (insn
)) != NULL
11966 && MEM_P (SET_SRC (i_pat
))
11967 && (d_pat
= single_set (dep
)) != NULL
11968 && MEM_P (SET_DEST (d_pat
)))
11970 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11971 /* This is a load after a store, there is no conflict if the load reads
11972 from a cached area. Assume that loads from the stack, and from the
11973 constant pool are cached, and that others will miss. This is a
11976 if ((GET_CODE (src_mem
) == SYMBOL_REF
11977 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11978 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11979 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11980 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
static int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;

  return (optimize > 0) ? 2 : 0;
}
static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  if (REAL_VALUES_EQUAL (r, value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using this
   formula (with ^ for exponentiation):

     (-1)^s * 2^(-r) * n

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

    - A (most-significant) is the sign bit.
    - BCD are the exponent (encoded as r XOR 3).
    - EFGH are the mantissa (encoded as n - 16).  */

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
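/* Illustrative sketch only, not used by the code below: turn the 8-bit
   ABCDEFGH encoding described above back into its value, i.e.
   (-1)^s * 2^(-r) * n with n = 16 + EFGH and r = BCD ^ 3.  For example
   0x70 decodes to 1.0 and 0x00 decodes to 2.0.  */
static double
vfp3_decode_imm8_sketch (unsigned int imm8)
{
  int s = (imm8 >> 7) & 1;		/* A: sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;	/* BCD: exponent, encoded as r XOR 3.  */
  int n = 16 + (imm8 & 0xf);		/* EFGH: mantissa, encoded as n - 16.  */
  return (s ? -1.0 : 1.0) * (double) n / (double) (1 << r);
}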
12074 vfp3_const_double_index (rtx x
)
12076 REAL_VALUE_TYPE r
, m
;
12077 int sign
, exponent
;
12078 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12079 unsigned HOST_WIDE_INT mask
;
12080 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12083 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12086 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12088 /* We can't represent these things, so detect them first. */
12089 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12092 /* Extract sign, exponent and mantissa. */
12093 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12094 r
= real_value_abs (&r
);
12095 exponent
= REAL_EXP (&r
);
12096 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12097 highest (sign) bit, with a fixed binary point at bit point_pos.
12098 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12099 bits for the mantissa, this may fail (low bits would be lost). */
12100 real_ldexp (&m
, &r
, point_pos
- exponent
);
12101 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12102 mantissa
= w
.elt (0);
12103 mant_hi
= w
.elt (1);
12105 /* If there are bits set in the low part of the mantissa, we can't
12106 represent this value. */
12110 /* Now make it so that mantissa contains the most-significant bits, and move
12111 the point_pos to indicate that the least-significant bits have been
12113 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12114 mantissa
= mant_hi
;
12116 /* We can permit four significant bits of mantissa only, plus a high bit
12117 which is always 1. */
12118 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
12119 if ((mantissa
& mask
) != 0)
12122 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12123 mantissa
>>= point_pos
- 5;
12125 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12126 floating-point immediate zero with Neon using an integer-zero load, but
12127 that case is handled elsewhere.) */
12131 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12133 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12134 normalized significands are in the range [1, 2). (Our mantissa is shifted
12135 left 4 places at this point relative to normalized IEEE754 values). GCC
12136 internally uses [0.5, 1) (see real.c), so the exponent returned from
12137 REAL_EXP must be altered. */
12138 exponent
= 5 - exponent
;
12140 if (exponent
< 0 || exponent
> 7)
12143 /* Sign, mantissa and exponent are now in the correct form to plug into the
12144 formula described in the comment above. */
12145 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12148 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12150 vfp3_const_double_rtx (rtx x
)
12155 return vfp3_const_double_index (x
) != -1;
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than integers.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
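/* Illustrative sketch only: classify a single 32-bit element against the
   "vmov i32" rows (variants 0-3) of the table above, assuming the element
   is replicated across the vector.  The real routine below works on a
   byte-splatted copy of the constant and also covers the i16, i8, i64 and
   f32 rows as well as the VMVN forms.  */
static int
neon_vmov_i32_variant_sketch (unsigned int val)
{
  if ((val & 0xffffff00u) == 0)
    return 0;			/* 00000000 00000000 00000000 abcdefgh  */
  if ((val & 0xffff00ffu) == 0)
    return 1;			/* 00000000 00000000 abcdefgh 00000000  */
  if ((val & 0xff00ffffu) == 0)
    return 2;			/* 00000000 abcdefgh 00000000 00000000  */
  if ((val & 0x00ffffffu) == 0)
    return 3;			/* abcdefgh 00000000 00000000 00000000  */
  return -1;
}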
12210 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12211 rtx
*modconst
, int *elementwidth
)
12213 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12215 for (i = 0; i < idx; i += (STRIDE)) \
12220 immtype = (CLASS); \
12221 elsize = (ELSIZE); \
12225 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12226 unsigned int innersize
;
12227 unsigned char bytes
[16];
12228 int immtype
= -1, matches
;
12229 unsigned int invmask
= inverse
? 0xff : 0;
12230 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12234 n_elts
= CONST_VECTOR_NUNITS (op
);
12235 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12240 if (mode
== VOIDmode
)
12242 innersize
= GET_MODE_SIZE (mode
);
12245 /* Vectors of float constants. */
12246 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12248 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12249 REAL_VALUE_TYPE r0
;
12251 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12254 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
12256 for (i
= 1; i
< n_elts
; i
++)
12258 rtx elt
= CONST_VECTOR_ELT (op
, i
);
12259 REAL_VALUE_TYPE re
;
12261 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
12263 if (!REAL_VALUES_EQUAL (r0
, re
))
12268 *modconst
= CONST_VECTOR_ELT (op
, 0);
12273 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12279 /* Splat vector constant out into a byte vector. */
12280 for (i
= 0; i
< n_elts
; i
++)
12282 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12283 unsigned HOST_WIDE_INT elpart
;
12284 unsigned int part
, parts
;
12286 if (CONST_INT_P (el
))
12288 elpart
= INTVAL (el
);
12291 else if (CONST_DOUBLE_P (el
))
12293 elpart
= CONST_DOUBLE_LOW (el
);
12297 gcc_unreachable ();
12299 for (part
= 0; part
< parts
; part
++)
12302 for (byte
= 0; byte
< innersize
; byte
++)
12304 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12305 elpart
>>= BITS_PER_UNIT
;
12307 if (CONST_DOUBLE_P (el
))
12308 elpart
= CONST_DOUBLE_HIGH (el
);
12312 /* Sanity check. */
12313 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12317 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12318 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12320 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12321 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12323 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12324 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12326 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12327 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12329 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12331 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12333 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12334 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12336 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12337 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12339 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12340 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12342 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12343 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12345 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12347 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12349 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12350 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12352 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12353 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12355 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12356 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12358 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12359 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12361 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12363 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12364 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12372 *elementwidth
= elsize
;
12376 unsigned HOST_WIDE_INT imm
= 0;
12378 /* Un-invert bytes of recognized vector, if necessary. */
12380 for (i
= 0; i
< idx
; i
++)
12381 bytes
[i
] ^= invmask
;
12385 /* FIXME: Broken on 32-bit H_W_I hosts. */
12386 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12388 for (i
= 0; i
< 8; i
++)
12389 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12390 << (i
* BITS_PER_UNIT
);
12392 *modconst
= GEN_INT (imm
);
12396 unsigned HOST_WIDE_INT imm
= 0;
12398 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12399 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12401 *modconst
= GEN_INT (imm
);
12409 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12410 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12411 float elements), and a modified constant (whatever should be output for a
12412 VMOV) in *MODCONST. */
12415 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
12416 rtx
*modconst
, int *elementwidth
)
12420 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12426 *modconst
= tmpconst
;
12429 *elementwidth
= tmpwidth
;
12434 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12435 the immediate is valid, write a constant suitable for using as an operand
12436 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12437 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12440 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
12441 rtx
*modconst
, int *elementwidth
)
12445 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12447 if (retval
< 0 || retval
> 5)
12451 *modconst
= tmpconst
;
12454 *elementwidth
= tmpwidth
;
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT determines whether this is a left or a right
   shift, because the two have different limitations.  */
12466 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12467 rtx
*modconst
, int *elementwidth
,
12470 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12471 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12472 unsigned HOST_WIDE_INT last_elt
= 0;
12473 unsigned HOST_WIDE_INT maxshift
;
12475 /* Split vector constant out into a byte vector. */
12476 for (i
= 0; i
< n_elts
; i
++)
12478 rtx el
= CONST_VECTOR_ELT (op
, i
);
12479 unsigned HOST_WIDE_INT elpart
;
12481 if (CONST_INT_P (el
))
12482 elpart
= INTVAL (el
);
12483 else if (CONST_DOUBLE_P (el
))
12486 gcc_unreachable ();
12488 if (i
!= 0 && elpart
!= last_elt
)
12494 /* Shift less than element size. */
12495 maxshift
= innersize
* 8;
12499 /* Left shift immediate value can be from 0 to <size>-1. */
12500 if (last_elt
>= maxshift
)
12505 /* Right shift immediate value can be from 1 to <size>. */
12506 if (last_elt
== 0 || last_elt
> maxshift
)
12511 *elementwidth
= innersize
* 8;
12514 *modconst
= CONST_VECTOR_ELT (op
, 0);
12519 /* Return a string suitable for output of Neon immediate logic operation
12523 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12524 int inverse
, int quad
)
12526 int width
, is_valid
;
12527 static char templ
[40];
12529 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12531 gcc_assert (is_valid
!= 0);
12534 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12536 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12541 /* Return a string suitable for output of Neon immediate shift operation
12542 (VSHR or VSHL) MNEM. */
12545 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12546 machine_mode mode
, int quad
,
12549 int width
, is_valid
;
12550 static char templ
[40];
12552 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12553 gcc_assert (is_valid
!= 0);
12556 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12558 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */
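/* Illustrative sketch only: the shape of the reduction emitted below, on
   a plain array.  Each step adds adjacent pairs and halves the number of
   live elements, so PARTS/2, PARTS/4, ... steps are needed; the real code
   emits one pairwise-add instruction per step instead of this loop.  */
static int
pairwise_reduce_sketch (int *v, unsigned int parts)
{
  for (unsigned int n = parts; n > 1; n /= 2)
    for (unsigned int k = 0; k < n / 2; k++)
      v[k] = v[2 * k] + v[2 * k + 1];	/* One pairwise step.  */
  return v[0];
}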
12574 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12575 rtx (*reduc
) (rtx
, rtx
, rtx
))
12577 machine_mode inner
= GET_MODE_INNER (mode
);
12578 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
12581 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12583 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12584 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12589 /* If VALS is a vector constant that can be loaded into a register
12590 using VDUP, generate instructions to do so and return an RTX to
12591 assign to the register. Otherwise return NULL_RTX. */
12594 neon_vdup_constant (rtx vals
)
12596 machine_mode mode
= GET_MODE (vals
);
12597 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12598 int n_elts
= GET_MODE_NUNITS (mode
);
12599 bool all_same
= true;
12603 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12606 for (i
= 0; i
< n_elts
; ++i
)
12608 x
= XVECEXP (vals
, 0, i
);
12609 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12614 /* The elements are not all the same. We could handle repeating
12615 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12616 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12620 /* We can load this constant by using VDUP and a constant in a
12621 single ARM register. This will be cheaper than a vector
12624 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12625 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12628 /* Generate code to load VALS, which is a PARALLEL containing only
12629 constants (for vec_init) or CONST_VECTOR, efficiently into a
12630 register. Returns an RTX to copy into the register, or NULL_RTX
12631 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12634 neon_make_constant (rtx vals
)
12636 machine_mode mode
= GET_MODE (vals
);
12638 rtx const_vec
= NULL_RTX
;
12639 int n_elts
= GET_MODE_NUNITS (mode
);
12643 if (GET_CODE (vals
) == CONST_VECTOR
)
12645 else if (GET_CODE (vals
) == PARALLEL
)
12647 /* A CONST_VECTOR must contain only CONST_INTs and
12648 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12649 Only store valid constants in a CONST_VECTOR. */
12650 for (i
= 0; i
< n_elts
; ++i
)
12652 rtx x
= XVECEXP (vals
, 0, i
);
12653 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12656 if (n_const
== n_elts
)
12657 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12660 gcc_unreachable ();
12662 if (const_vec
!= NULL
12663 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12664 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12666 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12667 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12668 pipeline cycle; creating the constant takes one or two ARM
12669 pipeline cycles. */
12671 else if (const_vec
!= NULL_RTX
)
12672 /* Load from constant pool. On Cortex-A8 this takes two cycles
12673 (for either double or quad vectors). We can not take advantage
12674 of single-cycle VLD1 because we need a PC-relative addressing
12678 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12679 We can not construct an initializer. */
12683 /* Initialize vector TARGET to VALS. */
12686 neon_expand_vector_init (rtx target
, rtx vals
)
12688 machine_mode mode
= GET_MODE (target
);
12689 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12690 int n_elts
= GET_MODE_NUNITS (mode
);
12691 int n_var
= 0, one_var
= -1;
12692 bool all_same
= true;
12696 for (i
= 0; i
< n_elts
; ++i
)
12698 x
= XVECEXP (vals
, 0, i
);
12699 if (!CONSTANT_P (x
))
12700 ++n_var
, one_var
= i
;
12702 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12708 rtx constant
= neon_make_constant (vals
);
12709 if (constant
!= NULL_RTX
)
12711 emit_move_insn (target
, constant
);
12716 /* Splat a single non-constant element if we can. */
12717 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12719 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12720 emit_insn (gen_rtx_SET (VOIDmode
, target
,
12721 gen_rtx_VEC_DUPLICATE (mode
, x
)));
12725 /* One field is non-constant. Load constant then overwrite varying
12726 field. This is more efficient than using the stack. */
12729 rtx copy
= copy_rtx (vals
);
12730 rtx index
= GEN_INT (one_var
);
12732 /* Load constant part of vector, substitute neighboring value for
12733 varying element. */
12734 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12735 neon_expand_vector_init (target
, copy
);
12737 /* Insert variable. */
12738 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12742 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12745 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12748 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12751 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12754 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12757 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12760 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12763 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12766 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12769 gcc_unreachable ();
12774 /* Construct the vector in memory one field at a time
12775 and load the whole vector. */
12776 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12777 for (i
= 0; i
< n_elts
; i
++)
12778 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12779 i
* GET_MODE_SIZE (inner_mode
)),
12780 XVECEXP (vals
, 0, i
));
12781 emit_move_insn (target
, mem
);
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}
/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}
HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */
12836 arm_coproc_mem_operand (rtx op
, bool wb
)
12840 /* Reject eliminable registers. */
12841 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12842 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12843 || reg_mentioned_p (arg_pointer_rtx
, op
)
12844 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12845 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12846 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12847 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12850 /* Constants are converted into offsets from labels. */
12854 ind
= XEXP (op
, 0);
12856 if (reload_completed
12857 && (GET_CODE (ind
) == LABEL_REF
12858 || (GET_CODE (ind
) == CONST
12859 && GET_CODE (XEXP (ind
, 0)) == PLUS
12860 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12861 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12864 /* Match: (mem (reg)). */
12866 return arm_address_register_rtx_p (ind
, 0);
  /* Auto-increment addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
12872 if (GET_CODE (ind
) == POST_INC
12873 || GET_CODE (ind
) == PRE_DEC
12875 && (GET_CODE (ind
) == PRE_INC
12876 || GET_CODE (ind
) == POST_DEC
)))
12877 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12880 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12881 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12882 && GET_CODE (XEXP (ind
, 1)) == PLUS
12883 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12884 ind
= XEXP (ind
, 1);
12889 if (GET_CODE (ind
) == PLUS
12890 && REG_P (XEXP (ind
, 0))
12891 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12892 && CONST_INT_P (XEXP (ind
, 1))
12893 && INTVAL (XEXP (ind
, 1)) > -1024
12894 && INTVAL (XEXP (ind
, 1)) < 1024
12895 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
12908 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12912 /* Reject eliminable registers. */
12913 if (! (reload_in_progress
|| reload_completed
)
12914 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12915 || reg_mentioned_p (arg_pointer_rtx
, op
)
12916 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12917 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12918 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12919 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12922 /* Constants are converted into offsets from labels. */
12926 ind
= XEXP (op
, 0);
12928 if (reload_completed
12929 && (GET_CODE (ind
) == LABEL_REF
12930 || (GET_CODE (ind
) == CONST
12931 && GET_CODE (XEXP (ind
, 0)) == PLUS
12932 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12933 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12936 /* Match: (mem (reg)). */
12938 return arm_address_register_rtx_p (ind
, 0);
12940 /* Allow post-increment with Neon registers. */
12941 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12942 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12943 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12945 /* Allow post-increment by register for VLDn */
12946 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12947 && GET_CODE (XEXP (ind
, 1)) == PLUS
12948 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12955 && GET_CODE (ind
) == PLUS
12956 && REG_P (XEXP (ind
, 0))
12957 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12958 && CONST_INT_P (XEXP (ind
, 1))
12959 && INTVAL (XEXP (ind
, 1)) > -1024
12960 /* For quad modes, we restrict the constant offset to be slightly less
12961 than what the instruction format permits. We have no such constraint
12962 on double mode offsets. (This must match arm_legitimate_index_p.) */
12963 && (INTVAL (XEXP (ind
, 1))
12964 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12965 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12971 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12974 neon_struct_mem_operand (rtx op
)
12978 /* Reject eliminable registers. */
12979 if (! (reload_in_progress
|| reload_completed
)
12980 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12981 || reg_mentioned_p (arg_pointer_rtx
, op
)
12982 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12983 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12984 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12985 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12988 /* Constants are converted into offsets from labels. */
12992 ind
= XEXP (op
, 0);
12994 if (reload_completed
12995 && (GET_CODE (ind
) == LABEL_REF
12996 || (GET_CODE (ind
) == CONST
12997 && GET_CODE (XEXP (ind
, 0)) == PLUS
12998 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12999 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13002 /* Match: (mem (reg)). */
13004 return arm_address_register_rtx_p (ind
, 0);
13006 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13007 if (GET_CODE (ind
) == POST_INC
13008 || GET_CODE (ind
) == PRE_DEC
)
13009 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x
   to/from coprocessor registers.  Otherwise return NO_REGS.  */
13028 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13030 if (mode
== HFmode
)
13032 if (!TARGET_NEON_FP16
)
13033 return GENERAL_REGS
;
13034 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13036 return GENERAL_REGS
;
13039 /* The neon move patterns handle all legitimate vector and struct
13042 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13043 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13044 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13045 || VALID_NEON_STRUCT_MODE (mode
)))
13048 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13051 return GENERAL_REGS
;
13054 /* Values which must be returned in the most-significant end of the return
13058 arm_return_in_msb (const_tree valtype
)
13060 return (TARGET_AAPCS_BASED
13061 && BYTES_BIG_ENDIAN
13062 && (AGGREGATE_TYPE_P (valtype
)
13063 || TREE_CODE (valtype
) == COMPLEX_TYPE
13064 || FIXED_POINT_TYPE_P (valtype
)));
13067 /* Return TRUE if X references a SYMBOL_REF. */
13069 symbol_mentioned_p (rtx x
)
13074 if (GET_CODE (x
) == SYMBOL_REF
)
13077 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13078 are constant offsets, not symbols. */
13079 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13082 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13084 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13090 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13091 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
13094 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
13101 /* Return TRUE if X references a LABEL_REF. */
13103 label_mentioned_p (rtx x
)
13108 if (GET_CODE (x
) == LABEL_REF
)
13111 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13112 instruction, but they are constant offsets, not symbols. */
13113 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13116 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13117 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13123 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13124 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
13127 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
13135 tls_mentioned_p (rtx x
)
13137 switch (GET_CODE (x
))
13140 return tls_mentioned_p (XEXP (x
, 0));
13143 if (XINT (x
, 1) == UNSPEC_TLS
)
13151 /* Must not copy any rtx that uses a pc-relative address. */
13154 arm_cannot_copy_insn_p (rtx_insn
*insn
)
13156 /* The tls call insn cannot be copied, as it is paired with a data
13158 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13161 subrtx_iterator::array_type array
;
13162 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
13164 const_rtx x
= *iter
;
13165 if (GET_CODE (x
) == UNSPEC
13166 && (XINT (x
, 1) == UNSPEC_PIC_BASE
13167 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
13174 minmax_code (rtx x
)
13176 enum rtx_code code
= GET_CODE (x
);
13189 gcc_unreachable ();
13193 /* Match pair of min/max operators that can be implemented via usat/ssat. */
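/* Illustrative sketch only: the bound pairs the routine below accepts.
   A usat #N clamps to [0, 2^N - 1]; an ssat #N clamps to
   [-2^(N-1), 2^(N-1) - 1].  For example N = 8 gives [0, 255] for usat
   and [-128, 127] for ssat.  */
static void
sat_bounds_sketch (int n, bool signed_sat,
		   HOST_WIDE_INT *lo, HOST_WIDE_INT *hi)
{
  if (signed_sat)
    {
      *hi = ((HOST_WIDE_INT) 1 << (n - 1)) - 1;
      *lo = -*hi - 1;
    }
  else
    {
      *hi = ((HOST_WIDE_INT) 1 << n) - 1;
      *lo = 0;
    }
}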
13196 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13197 int *mask
, bool *signed_sat
)
13199 /* The high bound must be a power of two minus one. */
13200 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13204 /* The low bound is either zero (for usat) or one less than the
13205 negation of the high bound (for ssat). */
13206 if (INTVAL (lo_bound
) == 0)
13211 *signed_sat
= false;
13216 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13221 *signed_sat
= true;
/* Return 1 if memory locations are adjacent.  */
adjacent_mem_locations (rtx a, rtx b)
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
      HOST_WIDE_INT val0 = 0, val1 = 0;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))

      val_diff = val1 - val0;

      /* If the target has load delay slots, then there's no benefit
	 to using an ldm instruction unless the offset is zero and
	 we are optimizing for size.  */
      return (optimize_size && (REGNO (reg0) == REGNO (reg1))
	      && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
	      && (val_diff == 4 || val_diff == -4));

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
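/* Illustrative example (hypothetical operands): the references [r3, #8]
   and [r3, #12] are adjacent (same base register, val_diff == 4) and so
   are candidates for the arith_adjacentmem pattern; [r3, #8] and
   [r3, #16] are not, since val_diff == 8.  */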
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
   1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
   2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
   3.  If consecutive is TRUE, then for kth register being loaded,
       REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  bool addr_reg_in_reglist = false;
  bool update = false;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))

  i = i + offset_adj;
  base = base + offset_adj;

  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)

      reg = SET_DEST (elt);
      mem = SET_SRC (elt);

      reg = SET_SRC (elt);
      mem = SET_DEST (elt);

  if (!REG_P (reg) || !MEM_P (mem))

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
      if (!CONST_INT_P (XEXP (addr, 1)))

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))

  for (; i < count; i++)
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)

	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);

	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);

	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
		(unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.  It
	     guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;

  if (update && addr_reg_in_reglist)

  /* For Thumb-1, address register is always modified - either by write-back
     or by explicit load.  If the pattern does not describe an update,
     then the address register must be in the list of loaded registers.  */
  return update || addr_reg_in_reglist;
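/* For illustration only (hypothetical operands, not from the sources):
   "ldmia r1!, {r4, r5, r6}" is matched as a PARALLEL of four SETs.  The
   first SET is the write-back r1 = r1 + 12, i.e. (count - 1 - offset_adj)
   * reg_increment with count == 4, offset_adj == 0 and reg_increment == 4;
   the remaining SETs load r4, r5 and r6 from r1, r1 + 4 and r1 + 8.  */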
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	 ldr rd1, [rbase + offset]
	 ldr rd2, [rbase + offset + 4]

     to

	 add rd1, rbase, offset
	 ldmia rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
  for (i = 1; i < nops; i++)
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])

      if (order[i] == order[i - 1])

      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
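/* Worked example (illustrative): with unsorted_offsets = {8, 0, 12, 4} the
   caller sets order[0] = 1 (the lowest offset, 0); the loop then finds the
   offsets 4, 8 and 12 in turn, giving order = {1, 3, 0, 2}.  An input such
   as {0, 4, 4, 8} is rejected, because offset 4 is matched by two entries
   and the "exactly one offset" check above fails.  */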
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	/* Not a suitable memory address.  */

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))

    memcpy (saved_order, order, sizeof order);

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];

      && !peep2_reg_dead_p (nops, base_reg_rtx))

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))

  if (!multiple_operation_profitable_p (false, nops,
					? unsorted_offsets[order[0]] : 0))
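/* Illustrative use (hypothetical registers): a peephole that sees

	ldr r4, [r0]
	ldr r5, [r0, #4]
	ldr r6, [r0, #8]
	ldr r7, [r0, #12]

   calls this function with nops == 4.  The offsets sort into ascending
   order starting at 0 and the destination registers are themselves
   ascending, so ldm_case 1 (ldmia) is chosen and the four loads can be
   rewritten as "ldmia r0, {r4, r5, r6, r7}".  */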
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
13755 int unsorted_regs
[MAX_LDM_STM_OPS
];
13756 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13757 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13758 int order
[MAX_LDM_STM_OPS
];
13760 rtx base_reg_rtx
= NULL
;
13763 /* Write back of base register is currently only supported for Thumb 1. */
13764 int base_writeback
= TARGET_THUMB1
;
13766 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13767 easily extended if required. */
13768 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13770 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13772 /* Loop over the operands and check that the memory references are
13773 suitable (i.e. immediate offsets from the same base register). At
13774 the same time, extract the target register, and the memory
13776 for (i
= 0; i
< nops
; i
++)
13781 /* Convert a subreg of a mem into the mem itself. */
13782 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13783 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13785 gcc_assert (MEM_P (operands
[nops
+ i
]));
13787 /* Don't reorder volatile memory references; it doesn't seem worth
13788 looking for the case where the order is ok anyway. */
13789 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13792 offset
= const0_rtx
;
13794 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13795 || (GET_CODE (reg
) == SUBREG
13796 && REG_P (reg
= SUBREG_REG (reg
))))
13797 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13798 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13799 || (GET_CODE (reg
) == SUBREG
13800 && REG_P (reg
= SUBREG_REG (reg
))))
13801 && (CONST_INT_P (offset
13802 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13804 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13805 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13806 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13810 base_reg
= REGNO (reg
);
13811 base_reg_rtx
= reg
;
13812 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13815 else if (base_reg
!= (int) REGNO (reg
))
13816 /* Not addressed from the same base register. */
13819 /* If it isn't an integer register, then we can't do this. */
13820 if (unsorted_regs
[i
] < 0
13821 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13822 /* The effects are unpredictable if the base register is
13823 both updated and stored. */
13824 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13825 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13826 || unsorted_regs
[i
] > 14)
13829 unsorted_offsets
[i
] = INTVAL (offset
);
13830 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13834 /* Not a suitable memory address. */
13838 /* All the useful information has now been extracted from the
13839 operands into unsorted_regs and unsorted_offsets; additionally,
13840 order[0] has been set to the lowest offset in the list. Sort
13841 the offsets into order, verifying that they are adjacent, and
13842 check that the register numbers are ascending. */
13843 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13844 check_regs
? unsorted_regs
: NULL
))
13848 memcpy (saved_order
, order
, sizeof order
);
13854 for (i
= 0; i
< nops
; i
++)
13856 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13858 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13861 *load_offset
= unsorted_offsets
[order
[0]];
13865 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13868 if (unsorted_offsets
[order
[0]] == 0)
13869 stm_case
= 1; /* stmia */
13870 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13871 stm_case
= 2; /* stmib */
13872 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13873 stm_case
= 3; /* stmda */
13874 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13875 stm_case
= 4; /* stmdb */
13879 if (!multiple_operation_profitable_p (false, nops
, 0))
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
  if (!multiple_operation_profitable_p (false, count, 0))
      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (VOIDmode, basereg,
		       plus_constant (Pmode, basereg, wback_offset));

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
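/* Sketch of the RTL built above (illustrative only): for count == 2,
   regs == {4, 5} and wback_offset == 8 the result is

	(parallel [(set (reg:SI basereg) (plus:SI (reg:SI basereg) (const_int 8)))
		   (set (reg:SI 4) (mem:SI ...))
		   (set (reg:SI 5) (mem:SI ...))])

   where the MEMs are taken from the MEMS array passed by the caller.  */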
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (VOIDmode, basereg,
		       plus_constant (Pmode, basereg, wback_offset));

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register numbers used in the instruction.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);

    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);

  return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				   write_back ? 4 * count : 0);
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);

arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14060 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14061 rtx mems
[MAX_LDM_STM_OPS
];
14062 int i
, j
, base_reg
;
14064 HOST_WIDE_INT offset
;
14065 int write_back
= FALSE
;
14069 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
14070 &base_reg
, &offset
, !sort_regs
);
14076 for (i
= 0; i
< nops
- 1; i
++)
14077 for (j
= i
+ 1; j
< nops
; j
++)
14078 if (regs
[i
] > regs
[j
])
14084 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14088 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
14089 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
14095 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
14096 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
14098 if (!TARGET_THUMB1
)
14100 base_reg
= regs
[0];
14101 base_reg_rtx
= newbase
;
14105 for (i
= 0; i
< nops
; i
++)
14107 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14108 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14111 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14112 write_back
? offset
+ i
* 4 : 0));
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */
gen_stm_seq (rtx *operands, int nops)
14125 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14126 rtx mems
[MAX_LDM_STM_OPS
];
14129 HOST_WIDE_INT offset
;
14130 int write_back
= FALSE
;
14133 bool base_reg_dies
;
14135 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
14136 mem_order
, &base_reg
, &offset
, true);
14141 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14143 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
14146 gcc_assert (base_reg_dies
);
14152 gcc_assert (base_reg_dies
);
14153 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14157 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14159 for (i
= 0; i
< nops
; i
++)
14161 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14162 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14165 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14166 write_back
? offset
+ i
* 4 : 0));
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */
gen_const_stm_seq (rtx *operands, int nops)
14180 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14181 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14182 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14183 rtx mems
[MAX_LDM_STM_OPS
];
14186 HOST_WIDE_INT offset
;
14187 int write_back
= FALSE
;
14190 bool base_reg_dies
;
14192 HARD_REG_SET allocated
;
14194 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14195 mem_order
, &base_reg
, &offset
, false);
14200 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14202 /* If the same register is used more than once, try to find a free
14204 CLEAR_HARD_REG_SET (allocated
);
14205 for (i
= 0; i
< nops
; i
++)
14207 for (j
= i
+ 1; j
< nops
; j
++)
14208 if (regs
[i
] == regs
[j
])
14210 rtx t
= peep2_find_free_register (0, nops
* 2,
14211 TARGET_THUMB1
? "l" : "r",
14212 SImode
, &allocated
);
14216 regs
[i
] = REGNO (t
);
14220 /* Compute an ordering that maps the register numbers to an ascending
14223 for (i
= 0; i
< nops
; i
++)
14224 if (regs
[i
] < regs
[reg_order
[0]])
14227 for (i
= 1; i
< nops
; i
++)
14229 int this_order
= reg_order
[i
- 1];
14230 for (j
= 0; j
< nops
; j
++)
14231 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14232 && (this_order
== reg_order
[i
- 1]
14233 || regs
[j
] < regs
[this_order
]))
14235 reg_order
[i
] = this_order
;
14238 /* Ensure that registers that must be live after the instruction end
14239 up with the correct value. */
14240 for (i
= 0; i
< nops
; i
++)
14242 int this_order
= reg_order
[i
];
14243 if ((this_order
!= mem_order
[i
]
14244 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14245 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14249 /* Load the constants. */
14250 for (i
= 0; i
< nops
; i
++)
14252 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14253 sorted_regs
[i
] = regs
[reg_order
[i
]];
14254 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14257 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14259 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14262 gcc_assert (base_reg_dies
);
14268 gcc_assert (base_reg_dies
);
14269 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14273 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14275 for (i
= 0; i
< nops
; i
++)
14277 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14278 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14281 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14282 write_back
? offset
+ i
* 4 : 0));
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
14298 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14299 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14300 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14301 HOST_WIDE_INT i
, j
;
14302 HOST_WIDE_INT remaining
= length
, words
;
14303 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14305 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14306 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14307 HOST_WIDE_INT srcoffset
, dstoffset
;
14308 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14311 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14313 /* Use hard registers if we have aligned source or destination so we can use
14314 load/store multiple with contiguous registers. */
14315 if (dst_aligned
|| src_aligned
)
14316 for (i
= 0; i
< interleave_factor
; i
++)
14317 regs
[i
] = gen_rtx_REG (SImode
, i
);
14319 for (i
= 0; i
< interleave_factor
; i
++)
14320 regs
[i
] = gen_reg_rtx (SImode
);
14322 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14323 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14325 srcoffset
= dstoffset
= 0;
14327 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14328 For copying the last bytes we want to subtract this offset again. */
14329 src_autoinc
= dst_autoinc
= 0;
14331 for (i
= 0; i
< interleave_factor
; i
++)
14334 /* Copy BLOCK_SIZE_BYTES chunks. */
14336 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14339 if (src_aligned
&& interleave_factor
> 1)
14341 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14342 TRUE
, srcbase
, &srcoffset
));
14343 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14347 for (j
= 0; j
< interleave_factor
; j
++)
14349 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14351 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14352 srcoffset
+ j
* UNITS_PER_WORD
);
14353 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14355 srcoffset
+= block_size_bytes
;
14359 if (dst_aligned
&& interleave_factor
> 1)
14361 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14362 TRUE
, dstbase
, &dstoffset
));
14363 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14367 for (j
= 0; j
< interleave_factor
; j
++)
14369 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14371 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14372 dstoffset
+ j
* UNITS_PER_WORD
);
14373 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14375 dstoffset
+= block_size_bytes
;
14378 remaining
-= block_size_bytes
;
14381 /* Copy any whole words left (note these aren't interleaved with any
14382 subsequent halfword/byte load/stores in the interests of simplicity). */
14384 words
= remaining
/ UNITS_PER_WORD
;
14386 gcc_assert (words
< interleave_factor
);
14388 if (src_aligned
&& words
> 1)
14390 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14392 src_autoinc
+= UNITS_PER_WORD
* words
;
14396 for (j
= 0; j
< words
; j
++)
14398 addr
= plus_constant (Pmode
, src
,
14399 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14400 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14401 srcoffset
+ j
* UNITS_PER_WORD
);
14402 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14404 srcoffset
+= words
* UNITS_PER_WORD
;
14407 if (dst_aligned
&& words
> 1)
14409 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14411 dst_autoinc
+= words
* UNITS_PER_WORD
;
14415 for (j
= 0; j
< words
; j
++)
14417 addr
= plus_constant (Pmode
, dst
,
14418 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14419 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14420 dstoffset
+ j
* UNITS_PER_WORD
);
14421 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14423 dstoffset
+= words
* UNITS_PER_WORD
;
14426 remaining
-= words
* UNITS_PER_WORD
;
14428 gcc_assert (remaining
< 4);
14430 /* Copy a halfword if necessary. */
14432 if (remaining
>= 2)
14434 halfword_tmp
= gen_reg_rtx (SImode
);
14436 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14437 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14438 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14440 /* Either write out immediately, or delay until we've loaded the last
14441 byte, depending on interleave factor. */
14442 if (interleave_factor
== 1)
14444 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14445 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14446 emit_insn (gen_unaligned_storehi (mem
,
14447 gen_lowpart (HImode
, halfword_tmp
)));
14448 halfword_tmp
= NULL
;
14456 gcc_assert (remaining
< 2);
14458 /* Copy last byte. */
14460 if ((remaining
& 1) != 0)
14462 byte_tmp
= gen_reg_rtx (SImode
);
14464 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14465 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14466 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14468 if (interleave_factor
== 1)
14470 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14471 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14472 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14481 /* Store last halfword if we haven't done so already. */
14485 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14486 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14487 emit_insn (gen_unaligned_storehi (mem
,
14488 gen_lowpart (HImode
, halfword_tmp
)));
14492 /* Likewise for last byte. */
14496 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14497 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14498 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14502 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
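/* Illustrative expansion (hypothetical registers, not from the sources):
   with an aligned source, an unaligned destination and
   interleave_factor == 2, each 8-byte block is copied roughly as

	ldmia	r4!, {r0, r1}
	str	r0, [r5]	@ unaligned store
	str	r1, [r5, #4]	@ unaligned store

   i.e. both loads are issued before the stores so that some of the load
   latency is hidden, which is what the interleave factor is for.  */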
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
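/* Worked example (illustrative): for length == 70 and bytes_per_iter == 16,
   leftover is 70 % 16 == 6, so the loop copies 64 bytes in four iterations
   and the final arm_block_move_unaligned_straight call mops up the
   remaining 6 bytes (one word followed by one halfword).  */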
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */
arm_movmemqi_unaligned (rtx *operands)
  HOST_WIDE_INT length = INTVAL (operands[2]);

  bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);

      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 awkward.  */
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
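/* Example of the tuning above (illustrative; the length thresholds are
   elided here): a 64-byte copy where only the source is word-aligned uses
   interleave_factor == 2 with 8 bytes per iteration when optimizing for
   size, while the same copy compiled for speed falls through to the second
   case and uses an interleave factor of 4 with 16 bytes per iteration.  */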
arm_gen_movmemqi (rtx *operands)
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);
14633 if (INTVAL (operands
[3]) & 3)
14636 dstbase
= operands
[0];
14637 srcbase
= operands
[1];
14639 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14640 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14642 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14643 out_words_to_go
= INTVAL (operands
[2]) / 4;
14644 last_bytes
= INTVAL (operands
[2]) & 3;
14645 dstoffset
= srcoffset
= 0;
14647 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14648 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14650 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14652 if (in_words_to_go
> 4)
14653 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14654 TRUE
, srcbase
, &srcoffset
));
14656 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14657 src
, FALSE
, srcbase
,
14660 if (out_words_to_go
)
14662 if (out_words_to_go
> 4)
14663 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14664 TRUE
, dstbase
, &dstoffset
));
14665 else if (out_words_to_go
!= 1)
14666 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14667 out_words_to_go
, dst
,
14670 dstbase
, &dstoffset
));
14673 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14674 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
14675 if (last_bytes
!= 0)
14677 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14683 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14684 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14687 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14688 if (out_words_to_go
)
14692 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14693 sreg
= copy_to_reg (mem
);
14695 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14696 emit_move_insn (mem
, sreg
);
14699 gcc_assert (!in_words_to_go
); /* Sanity check */
14702 if (in_words_to_go
)
14704 gcc_assert (in_words_to_go
> 0);
14706 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14707 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14710 gcc_assert (!last_bytes
|| part_bytes_reg
);
14712 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14714 rtx tmp
= gen_reg_rtx (SImode
);
14716 /* The bytes we want are in the top end of the word. */
14717 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14718 GEN_INT (8 * (4 - last_bytes
))));
14719 part_bytes_reg
= tmp
;
14723 mem
= adjust_automodify_address (dstbase
, QImode
,
14724 plus_constant (Pmode
, dst
,
14726 dstoffset
+ last_bytes
- 1);
14727 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14731 tmp
= gen_reg_rtx (SImode
);
14732 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14733 part_bytes_reg
= tmp
;
14740 if (last_bytes
> 1)
14742 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14743 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14747 rtx tmp
= gen_reg_rtx (SImode
);
14748 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14749 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14750 part_bytes_reg
= tmp
;
14757 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14758 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   MEM by its mode size.  */
next_consecutive_mem (rtx mem)
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);

/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
gen_movmem_ldrd_strd (rtx *operands)
14782 unsigned HOST_WIDE_INT len
;
14783 HOST_WIDE_INT align
;
14784 rtx src
, dst
, base
;
14786 bool src_aligned
, dst_aligned
;
14787 bool src_volatile
, dst_volatile
;
14789 gcc_assert (CONST_INT_P (operands
[2]));
14790 gcc_assert (CONST_INT_P (operands
[3]));
14792 len
= UINTVAL (operands
[2]);
14796 /* Maximum alignment we can assume for both src and dst buffers. */
14797 align
= INTVAL (operands
[3]);
14799 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14802 /* Place src and dst addresses in registers
14803 and update the corresponding mem rtx. */
14805 dst_volatile
= MEM_VOLATILE_P (dst
);
14806 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14807 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14808 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14811 src_volatile
= MEM_VOLATILE_P (src
);
14812 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14813 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14814 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14816 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14819 if (src_volatile
|| dst_volatile
)
14822 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14823 if (!(dst_aligned
|| src_aligned
))
14824 return arm_gen_movmemqi (operands
);
14826 src
= adjust_address (src
, DImode
, 0);
14827 dst
= adjust_address (dst
, DImode
, 0);
14831 reg0
= gen_reg_rtx (DImode
);
14833 emit_move_insn (reg0
, src
);
14835 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14838 emit_move_insn (dst
, reg0
);
14840 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14842 src
= next_consecutive_mem (src
);
14843 dst
= next_consecutive_mem (dst
);
14846 gcc_assert (len
< 8);
14849 /* More than a word but less than a double-word to copy. Copy a word. */
14850 reg0
= gen_reg_rtx (SImode
);
14851 src
= adjust_address (src
, SImode
, 0);
14852 dst
= adjust_address (dst
, SImode
, 0);
14854 emit_move_insn (reg0
, src
);
14856 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14859 emit_move_insn (dst
, reg0
);
14861 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14863 src
= next_consecutive_mem (src
);
14864 dst
= next_consecutive_mem (dst
);
14871 /* Copy the remaining bytes. */
14874 dst
= adjust_address (dst
, HImode
, 0);
14875 src
= adjust_address (src
, HImode
, 0);
14876 reg0
= gen_reg_rtx (SImode
);
14878 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14880 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14883 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14885 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14887 src
= next_consecutive_mem (src
);
14888 dst
= next_consecutive_mem (dst
);
14893 dst
= adjust_address (dst
, QImode
, 0);
14894 src
= adjust_address (src
, QImode
, 0);
14895 reg0
= gen_reg_rtx (QImode
);
14896 emit_move_insn (reg0
, src
);
14897 emit_move_insn (dst
, reg0
);
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14913 enum rtx_code cond1
, cond2
;
14916 /* Currently we will probably get the wrong result if the individual
14917 comparisons are not simple. This also ensures that it is safe to
14918 reverse a comparison if necessary. */
14919 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14921 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14925 /* The if_then_else variant of this tests the second condition if the
14926 first passes, but is true if the first fails. Reverse the first
14927 condition to get a true "inclusive-or" expression. */
14928 if (cond_or
== DOM_CC_NX_OR_Y
)
14929 cond1
= reverse_condition (cond1
);
14931 /* If the comparisons are not equal, and one doesn't dominate the other,
14932 then we can't do this. */
14934 && !comparison_dominates_p (cond1
, cond2
)
14935 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14939 std::swap (cond1
, cond2
);
14944 if (cond_or
== DOM_CC_X_AND_Y
)
14949 case EQ
: return CC_DEQmode
;
14950 case LE
: return CC_DLEmode
;
14951 case LEU
: return CC_DLEUmode
;
14952 case GE
: return CC_DGEmode
;
14953 case GEU
: return CC_DGEUmode
;
14954 default: gcc_unreachable ();
14958 if (cond_or
== DOM_CC_X_AND_Y
)
14970 gcc_unreachable ();
14974 if (cond_or
== DOM_CC_X_AND_Y
)
14986 gcc_unreachable ();
14990 if (cond_or
== DOM_CC_X_AND_Y
)
14991 return CC_DLTUmode
;
14996 return CC_DLTUmode
;
14998 return CC_DLEUmode
;
15002 gcc_unreachable ();
15006 if (cond_or
== DOM_CC_X_AND_Y
)
15007 return CC_DGTUmode
;
15012 return CC_DGTUmode
;
15014 return CC_DGEUmode
;
15018 gcc_unreachable ();
15021 /* The remaining cases only occur when both comparisons are the
15024 gcc_assert (cond1
== cond2
);
15028 gcc_assert (cond1
== cond2
);
15032 gcc_assert (cond1
== cond2
);
15036 gcc_assert (cond1
== cond2
);
15037 return CC_DLEUmode
;
15040 gcc_assert (cond1
== cond2
);
15041 return CC_DGEUmode
;
15044 gcc_unreachable ();
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15076 gcc_unreachable ();
15080 /* A compare with a shifted operand. Because of canonicalization, the
15081 comparison will have to be swapped when we emit the assembler. */
15082 if (GET_MODE (y
) == SImode
15083 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15084 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15085 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15086 || GET_CODE (x
) == ROTATERT
))
15089 /* This operation is performed swapped, but since we only rely on the Z
15090 flag we don't need an additional mode. */
15091 if (GET_MODE (y
) == SImode
15092 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15093 && GET_CODE (x
) == NEG
15094 && (op
== EQ
|| op
== NE
))
15097 /* This is a special case that is used by combine to allow a
15098 comparison of a shifted byte load to be split into a zero-extend
15099 followed by a comparison of the shifted integer (only valid for
15100 equalities and unsigned inequalities). */
15101 if (GET_MODE (x
) == SImode
15102 && GET_CODE (x
) == ASHIFT
15103 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15104 && GET_CODE (XEXP (x
, 0)) == SUBREG
15105 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15106 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15107 && (op
== EQ
|| op
== NE
15108 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15109 && CONST_INT_P (y
))
15112 /* A construct for a conditional compare, if the false arm contains
15113 0, then both conditions must be true, otherwise either condition
15114 must be true. Not all conditions are possible, so CCmode is
15115 returned if it can't be done. */
15116 if (GET_CODE (x
) == IF_THEN_ELSE
15117 && (XEXP (x
, 2) == const0_rtx
15118 || XEXP (x
, 2) == const1_rtx
)
15119 && COMPARISON_P (XEXP (x
, 0))
15120 && COMPARISON_P (XEXP (x
, 1)))
15121 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15122 INTVAL (XEXP (x
, 2)));
15124 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15125 if (GET_CODE (x
) == AND
15126 && (op
== EQ
|| op
== NE
)
15127 && COMPARISON_P (XEXP (x
, 0))
15128 && COMPARISON_P (XEXP (x
, 1)))
15129 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15132 if (GET_CODE (x
) == IOR
15133 && (op
== EQ
|| op
== NE
)
15134 && COMPARISON_P (XEXP (x
, 0))
15135 && COMPARISON_P (XEXP (x
, 1)))
15136 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15139 /* An operation (on Thumb) where we want to test for a single bit.
15140 This is done by shifting that bit up into the top bit of a
15141 scratch register; we can then branch on the sign bit. */
15143 && GET_MODE (x
) == SImode
15144 && (op
== EQ
|| op
== NE
)
15145 && GET_CODE (x
) == ZERO_EXTRACT
15146 && XEXP (x
, 1) == const1_rtx
)
15149 /* An operation that sets the condition codes as a side-effect, the
15150 V flag is not set correctly, so we can only use comparisons where
15151 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15153 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15154 if (GET_MODE (x
) == SImode
15156 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15157 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15158 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15159 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15160 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15161 || GET_CODE (x
) == LSHIFTRT
15162 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15163 || GET_CODE (x
) == ROTATERT
15164 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15165 return CC_NOOVmode
;
15167 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15170 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15171 && GET_CODE (x
) == PLUS
15172 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15175 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15181 /* A DImode comparison against zero can be implemented by
15182 or'ing the two halves together. */
15183 if (y
== const0_rtx
)
15186 /* We can do an equality test in three Thumb instructions. */
15196 /* DImode unsigned comparisons can be implemented by cmp +
15197 cmpeq without a scratch register. Not worth doing in
15208 /* DImode signed and unsigned comparisons can be implemented
15209 by cmp + sbcs with a scratch register, but that does not
15210 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15211 gcc_assert (op
!= EQ
&& op
!= NE
);
15215 gcc_unreachable ();
15219 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15220 return GET_MODE (x
);
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
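/* Illustrative use (hypothetical operands): comparing two DImode pseudos
   for equality on Thumb-2 selects CC_Zmode, so the code above XORs the two
   values and compares the result against zero; on ARM, CC_CZmode is
   selected instead and the XOR is not needed because a cmp/cmpeq pair is
   cheaper there.  */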
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
arm_gen_return_addr_mask (void)
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
arm_reload_in_hi (rtx *operands)

  rtx ref = operands[1];
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);

      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
  else if (GET_CODE (base) == PLUS)
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Get the base address; addsi3 knows how to handle constants
	 that require more than one insn.  */
      emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,

  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,

  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_SUBREG (SImode, operands[0], 0),

    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
				gen_rtx_SUBREG (SImode, operands[0], 0)));
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */

arm_reload_out_hi (rtx *operands)

  rtx ref = operands[0];
  rtx outval = operands[1];
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);

      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);

	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;

      emit_set_insn (base_plus, base);
  else if (GET_CODE (base) == PLUS)
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);

		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));

  if (BYTES_BIG_ENDIAN)
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
			    gen_lowpart (QImode, scratch)));

      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
			    gen_lowpart (QImode, scratch)));
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */

arm_must_pass_in_stack (machine_mode mode, const_tree type)

  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);

  return must_pass_in_stack_var_size_or_pad (mode, type);

/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */

arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)

  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)

  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)

      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */

      if ((AGGREGATE_TYPE_P (type)
	   || TREE_CODE (type) == COMPLEX_TYPE
	   || FIXED_POINT_TYPE_P (type))
	  && int_size_in_bytes (type) <= 4)

      if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	  && GET_MODE_SIZE (mode) <= 4)

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */

offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)

  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))

  else if (TARGET_ARM)

  return ((offset <= max_offset) && (offset >= -max_offset));
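
/* Illustrative sketch (not used by the compiler) of the check above.  The
   concrete limits are our assumptions based on the ISA encodings: Thumb-2
   LDRD/STRD take a word-aligned offset of up to +/-1020, while ARM-mode
   LDRD/STRD take an 8-bit offset of up to +/-255.  */
static int
arm_sketch_ldrd_offset_ok (long long offset, int thumb2_p)
{
  long long max_offset;

  if (thumb2_p)
    {
      if ((offset & 3) != 0)
	return 0;
      max_offset = 1020;
    }
  else
    max_offset = 255;

  return offset <= max_offset && offset >= -max_offset;
}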
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  The
   pattern guarantees that both memory accesses use the same base register,
   that the offsets are constants within the valid range, and that the gap
   between the offsets is 4.  If reload is complete, also check that the
   registers are legal.  WBACK indicates whether the address is updated.
   LOAD indicates whether the memory access is a load or a store.  */

operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)

  unsigned int t, t2, n;

  if (!reload_completed)

  if (!offset_ok_for_ldrd_strd (offset))

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))

      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */

mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))

  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)

      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
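
/* Illustrative example (ours, not from the original sources): with a
   word-aligned base in r2, the peephole described above rewrites the pair

	ldr	r0, [r2]
	ldr	r1, [r2, #4]

   into the single double-word access

	ldrd	r0, r1, [r2]

   provided the register and offset constraints checked below are met.  */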
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)

  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;

  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)

      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))

      else if (REGNO (base) != REGNO (cur_base))

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)

	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code

     can be transformed into

     in Thumb mode assuming that r1 is free.  */
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))

      CLEAR_HARD_REG_SET (regset);
      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
      if (tmp == NULL_RTX)

      /* Use the new register in the first load to ensure that
	 if the original input register is not dead after peephole,
	 then it will have the correct constant value.  */

  else if (TARGET_ARM)

      int regno = REGNO (operands[0]);
      if (!peep2_reg_dead_p (4, operands[0]))

	  /* When the input register is even and is not dead after the
	     pattern, it has to hold the second constant but we cannot
	     form a legal STRD in ARM mode with this register as the second
	  if (regno % 2 == 0)

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)

	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)

		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);

		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)

		  operands[regno % 2 == 1 ? 0 : 1] = tmp;

      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])

      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);

	std::swap (operands[4], operands[5]);

      gap = offsets[1] - offsets[0];
      offset = offsets[0];

  /* Make sure accesses are to consecutive memory locations.  */

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */

  if (load && commute)

      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,

      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))

      /* Try to reorder the input registers.  */
      /* For example, the code

	 can be transformed into

      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,

	  std::swap (operands[0], operands[1]);

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));

      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
      if (tmp == NULL_RTX)

      /* DREG must be an even-numbered register in DImode.
	 Split it into SI registers.  */
      operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
      operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

  return (operands_ok_ldrd_strd (operands[0], operands[1],
/* Print a symbolic form of X to the debug file, F.  */

arm_print_value (FILE *f, rtx x)

  switch (GET_CODE (x))

      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));

      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));

      for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)

	  fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
	  if (i < (CONST_VECTOR_NUNITS (x) - 1))

      fprintf (f, "\"%s\"", XSTR (x, 0));

      fprintf (f, "`%s'", XSTR (x, 0));

      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));

      arm_print_value (f, XEXP (x, 0));

      arm_print_value (f, XEXP (x, 0));

      arm_print_value (f, XEXP (x, 1));

      fprintf (f, "????");
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:
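     As an illustration (a reconstruction of the elided listing, not the
     original text), each out-of-range constant is dumped into the body of
     the function and branched around:

	ldr	rn, L1
	...
	b	L2
	align
     L1:	.long	value
     L2:
	...

     and this pattern repeats for each pool that has to be emitted
     mid-function.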
   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node

  /* Doubly linked chain of entries.  */

  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */

  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */

  /* The mode of value.  */

  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */

struct minipool_fixup

  HOST_WIDE_INT address;

  HOST_WIDE_INT forwards;
  HOST_WIDE_INT backwards;

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
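
/* For instance, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (SImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) evaluates to 8, so
   every pool entry occupies at least one word.  */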
static Mnode *minipool_vector_head;
static Mnode *minipool_vector_tail;
static rtx_code_label *minipool_vector_label;
static int minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *minipool_fix_head;
Mfix *minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *minipool_barrier;
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0

static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)

  /* ADDR_VECs only take room if read-only data goes into the text
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)

      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);

	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;

	  /* No padding necessary for TBH.  */

	  /* Add two bytes for alignment on Thumb.  */

	  gcc_unreachable ();
/* Return the maximum amount of padding that will be inserted before

static HOST_WIDE_INT
get_label_padding (rtx label)

  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
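
/* For example, a label aligned to an 8-byte boundary can be preceded by up
   to 8 - 2 = 6 bytes of padding in Thumb code and by up to 8 - 4 = 4 bytes
   in ARM code, since the minimum instruction sizes are 2 and 4 bytes
   respectively.  */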
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */

move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)

  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)

      if (max_address < mp->max_address)
	mp->max_address = max_address;

      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;

	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;

	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */

      mp->prev = max_mp->prev;

      if (mp->prev != NULL)
	mp->prev->next = mp;

	minipool_vector_head = mp;

  /* Save the new entry.  */

  /* Scan over the preceding entries and adjust their addresses as
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)

      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */

add_minipool_forward_ref (Mfix *fix)

  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)

      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))

	  /* More than one fix references this entry.  */

	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);

      /* Note the insertion point if necessary.  */
	  && mp->max_address > max_address)

	  /* If we are inserting an 8-bytes aligned quantity and
	     we have not already found an insertion point, then
	     make sure that all such 8-byte aligned quantities are
	     placed at the start of the pool.  */
	  if (ARM_DOUBLEWORD_ALIGN
	      && fix->fix_size >= 8
	      && mp->fix_size < 8)

	      max_address = mp->max_address;

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other

  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;

  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)

      mp->max_address = max_address;

      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)

	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();

	mp->prev->next = mp;

      minipool_vector_tail = mp;

      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;

	mp->max_address = max_address;

      mp->prev = max_mp->prev;

      if (mp->prev != NULL)
	mp->prev->next = mp;

	minipool_vector_head = mp;

  /* Save the new entry.  */

  /* Scan over the preceding entries and adjust their addresses as
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)

      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)

  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)

      if (min_address > mp->min_address)
	mp->min_address = min_address;

      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;

	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */

      mp->next = min_mp->next;

      if (mp->next != NULL)
	mp->next->prev = mp;

	minipool_vector_tail = mp;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)

      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under

add_minipool_backward_ref (Mfix *fix)

  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)

      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))

	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;

	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)

	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)

	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)

	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)

		  min_address = mp->min_address + fix->fix_size;

	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)

	      min_address = mp->min_address + fix->fix_size;

  /* We need to create a new entry.  */

  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;

  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)

      mp->next = minipool_vector_head;

      if (mp->next == NULL)

	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();

	mp->next->prev = mp;

      minipool_vector_head = mp;

      mp->next = min_mp->next;

      if (mp->next != NULL)
	mp->next->prev = mp;

	minipool_vector_tail = mp;

  /* Save the new entry.  */

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)

      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

	mp->next->offset = mp->offset + mp->fix_size;

	mp->next->offset = mp->offset;
assign_minipool_offsets (Mfix *barrier)

  HOST_WIDE_INT offset = 0;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)

      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
/* Output the literal table */

dump_minipool (rtx_insn *scan)

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)

    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)

      if (mp->refcount > 0)

	    fprintf (dump_file,
		     ";; Offset %u, min %ld, max %ld ",
		     (unsigned) mp->offset, (unsigned long) mp->min_address,
		     (unsigned long) mp->max_address);
	    arm_print_value (dump_file, mp->value);
	    fputc ('\n', dump_file);

	  switch (GET_MODE_SIZE (mp->mode))

#ifdef HAVE_consttable_1
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
#ifdef HAVE_consttable_2
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
#ifdef HAVE_consttable_4
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
#ifdef HAVE_consttable_8
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
#ifdef HAVE_consttable_16
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      gcc_unreachable ();

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
/* Return the cost of forcibly inserting a barrier after INSN.  */

arm_barrier_cost (rtx insn)

  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))

  switch (GET_CODE (insn))

      /* It will always be better to place the table before the label, rather

      return base_cost - 10;

      return base_cost + 10;
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for

create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)

  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;

  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;

  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)

      rtx_jump_table_data *tmp;

      /* This code shouldn't have been called if there was a natural barrier
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);

	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))

	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))

	      selected_cost = new_cost;
	      selected_address = fix->address + count;

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))

	  selected_cost = new_cost;
	  selected_address = fix->address + count;

      from = NEXT_INSN (from);

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))

      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;
/* Record that there is a natural barrier in the insn stream at

push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)

  Mfix *fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->address = address;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;

    minipool_fix_head = fix;

  minipool_fix_tail = fix;
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type

push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)

  Mfix *fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->address = address;

  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)

      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");

  /* Add it to the chain of fixes.  */

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;

    minipool_fix_head = fix;

  minipool_fix_tail = fix;
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize

arm_max_const_double_inline_cost ()

  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to

arm_const_double_inline_cost (rtx val)

  rtx lowpart, highpart;

  mode = GET_MODE (val);

  if (mode == VOIDmode)

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));

/* Cost of loading a SImode constant.  */

arm_const_inline_cost (enum rtx_code code, rtx val)

  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */

arm_const_double_by_parts (rtx val)

  machine_mode mode = GET_MODE (val);

  if (optimize_size || arm_ld_sched)

  if (mode == VOIDmode)

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))

/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */

arm_const_double_by_immediates (rtx val)

  machine_mode mode = GET_MODE (val);

  if (mode == VOIDmode)

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
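
/* Illustrative sketch (not used by the compiler): const_ok_for_arm above
   accepts a 32-bit value when it can be encoded as an ARM data-processing
   immediate.  The usual encoding rule -- an 8-bit value rotated right by an
   even amount -- is demonstrated by the stand-alone helper below; treat it
   as an assumption about the encoding rather than a restatement of
   const_ok_for_arm itself.  */
static int
arm_sketch_dp_immediate_p (unsigned int value)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Check whether VALUE is some 8-bit quantity rotated right by ROT,
	 i.e. whether rotating VALUE left by ROT fits in 8 bits.  */
      unsigned int rotated
	= rot == 0 ? value : (value << rot) | (value >> (32 - rot));
      if ((rotated & ~0xffu) == 0)
	return 1;
    }
  return 0;
}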
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups

note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)

  /* Fill in recog_op_alt with information about the constraints of
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)

      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)

	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))

	      push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				 recog_data.operand_mode[opno], op);

	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))

	      rtx cop = avoid_constant_pool_reference (op);

	      /* Casting the address of something to a mode narrower
		 than a word can cause avoid_constant_pool_reference()
		 to return the pool reference itself.  That's no good to
		 us here.  Let's just hope that we can use the
		 constant pool value directly.  */
		cop = get_pool_constant (XEXP (op, 0));

	      push_minipool_fix (insn, address,
				 recog_data.operand_loc[opno],
				 recog_data.operand_mode[opno], cop);
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

thumb1_reorg (void)

  FOR_EACH_BB_FN (bb, cfun)

      rtx pat, op0, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)

      /* Get the register with which we are comparing.  */
      pat = PATTERN (insn);
      op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   && prev != BB_HEAD (bb)
	       || DEBUG_INSN_P (prev)
	       || ((set = single_set (prev)) != NULL
		   && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))

	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))

	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

thumb2_reorg (void)

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)

      if (current_tune->disparage_flag_setting_t16_encodings
	  && optimize_bb_for_speed_p (bb))

      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= (current_tune->disparage_partial_flag_setting_t16_encodings
	   && optimize_bb_for_speed_p (bb))

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)

	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)

	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (!OBJECT_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))

		  switch (GET_CODE (src))

		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))

		      if (low_register_operand (op0, SImode))

			  /* ADDS <Rd>,<Rn>,<Rm>  */
			  if (low_register_operand (op1, SImode))

			  /* ADDS <Rdn>,#<imm8>  */
			  /* SUBS <Rdn>,#<imm8>  */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))

			  /* ADDS <Rd>,<Rn>,#<imm3>  */
			  /* SUBS <Rd>,<Rn>,#<imm3>  */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))

		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)

		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))

		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)

		      /* else fall through.  */

		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;

		      /* ASRS <Rdn>,<Rm>  */
		      /* LSRS <Rdn>,<Rm>  */
		      /* LSLS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5>  */
		      /* LSRS <Rd>,<Rm>,#<imm5>  */
		      /* LSLS <Rd>,<Rm>,#<imm5>  */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;

		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;

		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;

		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))

		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;

		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */

	      if (action != SKIP)

		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);

		  if (action == SWAP_CONV)

		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (VOIDmode, dst, src);
		      vec = gen_rtvec (2, pat, clobber);

		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);

  CLEAR_REG_SET (&live);
17355 /* Gcc puts the pool in the wrong place for ARM, since we can only
17356 load addresses a limited distance around the pc. We do some
17357 special munging to move the constant pool values to the correct
17358 point in the code. */
17363 HOST_WIDE_INT address
= 0;
17368 else if (TARGET_THUMB2
)
17371 /* Ensure all insns that must be split have been split at this point.
17372 Otherwise, the pool placement code below may compute incorrect
17373 insn lengths. Note that when optimizing, all insns have already
17374 been split at this point. */
17376 split_all_insns_noflow ();
17378 minipool_fix_head
= minipool_fix_tail
= NULL
;
17380 /* The first insn must always be a note, or the code below won't
17381 scan it properly. */
17382 insn
= get_insns ();
17383 gcc_assert (NOTE_P (insn
));
17386 /* Scan all the insns and record the operands that will need fixing. */
17387 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
    {
      /* Output pop (not stmfd) because it has a shorter encoding.  */
      gcc_assert (update);
      sprintf (pattern, "pop%s\t{", conditional);
    }
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldm%sfd\t", conditional);
      else if (TARGET_UNIFIED_ASM)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%sia\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
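
/* For illustration only (register choices are hypothetical): with SP as the
   base register, writeback and a return through PC, the pattern built above
   is roughly "pop%?\t{r4, r5, pc}" under unified syntax, or
   "ldm%?fd\tsp!, {r4, r5, pc}" otherwise; an interrupt return additionally
   gets the "^" suffix so the CPSR is restored as well.  */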
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p =  REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    p += sprintf (&pattern[p], ", d%d", base + i);

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
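
/* For illustration only (hypothetical registers): storing three double
   registers starting at d8 with the stack pointer as base comes out roughly
   as "vpush.64 {d8, d9, d10}" after operand substitution; with any other
   base register and writeback it is roughly "vstmdb.64 rN!, {d8, d9, d10}".  */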
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (DFmode, stack_pointer_rtx),
		     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
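
/* A rough worked example (values hypothetical): a call with count == 20
   recurses as vfp_emit_fstmd (base_reg + 32, 4) followed by
   vfp_emit_fstmd (base_reg, 16), i.e. the top four D registers are pushed
   first, then the lower sixteen, and 20 * 8 bytes are reported in total.
   Likewise, on a pre-v6 core a request for exactly two D registers is
   silently widened to three to dodge the ARM10 VFPr1 erratum noted above.  */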
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */
void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
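
/* Sketch of the resulting fusage list (purely illustrative): after the two
   hooks above, a call on an AAPCS target carries roughly

     CALL_INSN_FUNCTION_USAGE:
       (clobber (reg:SI 12 ip))
       (use (reg:SI <pic register>))	@ only in the VxWorks RTP case

   so the register allocator and dataflow know that ip may be smashed by a
   linker-inserted veneer.  */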
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}

/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
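
/* Illustrative expansions (registers hypothetical).  output_call for a call
   through r2 on a pre-v5 target emits roughly:

	mov	lr, pc
	bx	r2		@ or "mov pc, r2" without interworking/v4t

   and output_call_mem, in the plain case, for a call through a memory
   operand such as [r3, #8]:

	mov	lr, pc
	ldr	pc, [r3, #8]	@ the interworking path goes via ip + bx  */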
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}

void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
17890 /* Output a move between double words. It must be REG<-MEM
17893 output_move_double (rtx
*operands
, bool emit
, int *count
)
17895 enum rtx_code code0
= GET_CODE (operands
[0]);
17896 enum rtx_code code1
= GET_CODE (operands
[1]);
17901 /* The only case when this might happen is when
17902 you are looking at the length of a DImode instruction
17903 that has an invalid constant in it. */
17904 if (code0
== REG
&& code1
!= MEM
)
17906 gcc_assert (!emit
);
17913 unsigned int reg0
= REGNO (operands
[0]);
17915 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17917 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17919 switch (GET_CODE (XEXP (operands
[1], 0)))
17926 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17927 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17929 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17934 gcc_assert (TARGET_LDRD
);
17936 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
17943 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
17945 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
17953 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
17955 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
17960 gcc_assert (TARGET_LDRD
);
17962 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
17967 /* Autoicrement addressing modes should never have overlapping
17968 base and destination registers, and overlapping index registers
17969 are already prohibited, so this doesn't need to worry about
17971 otherops
[0] = operands
[0];
17972 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17973 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17975 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17977 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17979 /* Registers overlap so split out the increment. */
17982 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17983 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
17990 /* Use a single insn if we can.
17991 FIXME: IWMMXT allows offsets larger than ldrd can
17992 handle, fix these up with a pair of ldr. */
17994 || !CONST_INT_P (otherops
[2])
17995 || (INTVAL (otherops
[2]) > -256
17996 && INTVAL (otherops
[2]) < 256))
17999 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
18005 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18006 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18016 /* Use a single insn if we can.
18017 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18018 fix these up with a pair of ldr. */
18020 || !CONST_INT_P (otherops
[2])
18021 || (INTVAL (otherops
[2]) > -256
18022 && INTVAL (otherops
[2]) < 256))
18025 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
18031 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18032 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18042 /* We might be able to use ldrd %0, %1 here. However the range is
18043 different to ldr/adr, and it is broken on some ARMv7-M
18044 implementations. */
18045 /* Use the second register of the pair to avoid problematic
18047 otherops
[1] = operands
[1];
18049 output_asm_insn ("adr%?\t%0, %1", otherops
);
18050 operands
[1] = otherops
[0];
18054 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18056 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
18063 /* ??? This needs checking for thumb2. */
18065 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18066 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18068 otherops
[0] = operands
[0];
18069 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18070 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18072 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18074 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18076 switch ((int) INTVAL (otherops
[2]))
18080 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
18086 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
18092 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
18096 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18097 operands
[1] = otherops
[0];
18099 && (REG_P (otherops
[2])
18101 || (CONST_INT_P (otherops
[2])
18102 && INTVAL (otherops
[2]) > -256
18103 && INTVAL (otherops
[2]) < 256)))
18105 if (reg_overlap_mentioned_p (operands
[0],
18108 /* Swap base and index registers over to
18109 avoid a conflict. */
18110 std::swap (otherops
[1], otherops
[2]);
18112 /* If both registers conflict, it will usually
18113 have been fixed by a splitter. */
18114 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18115 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18119 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18120 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18127 otherops
[0] = operands
[0];
18129 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
18134 if (CONST_INT_P (otherops
[2]))
18138 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18139 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18141 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18147 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18153 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18160 return "ldr%(d%)\t%0, [%1]";
18162 return "ldm%(ia%)\t%1, %M0";
18166 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18167 /* Take care of overlapping base/data reg. */
18168 if (reg_mentioned_p (operands
[0], operands
[1]))
18172 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18173 output_asm_insn ("ldr%?\t%0, %1", operands
);
18183 output_asm_insn ("ldr%?\t%0, %1", operands
);
18184 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18194 /* Constraints should ensure this. */
18195 gcc_assert (code0
== MEM
&& code1
== REG
);
18196 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18197 || (TARGET_ARM
&& TARGET_LDRD
));
18199 switch (GET_CODE (XEXP (operands
[0], 0)))
18205 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
18207 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18212 gcc_assert (TARGET_LDRD
);
18214 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
18221 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
18223 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
18231 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
18233 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
18238 gcc_assert (TARGET_LDRD
);
18240 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
18245 otherops
[0] = operands
[1];
18246 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18247 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18249 /* IWMMXT allows offsets larger than ldrd can handle,
18250 fix these up with a pair of ldr. */
18252 && CONST_INT_P (otherops
[2])
18253 && (INTVAL(otherops
[2]) <= -256
18254 || INTVAL(otherops
[2]) >= 256))
18256 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18260 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18261 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18270 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18271 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18277 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18280 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
18285 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
18290 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18291 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18293 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18297 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
18304 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
18311 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
18316 && (REG_P (otherops
[2])
18318 || (CONST_INT_P (otherops
[2])
18319 && INTVAL (otherops
[2]) > -256
18320 && INTVAL (otherops
[2]) < 256)))
18322 otherops
[0] = operands
[1];
18323 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18325 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
18331 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18332 otherops
[1] = operands
[1];
18335 output_asm_insn ("str%?\t%1, %0", operands
);
18336 output_asm_insn ("str%?\t%H1, %0", otherops
);
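
/* Illustrative forms only (registers hypothetical): the common DImode cases
   above come out roughly as an ldrd/strd of the register pair when
   TARGET_LDRD is in effect, and as ldmia/stmia of {rN, rN+1} otherwise,
   with the pre/post-indexed variants shown in the templates used for
   auto-increment addresses.  */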
18346 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18347 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18350 output_move_quad (rtx
*operands
)
18352 if (REG_P (operands
[0]))
18354 /* Load, or reg->reg move. */
18356 if (MEM_P (operands
[1]))
18358 switch (GET_CODE (XEXP (operands
[1], 0)))
18361 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
18366 output_asm_insn ("adr%?\t%0, %1", operands
);
18367 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
18371 gcc_unreachable ();
18379 gcc_assert (REG_P (operands
[1]));
18381 dest
= REGNO (operands
[0]);
18382 src
= REGNO (operands
[1]);
18384 /* This seems pretty dumb, but hopefully GCC won't try to do it
18387 for (i
= 0; i
< 4; i
++)
18389 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18390 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18391 output_asm_insn ("mov%?\t%0, %1", ops
);
18394 for (i
= 3; i
>= 0; i
--)
18396 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18397 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18398 output_asm_insn ("mov%?\t%0, %1", ops
);
18404 gcc_assert (MEM_P (operands
[0]));
18405 gcc_assert (REG_P (operands
[1]));
18406 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18408 switch (GET_CODE (XEXP (operands
[0], 0)))
18411 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18415 gcc_unreachable ();
18422 /* Output a VFP load or store instruction. */
18425 output_move_vfp (rtx
*operands
)
18427 rtx reg
, mem
, addr
, ops
[2];
18428 int load
= REG_P (operands
[0]);
18429 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18430 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18435 reg
= operands
[!load
];
18436 mem
= operands
[load
];
18438 mode
= GET_MODE (reg
);
18440 gcc_assert (REG_P (reg
));
18441 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18442 gcc_assert (mode
== SFmode
18446 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18447 gcc_assert (MEM_P (mem
));
18449 addr
= XEXP (mem
, 0);
18451 switch (GET_CODE (addr
))
18454 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18455 ops
[0] = XEXP (addr
, 0);
18460 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18461 ops
[0] = XEXP (addr
, 0);
18466 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18472 sprintf (buff
, templ
,
18473 load
? "ld" : "st",
18476 integer_p
? "\t%@ int" : "");
18477 output_asm_insn (buff
, ops
);
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
18523 output_move_neon (rtx
*operands
)
18525 rtx reg
, mem
, addr
, ops
[2];
18526 int regno
, nregs
, load
= REG_P (operands
[0]);
18531 reg
= operands
[!load
];
18532 mem
= operands
[load
];
18534 mode
= GET_MODE (reg
);
18536 gcc_assert (REG_P (reg
));
18537 regno
= REGNO (reg
);
18538 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18539 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18540 || NEON_REGNO_OK_FOR_QUAD (regno
));
18541 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18542 || VALID_NEON_QREG_MODE (mode
)
18543 || VALID_NEON_STRUCT_MODE (mode
));
18544 gcc_assert (MEM_P (mem
));
18546 addr
= XEXP (mem
, 0);
18548 /* Strip off const from addresses like (const (plus (...))). */
18549 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18550 addr
= XEXP (addr
, 0);
18552 switch (GET_CODE (addr
))
18555 /* We have to use vldm / vstm for too-large modes. */
18558 templ
= "v%smia%%?\t%%0!, %%h1";
18559 ops
[0] = XEXP (addr
, 0);
18563 templ
= "v%s1.64\t%%h1, %%A0";
18570 /* We have to use vldm / vstm in this case, since there is no
18571 pre-decrement form of the vld1 / vst1 instructions. */
18572 templ
= "v%smdb%%?\t%%0!, %%h1";
18573 ops
[0] = XEXP (addr
, 0);
18578 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18579 gcc_unreachable ();
18582 /* We have to use vldm / vstm for too-large modes. */
18586 templ
= "v%smia%%?\t%%m0, %%h1";
18588 templ
= "v%s1.64\t%%h1, %%A0";
18594 /* Fall through. */
18600 for (i
= 0; i
< nregs
; i
++)
18602 /* We're only using DImode here because it's a convenient size. */
18603 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18604 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18605 if (reg_overlap_mentioned_p (ops
[0], mem
))
18607 gcc_assert (overlap
== -1);
18612 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18613 output_asm_insn (buff
, ops
);
18618 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18619 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18620 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18621 output_asm_insn (buff
, ops
);
18628 gcc_unreachable ();
18631 sprintf (buff
, templ
, load
? "ld" : "st");
18632 output_asm_insn (buff
, ops
);
18637 /* Compute and return the length of neon_mov<mode>, where <mode> is
18638 one of VSTRUCT modes: EI, OI, CI or XI. */
18640 arm_attr_length_move_neon (rtx_insn
*insn
)
18642 rtx reg
, mem
, addr
;
18646 extract_insn_cached (insn
);
18648 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18650 mode
= GET_MODE (recog_data
.operand
[0]);
18661 gcc_unreachable ();
18665 load
= REG_P (recog_data
.operand
[0]);
18666 reg
= recog_data
.operand
[!load
];
18667 mem
= recog_data
.operand
[load
];
18669 gcc_assert (MEM_P (mem
));
18671 mode
= GET_MODE (reg
);
18672 addr
= XEXP (mem
, 0);
18674 /* Strip off const from addresses like (const (plus (...))). */
18675 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18676 addr
= XEXP (addr
, 0);
18678 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18680 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18687 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18691 arm_address_offset_is_imm (rtx_insn
*insn
)
18695 extract_insn_cached (insn
);
18697 if (REG_P (recog_data
.operand
[0]))
18700 mem
= recog_data
.operand
[0];
18702 gcc_assert (MEM_P (mem
));
18704 addr
= XEXP (mem
, 0);
18707 || (GET_CODE (addr
) == PLUS
18708 && REG_P (XEXP (addr
, 0))
18709 && CONST_INT_P (XEXP (addr
, 1))))
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}

/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
18778 /* Return the name of a shifter operation. */
18779 static const char *
18780 arm_shift_nmem(enum rtx_code code
)
18785 return ARM_LSL_NAME
;
18801 /* Return the appropriate ARM instruction for the operation code.
18802 The returned result should not be overwritten. OP is the rtx of the
18803 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18806 arithmetic_instr (rtx op
, int shift_first_arg
)
18808 switch (GET_CODE (op
))
18814 return shift_first_arg
? "rsb" : "sub";
18829 return arm_shift_nmem(GET_CODE(op
));
18832 gcc_unreachable ();
18836 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18837 for the operation code. The returned result should not be overwritten.
18838 OP is the rtx code of the shift.
18839 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18841 static const char *
18842 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18845 enum rtx_code code
= GET_CODE (op
);
18850 if (!CONST_INT_P (XEXP (op
, 1)))
18852 output_operand_lossage ("invalid shift operand");
18857 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18865 mnem
= arm_shift_nmem(code
);
18866 if (CONST_INT_P (XEXP (op
, 1)))
18868 *amountp
= INTVAL (XEXP (op
, 1));
18870 else if (REG_P (XEXP (op
, 1)))
18877 output_operand_lossage ("invalid shift operand");
18883 /* We never have to worry about the amount being other than a
18884 power of 2, since this case can never be reloaded from a reg. */
18885 if (!CONST_INT_P (XEXP (op
, 1)))
18887 output_operand_lossage ("invalid shift operand");
18891 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18893 /* Amount must be a power of two. */
18894 if (*amountp
& (*amountp
- 1))
18896 output_operand_lossage ("invalid shift operand");
18900 *amountp
= int_log2 (*amountp
);
18901 return ARM_LSL_NAME
;
18904 output_operand_lossage ("invalid shift operand");
18908 /* This is not 100% correct, but follows from the desire to merge
18909 multiplication by a power of 2 with the recognizer for a
18910 shift. >=32 is not a valid shift for "lsl", so we must try and
18911 output a shift that produces the correct arithmetical result.
18912 Using lsr #32 is identical except for the fact that the carry bit
18913 is not set correctly if we set the flags; but we never use the
18914 carry bit from such an operation, so we can ignore that. */
18915 if (code
== ROTATERT
)
18916 /* Rotate is just modulo 32. */
18918 else if (*amountp
!= (*amountp
& 31))
18920 if (code
== ASHIFT
)
18925 /* Shifts of 0 are no-ops. */
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
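
/* Example: int_log2 (8) returns 3.  shift_op uses this to fold a multiply
   by a power of two into the "lsl" shifter operand, so a multiplication by 8
   becomes a left shift by 3 (see the MULT handling above).  */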
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
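
/* For illustration: calling output_ascii_pseudo_op on the 13-byte string
   He said "hi"<newline> emits roughly

	.ascii	"He said \"hi\"\012"

   Printable characters are copied (quotes and backslashes get escaped),
   anything else becomes a three-digit octal escape, and the string is
   re-opened every MAX_ASCII_LEN characters to keep the assembler happy.  */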
18996 /* Compute the register save mask for registers 0 through 12
18997 inclusive. This code is used by arm_compute_save_reg_mask. */
18999 static unsigned long
19000 arm_compute_save_reg0_reg12_mask (void)
19002 unsigned long func_type
= arm_current_func_type ();
19003 unsigned long save_reg_mask
= 0;
19006 if (IS_INTERRUPT (func_type
))
19008 unsigned int max_reg
;
19009 /* Interrupt functions must not corrupt any registers,
19010 even call clobbered ones. If this is a leaf function
19011 we can just examine the registers used by the RTL, but
19012 otherwise we have to assume that whatever function is
19013 called might clobber anything, and so we have to save
19014 all the call-clobbered registers as well. */
19015 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19016 /* FIQ handlers have registers r8 - r12 banked, so
19017 we only need to check r0 - r7, Normal ISRs only
19018 bank r14 and r15, so we must check up to r12.
19019 r13 is the stack pointer which is always preserved,
19020 so we do not need to consider it here. */
19025 for (reg
= 0; reg
<= max_reg
; reg
++)
19026 if (df_regs_ever_live_p (reg
)
19027 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19028 save_reg_mask
|= (1 << reg
);
19030 /* Also save the pic base register if necessary. */
19032 && !TARGET_SINGLE_PIC_BASE
19033 && arm_pic_register
!= INVALID_REGNUM
19034 && crtl
->uses_pic_offset_table
)
19035 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19037 else if (IS_VOLATILE(func_type
))
19039 /* For noreturn functions we historically omitted register saves
19040 altogether. However this really messes up debugging. As a
19041 compromise save just the frame pointers. Combined with the link
19042 register saved elsewhere this should be sufficient to get
19044 if (frame_pointer_needed
)
19045 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19046 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19047 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19048 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19049 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19053 /* In the normal case we only need to save those registers
19054 which are call saved and which are used by this function. */
19055 for (reg
= 0; reg
<= 11; reg
++)
19056 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
19057 save_reg_mask
|= (1 << reg
);
19059 /* Handle the frame pointer as a special case. */
19060 if (frame_pointer_needed
)
19061 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19063 /* If we aren't loading the PIC register,
19064 don't stack it even though it may be live. */
19066 && !TARGET_SINGLE_PIC_BASE
19067 && arm_pic_register
!= INVALID_REGNUM
19068 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19069 || crtl
->uses_pic_offset_table
))
19070 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19072 /* The prologue will copy SP into R0, so save it. */
19073 if (IS_STACKALIGN (func_type
))
19074 save_reg_mask
|= 1;
19077 /* Save registers so the exception handler can modify them. */
19078 if (crtl
->calls_eh_return
)
19084 reg
= EH_RETURN_DATA_REGNO (i
);
19085 if (reg
== INVALID_REGNUM
)
19087 save_reg_mask
|= 1 << reg
;
19091 return save_reg_mask
;
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}

/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (arm_current_func_type ())
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
19122 /* Compute a bit mask of which registers need to be
19123 saved on the stack for the current function.
19124 This is used by arm_get_frame_offsets, which may add extra registers. */
19126 static unsigned long
19127 arm_compute_save_reg_mask (void)
19129 unsigned int save_reg_mask
= 0;
19130 unsigned long func_type
= arm_current_func_type ();
19133 if (IS_NAKED (func_type
))
19134 /* This should never really happen. */
19137 /* If we are creating a stack frame, then we must save the frame pointer,
19138 IP (which will hold the old stack pointer), LR and the PC. */
19139 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19141 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19144 | (1 << PC_REGNUM
);
19146 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19148 /* Decide if we need to save the link register.
19149 Interrupt routines have their own banked link register,
19150 so they never need to save it.
19151 Otherwise if we do not use the link register we do not need to save
19152 it. If we are pushing other registers onto the stack however, we
19153 can save an instruction in the epilogue by pushing the link register
19154 now and then popping it back into the PC. This incurs extra memory
19155 accesses though, so we only do it when optimizing for size, and only
19156 if we know that we will not need a fancy return sequence. */
19157 if (df_regs_ever_live_p (LR_REGNUM
)
19160 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19161 && !crtl
->calls_eh_return
))
19162 save_reg_mask
|= 1 << LR_REGNUM
;
19164 if (cfun
->machine
->lr_save_eliminated
)
19165 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19167 if (TARGET_REALLY_IWMMXT
19168 && ((bit_count (save_reg_mask
)
19169 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19170 arm_compute_static_chain_stack_bytes())
19173 /* The total number of registers that are going to be pushed
19174 onto the stack is odd. We need to ensure that the stack
19175 is 64-bit aligned before we start to save iWMMXt registers,
19176 and also before we start to create locals. (A local variable
19177 might be a double or long long which we will load/store using
19178 an iWMMXt instruction). Therefore we need to push another
19179 ARM register, so that the stack will be 64-bit aligned. We
19180 try to avoid using the arg registers (r0 -r3) as they might be
19181 used to pass values in a tail call. */
19182 for (reg
= 4; reg
<= 12; reg
++)
19183 if ((save_reg_mask
& (1 << reg
)) == 0)
19187 save_reg_mask
|= (1 << reg
);
19190 cfun
->machine
->sibcall_blocked
= 1;
19191 save_reg_mask
|= (1 << 3);
19195 /* We may need to push an additional register for use initializing the
19196 PIC base register. */
19197 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19198 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19200 reg
= thumb_find_work_register (1 << 4);
19201 if (!call_used_regs
[reg
])
19202 save_reg_mask
|= (1 << reg
);
19205 return save_reg_mask
;
19209 /* Compute a bit mask of which registers need to be
19210 saved on the stack for the current function. */
19211 static unsigned long
19212 thumb1_compute_save_reg_mask (void)
19214 unsigned long mask
;
19218 for (reg
= 0; reg
< 12; reg
++)
19219 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
19223 && !TARGET_SINGLE_PIC_BASE
19224 && arm_pic_register
!= INVALID_REGNUM
19225 && crtl
->uses_pic_offset_table
)
19226 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19228 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19229 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19230 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19232 /* LR will also be pushed if any lo regs are pushed. */
19233 if (mask
& 0xff || thumb_force_lr_save ())
19234 mask
|= (1 << LR_REGNUM
);
19236 /* Make sure we have a low work register if we need one.
19237 We will need one if we are going to push a high register,
19238 but we are not currently intending to push a low register. */
19239 if ((mask
& 0xff) == 0
19240 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19242 /* Use thumb_find_work_register to choose which register
19243 we will use. If the register is live then we will
19244 have to push it. Use LAST_LO_REGNUM as our fallback
19245 choice for the register to select. */
19246 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19247 /* Make sure the register returned by thumb_find_work_register is
19248 not part of the return value. */
19249 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19250 reg
= LAST_LO_REGNUM
;
19252 if (! call_used_regs
[reg
])
19256 /* The 504 below is 8 bytes less than 512 because there are two possible
19257 alignment words. We can't tell here if they will be present or not so we
19258 have to play it safe and assume that they are. */
19259 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19260 ROUND_UP_WORD (get_frame_size ()) +
19261 crtl
->outgoing_args_size
) >= 504)
19263 /* This is the same as the code in thumb1_expand_prologue() which
19264 determines which register to use for stack decrement. */
19265 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19266 if (mask
& (1 << reg
))
19269 if (reg
> LAST_LO_REGNUM
)
19271 /* Make sure we have a register available for stack decrement. */
19272 mask
|= 1 << LAST_LO_REGNUM
;
19280 /* Return the number of bytes required to save VFP registers. */
19282 arm_get_vfp_saved_size (void)
19284 unsigned int regno
;
19289 /* Space for saved VFP registers. */
19290 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19293 for (regno
= FIRST_VFP_REGNUM
;
19294 regno
< LAST_VFP_REGNUM
;
19297 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19298 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19302 /* Workaround ARM10 VFPr1 bug. */
19303 if (count
== 2 && !arm_arch6
)
19305 saved
+= count
* 8;
19314 if (count
== 2 && !arm_arch6
)
19316 saved
+= count
* 8;
19323 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19324 everything bar the final return instruction. If simple_return is true,
19325 then do not output epilogue, because it has already been emitted in RTL. */
19327 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19328 bool simple_return
)
19330 char conditional
[10];
19333 unsigned long live_regs_mask
;
19334 unsigned long func_type
;
19335 arm_stack_offsets
*offsets
;
19337 func_type
= arm_current_func_type ();
19339 if (IS_NAKED (func_type
))
19342 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19344 /* If this function was declared non-returning, and we have
19345 found a tail call, then we have to trust that the called
19346 function won't return. */
19351 /* Otherwise, trap an attempted return by aborting. */
19353 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19355 assemble_external_libcall (ops
[1]);
19356 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19362 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19364 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19366 cfun
->machine
->return_used_this_function
= 1;
19368 offsets
= arm_get_frame_offsets ();
19369 live_regs_mask
= offsets
->saved_regs_mask
;
19371 if (!simple_return
&& live_regs_mask
)
19373 const char * return_reg
;
19375 /* If we do not have any special requirements for function exit
19376 (e.g. interworking) then we can load the return address
19377 directly into the PC. Otherwise we must load it into LR. */
19379 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19380 return_reg
= reg_names
[PC_REGNUM
];
19382 return_reg
= reg_names
[LR_REGNUM
];
19384 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19386 /* There are three possible reasons for the IP register
19387 being saved. 1) a stack frame was created, in which case
19388 IP contains the old stack pointer, or 2) an ISR routine
19389 corrupted it, or 3) it was saved to align the stack on
19390 iWMMXt. In case 1, restore IP into SP, otherwise just
19392 if (frame_pointer_needed
)
19394 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19395 live_regs_mask
|= (1 << SP_REGNUM
);
19398 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19401 /* On some ARM architectures it is faster to use LDR rather than
19402 LDM to load a single register. On other architectures, the
19403 cost is the same. In 26 bit mode, or for exception handlers,
19404 we have to use LDM to load the PC so that the CPSR is also
19406 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19407 if (live_regs_mask
== (1U << reg
))
19410 if (reg
<= LAST_ARM_REGNUM
19411 && (reg
!= LR_REGNUM
19413 || ! IS_INTERRUPT (func_type
)))
19415 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19416 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19423 /* Generate the load multiple instruction to restore the
19424 registers. Note we can get here, even if
19425 frame_pointer_needed is true, but only if sp already
19426 points to the base of the saved core registers. */
19427 if (live_regs_mask
& (1 << SP_REGNUM
))
19429 unsigned HOST_WIDE_INT stack_adjust
;
19431 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19432 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19434 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19435 if (TARGET_UNIFIED_ASM
)
19436 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19438 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19441 /* If we can't use ldmib (SA110 bug),
19442 then try to pop r3 instead. */
19444 live_regs_mask
|= 1 << 3;
19446 if (TARGET_UNIFIED_ASM
)
19447 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19449 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19453 if (TARGET_UNIFIED_ASM
)
19454 sprintf (instr
, "pop%s\t{", conditional
);
19456 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
19458 p
= instr
+ strlen (instr
);
19460 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19461 if (live_regs_mask
& (1 << reg
))
19463 int l
= strlen (reg_names
[reg
]);
19469 memcpy (p
, ", ", 2);
19473 memcpy (p
, "%|", 2);
19474 memcpy (p
+ 2, reg_names
[reg
], l
);
19478 if (live_regs_mask
& (1 << LR_REGNUM
))
19480 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19481 /* If returning from an interrupt, restore the CPSR. */
19482 if (IS_INTERRUPT (func_type
))
19489 output_asm_insn (instr
, & operand
);
19491 /* See if we need to generate an extra instruction to
19492 perform the actual function return. */
19494 && func_type
!= ARM_FT_INTERWORKED
19495 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19497 /* The return has already been handled
19498 by loading the LR into the PC. */
19505 switch ((int) ARM_FUNC_TYPE (func_type
))
19509 /* ??? This is wrong for unified assembly syntax. */
19510 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19513 case ARM_FT_INTERWORKED
:
19514 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19517 case ARM_FT_EXCEPTION
:
19518 /* ??? This is wrong for unified assembly syntax. */
19519 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19523 /* Use bx if it's available. */
19524 if (arm_arch5
|| arm_arch4t
)
19525 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19527 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19531 output_asm_insn (instr
, & operand
);
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
	 arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
19579 /* Place some comments into the assembler stream
19580 describing the current function. */
19582 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19584 unsigned long func_type
;
19586 /* ??? Do we want to print some of the below anyway? */
19590 /* Sanity check. */
19591 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19593 func_type
= arm_current_func_type ();
19595 switch ((int) ARM_FUNC_TYPE (func_type
))
19598 case ARM_FT_NORMAL
:
19600 case ARM_FT_INTERWORKED
:
19601 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19604 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19607 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19609 case ARM_FT_EXCEPTION
:
19610 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19614 if (IS_NAKED (func_type
))
19615 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19617 if (IS_VOLATILE (func_type
))
19618 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19620 if (IS_NESTED (func_type
))
19621 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19622 if (IS_STACKALIGN (func_type
))
19623 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19625 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19627 crtl
->args
.pretend_args_size
, frame_size
);
19629 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19630 frame_pointer_needed
,
19631 cfun
->machine
->uses_anonymous_args
);
19633 if (cfun
->machine
->lr_save_eliminated
)
19634 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19636 if (crtl
->calls_eh_return
)
19637 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19642 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19643 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19645 arm_stack_offsets
*offsets
;
19651 /* Emit any call-via-reg trampolines that are needed for v4t support
19652 of call_reg and call_value_reg type insns. */
19653 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19655 rtx label
= cfun
->machine
->call_via
[regno
];
19659 switch_to_section (function_section (current_function_decl
));
19660 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19661 CODE_LABEL_NUMBER (label
));
19662 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19666 /* ??? Probably not safe to set this here, since it assumes that a
19667 function will be emitted as assembly immediately after we generate
19668 RTL for it. This does not happen for inline functions. */
19669 cfun
->machine
->return_used_this_function
= 0;
19671 else /* TARGET_32BIT */
19673 /* We need to take into account any stack-frame rounding. */
19674 offsets
= arm_get_frame_offsets ();
19676 gcc_assert (!use_return_insn (FALSE
, NULL
)
19677 || (cfun
->machine
->return_used_this_function
!= 0)
19678 || offsets
->saved_regs
== offsets
->outgoing_args
19679 || frame_pointer_needed
);
19683 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19684 STR and STRD. If an even number of registers are being pushed, one
19685 or more STRD patterns are created for each register pair. If an
19686 odd number of registers are pushed, emit an initial STR followed by
19687 as many STRD instructions as are needed. This works best when the
19688 stack is initially 64-bit aligned (the normal case), since it
19689 ensures that each STRD is also 64-bit aligned. */
19691 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19696 rtx par
= NULL_RTX
;
19697 rtx dwarf
= NULL_RTX
;
19701 num_regs
= bit_count (saved_regs_mask
);
19703 /* Must be at least one register to save, and can't save SP or PC. */
19704 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19705 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19706 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19708 /* Create sequence for DWARF info. All the frame-related data for
19709 debugging is held in this wrapper. */
19710 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19712 /* Describe the stack adjustment. */
19713 tmp
= gen_rtx_SET (VOIDmode
,
19715 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19716 RTX_FRAME_RELATED_P (tmp
) = 1;
19717 XVECEXP (dwarf
, 0, 0) = tmp
;
19719 /* Find the first register. */
19720 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19725 /* If there's an odd number of registers to push. Start off by
19726 pushing a single register. This ensures that subsequent strd
19727 operations are dword aligned (assuming that SP was originally
19728 64-bit aligned). */
19729 if ((num_regs
& 1) != 0)
19731 rtx reg
, mem
, insn
;
19733 reg
= gen_rtx_REG (SImode
, regno
);
19735 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19736 stack_pointer_rtx
));
19738 mem
= gen_frame_mem (Pmode
,
19740 (Pmode
, stack_pointer_rtx
,
19741 plus_constant (Pmode
, stack_pointer_rtx
,
19744 tmp
= gen_rtx_SET (VOIDmode
, mem
, reg
);
19745 RTX_FRAME_RELATED_P (tmp
) = 1;
19746 insn
= emit_insn (tmp
);
19747 RTX_FRAME_RELATED_P (insn
) = 1;
19748 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19749 tmp
= gen_rtx_SET (VOIDmode
, gen_frame_mem (Pmode
, stack_pointer_rtx
),
19751 RTX_FRAME_RELATED_P (tmp
) = 1;
19754 XVECEXP (dwarf
, 0, i
) = tmp
;
19758 while (i
< num_regs
)
19759 if (saved_regs_mask
& (1 << regno
))
19761 rtx reg1
, reg2
, mem1
, mem2
;
19762 rtx tmp0
, tmp1
, tmp2
;
19765 /* Find the register to pair with this one. */
19766 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19770 reg1
= gen_rtx_REG (SImode
, regno
);
19771 reg2
= gen_rtx_REG (SImode
, regno2
);
19778 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19781 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19783 -4 * (num_regs
- 1)));
19784 tmp0
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19785 plus_constant (Pmode
, stack_pointer_rtx
,
19787 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19788 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19789 RTX_FRAME_RELATED_P (tmp0
) = 1;
19790 RTX_FRAME_RELATED_P (tmp1
) = 1;
19791 RTX_FRAME_RELATED_P (tmp2
) = 1;
19792 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19793 XVECEXP (par
, 0, 0) = tmp0
;
19794 XVECEXP (par
, 0, 1) = tmp1
;
19795 XVECEXP (par
, 0, 2) = tmp2
;
19796 insn
= emit_insn (par
);
19797 RTX_FRAME_RELATED_P (insn
) = 1;
19798 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19802 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19805 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19808 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19809 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19810 RTX_FRAME_RELATED_P (tmp1
) = 1;
19811 RTX_FRAME_RELATED_P (tmp2
) = 1;
19812 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19813 XVECEXP (par
, 0, 0) = tmp1
;
19814 XVECEXP (par
, 0, 1) = tmp2
;
19818 /* Create unwind information. This is an approximation. */
19819 tmp1
= gen_rtx_SET (VOIDmode
,
19820 gen_frame_mem (Pmode
,
19821 plus_constant (Pmode
,
19825 tmp2
= gen_rtx_SET (VOIDmode
,
19826 gen_frame_mem (Pmode
,
19827 plus_constant (Pmode
,
19832 RTX_FRAME_RELATED_P (tmp1
) = 1;
19833 RTX_FRAME_RELATED_P (tmp2
) = 1;
19834 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19835 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19837 regno
= regno2
+ 1;
19845 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19846 whenever possible, otherwise it emits single-word stores. The first store
19847 also allocates stack space for all saved registers, using writeback with
19848 post-addressing mode. All other stores use offset addressing. If no STRD
19849 can be emitted, this function emits a sequence of single-word stores,
19850 and not an STM as before, because single-word stores provide more freedom
19851 scheduling and can be turned into an STM by peephole optimizations. */
19853 arm_emit_strd_push (unsigned long saved_regs_mask
)
19856 int i
, j
, dwarf_index
= 0;
19858 rtx dwarf
= NULL_RTX
;
19859 rtx insn
= NULL_RTX
;
19862 /* TODO: A more efficient code can be emitted by changing the
19863 layout, e.g., first push all pairs that can use STRD to keep the
19864 stack aligned, and then push all other registers. */
19865 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19866 if (saved_regs_mask
& (1 << i
))
19869 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19870 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19871 gcc_assert (num_regs
> 0);
19873 /* Create sequence for DWARF info. */
19874 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19876 /* For dwarf info, we generate explicit stack update. */
19877 tmp
= gen_rtx_SET (VOIDmode
,
19879 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19880 RTX_FRAME_RELATED_P (tmp
) = 1;
19881 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19883 /* Save registers. */
19884 offset
= - 4 * num_regs
;
19886 while (j
<= LAST_ARM_REGNUM
)
19887 if (saved_regs_mask
& (1 << j
))
19890 && (saved_regs_mask
& (1 << (j
+ 1))))
19892 /* Current register and previous register form register pair for
19893 which STRD can be generated. */
19896 /* Allocate stack space for all saved registers. */
19897 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19898 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19899 mem
= gen_frame_mem (DImode
, tmp
);
19902 else if (offset
> 0)
19903 mem
= gen_frame_mem (DImode
,
19904 plus_constant (Pmode
,
19908 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19910 tmp
= gen_rtx_SET (DImode
, mem
, gen_rtx_REG (DImode
, j
));
19911 RTX_FRAME_RELATED_P (tmp
) = 1;
19912 tmp
= emit_insn (tmp
);
19914 /* Record the first store insn. */
19915 if (dwarf_index
== 1)
19918 /* Generate dwarf info. */
19919 mem
= gen_frame_mem (SImode
,
19920 plus_constant (Pmode
,
19923 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19924 RTX_FRAME_RELATED_P (tmp
) = 1;
19925 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19927 mem
= gen_frame_mem (SImode
,
19928 plus_constant (Pmode
,
19931 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
+ 1));
19932 RTX_FRAME_RELATED_P (tmp
) = 1;
19933 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19940 /* Emit a single word store. */
19943 /* Allocate stack space for all saved registers. */
19944 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19945 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19946 mem
= gen_frame_mem (SImode
, tmp
);
19949 else if (offset
> 0)
19950 mem
= gen_frame_mem (SImode
,
19951 plus_constant (Pmode
,
19955 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19957 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19958 RTX_FRAME_RELATED_P (tmp
) = 1;
19959 tmp
= emit_insn (tmp
);
19961 /* Record the first store insn. */
19962 if (dwarf_index
== 1)
19965 /* Generate dwarf info. */
19966 mem
= gen_frame_mem (SImode
,
19967 plus_constant(Pmode
,
19970 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19971 RTX_FRAME_RELATED_P (tmp
) = 1;
19972 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19981 /* Attach dwarf info to the first insn we generate. */
19982 gcc_assert (insn
!= NULL_RTX
);
19983 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19984 RTX_FRAME_RELATED_P (insn
) = 1;
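
/* The loop above pairs stores greedily.  As an illustration only (this
   helper is not referenced by the backend and its name is invented for
   the example), the following stand-alone sketch counts how many STRD
   pairs and how many single-word STR stores would be produced for a
   given core-register mask.  It assumes the ARM-mode pairing rule: two
   consecutive registers, both present in the mask, with the first one
   even-numbered.  */

static void
strd_push_shape_sketch (unsigned long saved_regs_mask,
			int *num_strd, int *num_str)
{
  int j = 0;

  *num_strd = 0;
  *num_str = 0;

  while (j <= 15)	/* Core registers are r0..r15.  */
    {
      if ((saved_regs_mask & (1UL << j)) == 0)
	j++;
      else if ((j % 2) == 0
	       && j < 15
	       && (saved_regs_mask & (1UL << (j + 1))))
	{
	  /* Registers j and j + 1 form an STRD pair.  */
	  (*num_strd)++;
	  j += 2;
	}
      else
	{
	  /* Lone register: a single-word store.  */
	  (*num_str)++;
	  j++;
	}
    }
}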
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.
19994 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
19997 int num_dwarf_regs
= 0;
20001 int dwarf_par_index
;
20004 /* We don't record the PC in the dwarf frame information. */
20005 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20007 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20009 if (mask
& (1 << i
))
20011 if (dwarf_regs_mask
& (1 << i
))
20015 gcc_assert (num_regs
&& num_regs
<= 16);
20016 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20018 /* For the body of the insn we are going to generate an UNSPEC in
20019 parallel with several USEs. This allows the insn to be recognized
20020 by the push_multi pattern in the arm.md file.
20022 The body of the insn looks something like this:
20025 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20026 (const_int:SI <num>)))
20027 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20033 For the frame note however, we try to be more explicit and actually
20034 show each register being stored into the stack frame, plus a (single)
20035 decrement of the stack pointer. We do it this way in order to be
20036 friendly to the stack unwinding code, which only wants to see a single
20037 stack decrement per instruction. The RTL we generate for the note looks
20038 something like this:
20041 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20042 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20043 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20044 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20048 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20049 instead we'd have a parallel expression detailing all
20050 the stores to the various memory addresses so that debug
20051 information is more up-to-date. Remember however while writing
20052 this to take care of the constraints with the push instruction.
20054 Note also that this has to be taken care of for the VFP registers.
20056 For more see PR43399. */
20058 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20059 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20060 dwarf_par_index
= 1;
20062 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20064 if (mask
& (1 << i
))
20066 reg
= gen_rtx_REG (SImode
, i
);
20068 XVECEXP (par
, 0, 0)
20069 = gen_rtx_SET (VOIDmode
,
20072 gen_rtx_PRE_MODIFY (Pmode
,
20075 (Pmode
, stack_pointer_rtx
,
20078 gen_rtx_UNSPEC (BLKmode
,
20079 gen_rtvec (1, reg
),
20080 UNSPEC_PUSH_MULT
));
20082 if (dwarf_regs_mask
& (1 << i
))
20084 tmp
= gen_rtx_SET (VOIDmode
,
20085 gen_frame_mem (SImode
, stack_pointer_rtx
),
20087 RTX_FRAME_RELATED_P (tmp
) = 1;
20088 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20095 for (j
= 1, i
++; j
< num_regs
; i
++)
20097 if (mask
& (1 << i
))
20099 reg
= gen_rtx_REG (SImode
, i
);
20101 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20103 if (dwarf_regs_mask
& (1 << i
))
20106 = gen_rtx_SET (VOIDmode
,
20109 plus_constant (Pmode
, stack_pointer_rtx
,
20112 RTX_FRAME_RELATED_P (tmp
) = 1;
20113 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20120 par
= emit_insn (par
);
20122 tmp
= gen_rtx_SET (VOIDmode
,
20124 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20125 RTX_FRAME_RELATED_P (tmp
) = 1;
20126 XVECEXP (dwarf
, 0, 0) = tmp
;
20128 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
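
/* For the frame note built above, the only stack decrement is the single
   SP adjustment of 4 * NUM_REGS bytes; each annotated register then sits
   at a fixed word offset from the new stack pointer.  As an illustration
   only (the helper name is invented and nothing in the backend calls it),
   this sketch gives the offset of the K-th annotated save slot relative
   to the stack pointer value on entry.  */

static int
push_multi_note_offset_sketch (int num_regs, int k)
{
  /* New SP is old SP - 4 * num_regs; slot K (0-based, lowest register
     first) is stored at new SP + 4 * K.  */
  return -4 * num_regs + 4 * k;
}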
20133 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20134 SIZE is the offset to be adjusted.
20135 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20137 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20141 RTX_FRAME_RELATED_P (insn
) = 1;
20142 dwarf
= gen_rtx_SET (VOIDmode
, dest
, plus_constant (Pmode
, src
, size
));
20143 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20146 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20147 SAVED_REGS_MASK shows which registers need to be restored.
20149 Unfortunately, since this insn does not reflect very well the actual
20150 semantics of the operation, we need to annotate the insn for the benefit
20151 of DWARF2 frame unwind information. */
20153 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20158 rtx dwarf
= NULL_RTX
;
20164 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20165 offset_adj
= return_in_pc
? 1 : 0;
20166 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20167 if (saved_regs_mask
& (1 << i
))
20170 gcc_assert (num_regs
&& num_regs
<= 16);
20172 /* If SP is in reglist, then we don't emit SP update insn. */
20173 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20175 /* The parallel needs to hold num_regs SETs
20176 and one SET for the stack update. */
20177 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20182 XVECEXP (par
, 0, 0) = tmp
;
20187 /* Increment the stack pointer, based on there being
20188 num_regs 4-byte registers to restore. */
20189 tmp
= gen_rtx_SET (VOIDmode
,
20191 plus_constant (Pmode
,
20194 RTX_FRAME_RELATED_P (tmp
) = 1;
20195 XVECEXP (par
, 0, offset_adj
) = tmp
;
20198 /* Now restore every reg, which may include PC. */
20199 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20200 if (saved_regs_mask
& (1 << i
))
20202 reg
= gen_rtx_REG (SImode
, i
);
20203 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20205 /* Emit single load with writeback. */
20206 tmp
= gen_frame_mem (SImode
,
20207 gen_rtx_POST_INC (Pmode
,
20208 stack_pointer_rtx
));
20209 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, tmp
));
20210 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20214 tmp
= gen_rtx_SET (VOIDmode
,
20218 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20219 RTX_FRAME_RELATED_P (tmp
) = 1;
20220 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20222 /* We need to maintain a sequence for DWARF info too. As dwarf info
20223 should not have PC, skip PC. */
20224 if (i
!= PC_REGNUM
)
20225 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20231 par
= emit_jump_insn (par
);
20233 par
= emit_insn (par
);
20235 REG_NOTES (par
) = dwarf
;
20237 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20238 stack_pointer_rtx
, stack_pointer_rtx
);
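
/* The PARALLEL built above holds one SET per restored register, one SET
   for the stack update when SP itself is not in the register list, and
   one extra element when the pop also returns through PC.  As an
   illustration only (the helper below is not used by the backend and its
   name is invented), this sketch reproduces that sizing rule from a raw
   register mask.  */

static int
pop_multi_parallel_len_sketch (unsigned long saved_regs_mask)
{
  int i, num_regs = 0;
  int return_in_pc = (saved_regs_mask & (1UL << 15)) != 0;  /* PC is r15.  */
  int emit_update = (saved_regs_mask & (1UL << 13)) == 0;   /* SP is r13.  */

  for (i = 0; i <= 15; i++)
    if (saved_regs_mask & (1UL << i))
      num_regs++;

  return num_regs + emit_update + (return_in_pc ? 1 : 0);
}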
20241 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20242 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20244 Unfortunately, since this insn does not reflect very well the actual
20245 semantics of the operation, we need to annotate the insn for the benefit
20246 of DWARF2 frame unwind information. */
20248 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20252 rtx dwarf
= NULL_RTX
;
20255 gcc_assert (num_regs
&& num_regs
<= 32);
20257 /* Workaround ARM10 VFPr1 bug. */
20258 if (num_regs
== 2 && !arm_arch6
)
20260 if (first_reg
== 15)
20266 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20267 there could be up to 32 D-registers to restore.
20268 If there are more than 16 D-registers, make two recursive calls,
20269 each of which emits one pop_multi instruction. */
20272 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20273 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20277 /* The parallel needs to hold num_regs SETs
20278 and one SET for the stack update. */
20279 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20281 /* Increment the stack pointer, based on there being
20282 num_regs 8-byte registers to restore. */
20283 tmp
= gen_rtx_SET (VOIDmode
,
20285 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20286 RTX_FRAME_RELATED_P (tmp
) = 1;
20287 XVECEXP (par
, 0, 0) = tmp
;
20289 /* Now show every reg that will be restored, using a SET for each. */
20290 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20292 reg
= gen_rtx_REG (DFmode
, i
);
20294 tmp
= gen_rtx_SET (VOIDmode
,
20298 plus_constant (Pmode
, base_reg
, 8 * j
)));
20299 RTX_FRAME_RELATED_P (tmp
) = 1;
20300 XVECEXP (par
, 0, j
+ 1) = tmp
;
20302 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20307 par
= emit_insn (par
);
20308 REG_NOTES (par
) = dwarf
;
  /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20311 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
20313 RTX_FRAME_RELATED_P (par
) = 1;
20314 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20317 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20318 base_reg
, base_reg
);
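
/* When more than 16 D-registers have to be restored, the function above
   splits the work into two pop_multi instructions of at most 16 registers
   each.  The stand-alone sketch below (illustration only; the name is
   invented) shows the same split as plain arithmetic.  */

static void
vfp_pop_split_sketch (int num_regs, int *first_count, int *second_count)
{
  if (num_regs > 16)
    {
      *first_count = 16;
      *second_count = num_regs - 16;
    }
  else
    {
      *first_count = num_regs;
      *second_count = 0;
    }
}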
/* Generate and emit a pattern that will be recognized as an LDRD pattern.  If
   an even number of registers is being popped, multiple LDRD patterns are
   created for all register pairs.  If an odd number of registers is popped,
   the last register is loaded using an LDR pattern.  */
20326 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20330 rtx par
= NULL_RTX
;
20331 rtx dwarf
= NULL_RTX
;
20332 rtx tmp
, reg
, tmp1
;
20335 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20336 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20337 if (saved_regs_mask
& (1 << i
))
20340 gcc_assert (num_regs
&& num_regs
<= 16);
20342 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20343 to be popped. So, if num_regs is even, now it will become odd,
20344 and we can generate pop with PC. If num_regs is odd, it will be
20345 even now, and ldr with return can be generated for PC. */
20349 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20351 /* Var j iterates over all the registers to gather all the registers in
20352 saved_regs_mask. Var i gives index of saved registers in stack frame.
20353 A PARALLEL RTX of register-pair is created here, so that pattern for
20354 LDRD can be matched. As PC is always last register to be popped, and
20355 we have already decremented num_regs if PC, we don't have to worry
20356 about PC in this loop. */
20357 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20358 if (saved_regs_mask
& (1 << j
))
20360 /* Create RTX for memory load. */
20361 reg
= gen_rtx_REG (SImode
, j
);
20362 tmp
= gen_rtx_SET (SImode
,
20364 gen_frame_mem (SImode
,
20365 plus_constant (Pmode
,
20366 stack_pointer_rtx
, 4 * i
)));
20367 RTX_FRAME_RELATED_P (tmp
) = 1;
20371 /* When saved-register index (i) is even, the RTX to be emitted is
20372 yet to be created. Hence create it first. The LDRD pattern we
20373 are generating is :
20374 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20375 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20376 where target registers need not be consecutive. */
20377 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20381 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20382 added as 0th element and if i is odd, reg_i is added as 1st element
20383 of LDRD pattern shown above. */
20384 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20385 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20389 /* When saved-register index (i) is odd, RTXs for both the registers
20390 to be loaded are generated in above given LDRD pattern, and the
20391 pattern can be emitted now. */
20392 par
= emit_insn (par
);
20393 REG_NOTES (par
) = dwarf
;
20394 RTX_FRAME_RELATED_P (par
) = 1;
  /* If the number of registers pushed is odd and return_in_pc is false, or
     the number of registers is even and return_in_pc is true, the last
     register is popped using LDR.  It can be PC as well.  Hence, adjust the
     stack first and then use LDR with post-increment.  */
20405 /* Increment the stack pointer, based on there being
20406 num_regs 4-byte registers to restore. */
20407 tmp
= gen_rtx_SET (VOIDmode
,
20409 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20410 RTX_FRAME_RELATED_P (tmp
) = 1;
20411 tmp
= emit_insn (tmp
);
20414 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20415 stack_pointer_rtx
, stack_pointer_rtx
);
20420 if (((num_regs
% 2) == 1 && !return_in_pc
)
20421 || ((num_regs
% 2) == 0 && return_in_pc
))
20423 /* Scan for the single register to be popped. Skip until the saved
20424 register is found. */
20425 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20427 /* Gen LDR with post increment here. */
20428 tmp1
= gen_rtx_MEM (SImode
,
20429 gen_rtx_POST_INC (SImode
,
20430 stack_pointer_rtx
));
20431 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20433 reg
= gen_rtx_REG (SImode
, j
);
20434 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
20435 RTX_FRAME_RELATED_P (tmp
) = 1;
20436 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20440 /* If return_in_pc, j must be PC_REGNUM. */
20441 gcc_assert (j
== PC_REGNUM
);
20442 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20443 XVECEXP (par
, 0, 0) = ret_rtx
;
20444 XVECEXP (par
, 0, 1) = tmp
;
20445 par
= emit_jump_insn (par
);
20449 par
= emit_insn (tmp
);
20450 REG_NOTES (par
) = dwarf
;
20451 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20452 stack_pointer_rtx
, stack_pointer_rtx
);
20456 else if ((num_regs
% 2) == 1 && return_in_pc
)
20458 /* There are 2 registers to be popped. So, generate the pattern
20459 pop_multiple_with_stack_update_and_return to pop in PC. */
20460 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
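
/* thumb2_emit_ldrd_pop pairs loads by stack slot rather than by register
   number: slots 2k and 2k + 1 become one LDRD, and the two destination
   registers are simply the next two registers found in SAVED_REGS_MASK,
   so they need not be consecutive.  As an illustration only (the helper
   and its fixed-size output array are invented for the example, and PC --
   which the code above handles separately -- is not treated specially),
   this sketch lists the destination register for each slot in that order.  */

static int
ldrd_pop_slot_targets_sketch (unsigned long saved_regs_mask, int targets[16])
{
  int i = 0, j;

  for (j = 0; j <= 15; j++)
    if (saved_regs_mask & (1UL << j))
      targets[i++] = j;		/* Slot i is loaded into register j.  */

  return i;			/* Number of slots filled.  */
}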
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using a load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.
   TODO: Add a peephole optimization to merge the load at stack-offset zero
   with the stack update instruction, using a load with writeback
   in post-index addressing mode.  */
20478 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20482 rtx par
= NULL_RTX
;
20483 rtx dwarf
= NULL_RTX
;
20486 /* Restore saved registers. */
20487 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20489 while (j
<= LAST_ARM_REGNUM
)
20490 if (saved_regs_mask
& (1 << j
))
20493 && (saved_regs_mask
& (1 << (j
+ 1)))
20494 && (j
+ 1) != PC_REGNUM
)
20496 /* Current register and next register form register pair for which
20497 LDRD can be generated. PC is always the last register popped, and
20498 we handle it separately. */
20500 mem
= gen_frame_mem (DImode
,
20501 plus_constant (Pmode
,
20505 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20507 tmp
= gen_rtx_SET (DImode
, gen_rtx_REG (DImode
, j
), mem
);
20508 tmp
= emit_insn (tmp
);
20509 RTX_FRAME_RELATED_P (tmp
) = 1;
20511 /* Generate dwarf info. */
20513 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20514 gen_rtx_REG (SImode
, j
),
20516 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20517 gen_rtx_REG (SImode
, j
+ 1),
20520 REG_NOTES (tmp
) = dwarf
;
20525 else if (j
!= PC_REGNUM
)
20527 /* Emit a single word load. */
20529 mem
= gen_frame_mem (SImode
,
20530 plus_constant (Pmode
,
20534 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20536 tmp
= gen_rtx_SET (SImode
, gen_rtx_REG (SImode
, j
), mem
);
20537 tmp
= emit_insn (tmp
);
20538 RTX_FRAME_RELATED_P (tmp
) = 1;
20540 /* Generate dwarf info. */
20541 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20542 gen_rtx_REG (SImode
, j
),
20548 else /* j == PC_REGNUM */
20554 /* Update the stack. */
20557 tmp
= gen_rtx_SET (Pmode
,
20559 plus_constant (Pmode
,
20562 tmp
= emit_insn (tmp
);
20563 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20564 stack_pointer_rtx
, stack_pointer_rtx
);
20568 if (saved_regs_mask
& (1 << PC_REGNUM
))
20570 /* Only PC is to be popped. */
20571 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20572 XVECEXP (par
, 0, 0) = ret_rtx
;
20573 tmp
= gen_rtx_SET (SImode
,
20574 gen_rtx_REG (SImode
, PC_REGNUM
),
20575 gen_frame_mem (SImode
,
20576 gen_rtx_POST_INC (SImode
,
20577 stack_pointer_rtx
)));
20578 RTX_FRAME_RELATED_P (tmp
) = 1;
20579 XVECEXP (par
, 0, 1) = tmp
;
20580 par
= emit_jump_insn (par
);
20582 /* Generate dwarf info. */
20583 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20584 gen_rtx_REG (SImode
, PC_REGNUM
),
20586 REG_NOTES (par
) = dwarf
;
20587 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20588 stack_pointer_rtx
, stack_pointer_rtx
);
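
/* arm_emit_ldrd_pop uses offset addressing for every load and applies a
   single stack-pointer update afterwards, equal to the number of bytes
   popped so far (8 per LDRD pair, 4 per single load).  PC, when present,
   is restored last by a post-increment load instead of being counted in
   that adjustment.  As an illustration only (the helper name is invented
   and the PC treatment is inferred from the comments above), this sketch
   totals the deferred adjustment.  */

static int
ldrd_pop_sp_adjust_sketch (unsigned long saved_regs_mask)
{
  int j, bytes = 0;

  /* r0..r14; r15 (PC) is handled by the final post-increment load.  */
  for (j = 0; j <= 14; j++)
    if (saved_regs_mask & (1UL << j))
      bytes += 4;

  return bytes;
}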
20592 /* Calculate the size of the return value that is passed in registers. */
20594 arm_size_return_regs (void)
20598 if (crtl
->return_rtx
!= 0)
20599 mode
= GET_MODE (crtl
->return_rtx
);
20601 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20603 return GET_MODE_SIZE (mode
);
20606 /* Return true if the current function needs to save/restore LR. */
20608 thumb_force_lr_save (void)
20610 return !cfun
->machine
->lr_save_eliminated
20611 && (!leaf_function_p ()
20612 || thumb_far_jump_used_p ()
20613 || df_regs_ever_live_p (LR_REGNUM
));
/* We do not know whether r3 will be available, because an
   indirect tailcall is happening in this particular case.  */
20620 is_indirect_tailcall_p (rtx call
)
20622 rtx pat
= PATTERN (call
);
20624 /* Indirect tail call. */
20625 pat
= XVECEXP (pat
, 0, 0);
20626 if (GET_CODE (pat
) == SET
)
20627 pat
= SET_SRC (pat
);
20629 pat
= XEXP (XEXP (pat
, 0), 0);
20630 return REG_P (pat
);
20633 /* Return true if r3 is used by any of the tail call insns in the
20634 current function. */
20636 any_sibcall_could_use_r3 (void)
20641 if (!crtl
->tail_call_emit
)
20643 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20644 if (e
->flags
& EDGE_SIBCALL
)
20646 rtx call
= BB_END (e
->src
);
20647 if (!CALL_P (call
))
20648 call
= prev_nonnote_nondebug_insn (call
);
20649 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20650 if (find_regno_fusage (call
, USE
, 3)
20651 || is_indirect_tailcall_p (call
))
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.

   Typical stack layout (pointer labels only), from the highest address
   to the lowest:

     old stack pointer ->     (saved arguments for vararg functions below)
     hard FP & arg pointer ->
     soft frame pointer ->
     locals base pointer ->
     current stack pointer ->

   For a given function some or all of these stack components
   may not be needed, giving rise to the possibility of
   eliminating some of the registers.

   The values returned by this function must reflect the behavior
   of arm_expand_prologue () and arm_compute_save_reg_mask ().

   The sign of the number returned reflects the direction of stack
   growth, so the values are positive for all eliminations except
   from the soft frame pointer to the hard frame pointer.

   SFP may point just inside the local variables block to ensure correct
   alignment.  */
20707 /* Calculate stack offsets. These are used to calculate register elimination
20708 offsets and in prologue/epilogue code. Also calculates which registers
20709 should be saved. */
20711 static arm_stack_offsets
*
20712 arm_get_frame_offsets (void)
20714 struct arm_stack_offsets
*offsets
;
20715 unsigned long func_type
;
20719 HOST_WIDE_INT frame_size
;
20722 offsets
= &cfun
->machine
->stack_offsets
;
  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     We only need to know about leaf functions once reload has completed,
     and the frame size cannot be changed after that time, so we can safely
     use the cached value.  */
20734 if (reload_completed
)
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
20739 frame_size
= ROUND_UP_WORD (get_frame_size ());
20741 leaf
= leaf_function_p ();
20743 /* Space for variadic functions. */
20744 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20746 /* In Thumb mode this is incorrect, but never used. */
20748 = (offsets
->saved_args
20749 + arm_compute_static_chain_stack_bytes ()
20750 + (frame_pointer_needed
? 4 : 0));
20754 unsigned int regno
;
20756 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20757 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20758 saved
= core_saved
;
20760 /* We know that SP will be doubleword aligned on entry, and we must
20761 preserve that condition at any subroutine call. We also require the
20762 soft frame pointer to be doubleword aligned. */
20764 if (TARGET_REALLY_IWMMXT
)
20766 /* Check for the call-saved iWMMXt registers. */
20767 for (regno
= FIRST_IWMMXT_REGNUM
;
20768 regno
<= LAST_IWMMXT_REGNUM
;
20770 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20774 func_type
= arm_current_func_type ();
20775 /* Space for saved VFP registers. */
20776 if (! IS_VOLATILE (func_type
)
20777 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20778 saved
+= arm_get_vfp_saved_size ();
20780 else /* TARGET_THUMB1 */
20782 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20783 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20784 saved
= core_saved
;
20785 if (TARGET_BACKTRACE
)
20789 /* Saved registers include the stack frame. */
20790 offsets
->saved_regs
20791 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20792 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20794 /* A leaf function does not need any stack alignment if it has nothing
20796 if (leaf
&& frame_size
== 0
20797 /* However if it calls alloca(), we have a dynamically allocated
20798 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20799 && ! cfun
->calls_alloca
)
20801 offsets
->outgoing_args
= offsets
->soft_frame
;
20802 offsets
->locals_base
= offsets
->soft_frame
;
20806 /* Ensure SFP has the correct alignment. */
20807 if (ARM_DOUBLEWORD_ALIGN
20808 && (offsets
->soft_frame
& 7))
20810 offsets
->soft_frame
+= 4;
20811 /* Try to align stack by pushing an extra reg. Don't bother doing this
20812 when there is a stack frame as the alignment will be rolled into
20813 the normal stack adjustment. */
20814 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20818 /* Register r3 is caller-saved. Normally it does not need to be
20819 saved on entry by the prologue. However if we choose to save
20820 it for padding then we may confuse the compiler into thinking
20821 a prologue sequence is required when in fact it is not. This
20822 will occur when shrink-wrapping if r3 is used as a scratch
20823 register and there are no other callee-saved writes.
20825 This situation can be avoided when other callee-saved registers
20826 are available and r3 is not mandatory if we choose a callee-saved
20827 register for padding. */
20828 bool prefer_callee_reg_p
= false;
20830 /* If it is safe to use r3, then do so. This sometimes
20831 generates better code on Thumb-2 by avoiding the need to
20832 use 32-bit push/pop instructions. */
20833 if (! any_sibcall_could_use_r3 ()
20834 && arm_size_return_regs () <= 12
20835 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20837 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20840 if (!TARGET_THUMB2
)
20841 prefer_callee_reg_p
= true;
20844 || prefer_callee_reg_p
)
20846 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20848 /* Avoid fixed registers; they may be changed at
20849 arbitrary times so it's unsafe to restore them
20850 during the epilogue. */
20852 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20862 offsets
->saved_regs
+= 4;
20863 offsets
->saved_regs_mask
|= (1 << reg
);
20868 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20869 offsets
->outgoing_args
= (offsets
->locals_base
20870 + crtl
->outgoing_args_size
);
20872 if (ARM_DOUBLEWORD_ALIGN
)
20874 /* Ensure SP remains doubleword aligned. */
20875 if (offsets
->outgoing_args
& 7)
20876 offsets
->outgoing_args
+= 4;
20877 gcc_assert (!(offsets
->outgoing_args
& 7));
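
/* The doubleword-alignment fix-ups in arm_get_frame_offsets only ever add
   a single word of padding, because every individual contribution to the
   frame is already word-aligned.  As an illustration only (the helper is
   not referenced by the backend), this sketch rounds an offset up to the
   next 8-byte boundary in the same way.  */

static int
frame_offset_align_sketch (int offset)
{
  if (offset & 7)
    offset += 4;	/* One extra word restores 8-byte alignment.  */
  return offset;
}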
20884 /* Calculate the relative offsets for the different stack pointers. Positive
20885 offsets are in the direction of stack growth. */
20888 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20890 arm_stack_offsets
*offsets
;
20892 offsets
= arm_get_frame_offsets ();
20894 /* OK, now we have enough information to compute the distances.
20895 There must be an entry in these switch tables for each pair
20896 of registers in ELIMINABLE_REGS, even if some of the entries
20897 seem to be redundant or useless. */
20900 case ARG_POINTER_REGNUM
:
20903 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20906 case FRAME_POINTER_REGNUM
:
20907 /* This is the reverse of the soft frame pointer
20908 to hard frame pointer elimination below. */
20909 return offsets
->soft_frame
- offsets
->saved_args
;
20911 case ARM_HARD_FRAME_POINTER_REGNUM
:
20912 /* This is only non-zero in the case where the static chain register
20913 is stored above the frame. */
20914 return offsets
->frame
- offsets
->saved_args
- 4;
20916 case STACK_POINTER_REGNUM
:
20917 /* If nothing has been pushed on the stack at all
20918 then this will return -4. This *is* correct! */
20919 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20922 gcc_unreachable ();
20924 gcc_unreachable ();
20926 case FRAME_POINTER_REGNUM
:
20929 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20932 case ARM_HARD_FRAME_POINTER_REGNUM
:
20933 /* The hard frame pointer points to the top entry in the
20934 stack frame. The soft frame pointer to the bottom entry
20935 in the stack frame. If there is no stack frame at all,
20936 then they are identical. */
20938 return offsets
->frame
- offsets
->soft_frame
;
20940 case STACK_POINTER_REGNUM
:
20941 return offsets
->outgoing_args
- offsets
->soft_frame
;
20944 gcc_unreachable ();
20946 gcc_unreachable ();
20949 /* You cannot eliminate from the stack pointer.
20950 In theory you could eliminate from the hard frame
20951 pointer to the stack pointer, but this will never
20952 happen, since if a stack frame is not needed the
20953 hard frame pointer will never be used. */
20954 gcc_unreachable ();
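
/* The elimination offsets returned above are just differences between the
   cached frame offsets.  As an illustration only (the structure and helper
   below are invented stand-ins that mirror only the fields used in the
   ARG_POINTER -> STACK_POINTER case), this sketch reproduces that case:
   when nothing at all has been pushed it yields -4, which is intentional.  */

struct elim_offsets_sketch
{
  int saved_args;	/* Size of the pretend-argument area.  */
  int outgoing_args;	/* Offset of the outgoing-argument area.  */
};

static int
arg_to_sp_elimination_sketch (const struct elim_offsets_sketch *offs)
{
  return offs->outgoing_args - (offs->saved_args + 4);
}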
20958 /* Given FROM and TO register numbers, say whether this elimination is
20959 allowed. Frame pointer elimination is automatically handled.
20961 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20962 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20963 pointer, we must eliminate FRAME_POINTER_REGNUM into
20964 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20965 ARG_POINTER_REGNUM. */
20968 arm_can_eliminate (const int from
, const int to
)
20970 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20971 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20972 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20973 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20977 /* Emit RTL to save coprocessor registers on function entry. Returns the
20978 number of bytes pushed. */
20981 arm_save_coproc_regs(void)
20983 int saved_size
= 0;
20985 unsigned start_reg
;
20988 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20989 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20991 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20992 insn
= gen_rtx_MEM (V2SImode
, insn
);
20993 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20994 RTX_FRAME_RELATED_P (insn
) = 1;
20998 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
21000 start_reg
= FIRST_VFP_REGNUM
;
21002 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21004 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21005 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21007 if (start_reg
!= reg
)
21008 saved_size
+= vfp_emit_fstmd (start_reg
,
21009 (reg
- start_reg
) / 2);
21010 start_reg
= reg
+ 2;
21013 if (start_reg
!= reg
)
21014 saved_size
+= vfp_emit_fstmd (start_reg
,
21015 (reg
- start_reg
) / 2);
21021 /* Set the Thumb frame pointer from the stack pointer. */
21024 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21026 HOST_WIDE_INT amount
;
21029 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21031 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21032 stack_pointer_rtx
, GEN_INT (amount
)));
21035 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21036 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21037 expects the first two operands to be the same. */
21040 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21042 hard_frame_pointer_rtx
));
21046 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21047 hard_frame_pointer_rtx
,
21048 stack_pointer_rtx
));
21050 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
21051 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21052 RTX_FRAME_RELATED_P (dwarf
) = 1;
21053 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21056 RTX_FRAME_RELATED_P (insn
) = 1;
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
21062 arm_expand_prologue (void)
21067 unsigned long live_regs_mask
;
21068 unsigned long func_type
;
21070 int saved_pretend_args
= 0;
21071 int saved_regs
= 0;
21072 unsigned HOST_WIDE_INT args_to_push
;
21073 arm_stack_offsets
*offsets
;
21075 func_type
= arm_current_func_type ();
21077 /* Naked functions don't have prologues. */
21078 if (IS_NAKED (func_type
))
21081 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21082 args_to_push
= crtl
->args
.pretend_args_size
;
21084 /* Compute which register we will have to save onto the stack. */
21085 offsets
= arm_get_frame_offsets ();
21086 live_regs_mask
= offsets
->saved_regs_mask
;
21088 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21090 if (IS_STACKALIGN (func_type
))
21094 /* Handle a word-aligned stack pointer. We generate the following:
21099 <save and restore r0 in normal prologue/epilogue>
21103 The unwinder doesn't need to know about the stack realignment.
21104 Just tell it we saved SP in r0. */
21105 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21107 r0
= gen_rtx_REG (SImode
, 0);
21108 r1
= gen_rtx_REG (SImode
, 1);
21110 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21111 RTX_FRAME_RELATED_P (insn
) = 1;
21112 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21114 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21116 /* ??? The CFA changes here, which may cause GDB to conclude that it
21117 has entered a different function. That said, the unwind info is
21118 correct, individually, before and after this instruction because
21119 we've described the save of SP, which will override the default
21120 handling of SP as restoring from the CFA. */
21121 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21124 /* For APCS frames, if IP register is clobbered
21125 when creating frame, save that register in a special
21127 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21129 if (IS_INTERRUPT (func_type
))
21131 /* Interrupt functions must not corrupt any registers.
21132 Creating a frame pointer however, corrupts the IP
21133 register, so we must push it first. */
21134 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21136 /* Do not set RTX_FRAME_RELATED_P on this insn.
21137 The dwarf stack unwinding code only wants to see one
21138 stack decrement per function, and this is not it. If
21139 this instruction is labeled as being part of the frame
21140 creation sequence then dwarf2out_frame_debug_expr will
21141 die when it encounters the assignment of IP to FP
21142 later on, since the use of SP here establishes SP as
21143 the CFA register and not IP.
21145 Anyway this instruction is not really part of the stack
21146 frame creation although it is part of the prologue. */
21148 else if (IS_NESTED (func_type
))
21150 /* The static chain register is the same as the IP register
21151 used as a scratch register during stack frame creation.
21152 To get around this need to find somewhere to store IP
21153 whilst the frame is being created. We try the following
21156 1. The last argument register r3 if it is available.
21157 2. A slot on the stack above the frame if there are no
21158 arguments to push onto the stack.
21159 3. Register r3 again, after pushing the argument registers
21160 onto the stack, if this is a varargs function.
21161 4. The last slot on the stack created for the arguments to
21162 push, if this isn't a varargs function.
21164 Note - we only need to tell the dwarf2 backend about the SP
21165 adjustment in the second variant; the static chain register
21166 doesn't need to be unwound, as it doesn't contain a value
21167 inherited from the caller. */
21169 if (!arm_r3_live_at_start_p ())
21170 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21171 else if (args_to_push
== 0)
21175 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21178 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21179 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21182 /* Just tell the dwarf backend that we adjusted SP. */
21183 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21184 plus_constant (Pmode
, stack_pointer_rtx
,
21186 RTX_FRAME_RELATED_P (insn
) = 1;
21187 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21191 /* Store the args on the stack. */
21192 if (cfun
->machine
->uses_anonymous_args
)
21195 = emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21196 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21197 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21198 saved_pretend_args
= 1;
21204 if (args_to_push
== 4)
21205 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21208 = gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21209 plus_constant (Pmode
,
21213 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21215 /* Just tell the dwarf backend that we adjusted SP. */
21217 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21218 plus_constant (Pmode
, stack_pointer_rtx
,
21220 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21223 RTX_FRAME_RELATED_P (insn
) = 1;
21224 fp_offset
= args_to_push
;
21229 insn
= emit_set_insn (ip_rtx
,
21230 plus_constant (Pmode
, stack_pointer_rtx
,
21232 RTX_FRAME_RELATED_P (insn
) = 1;
21237 /* Push the argument registers, or reserve space for them. */
21238 if (cfun
->machine
->uses_anonymous_args
)
21239 insn
= emit_multi_reg_push
21240 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21241 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21244 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21245 GEN_INT (- args_to_push
)));
21246 RTX_FRAME_RELATED_P (insn
) = 1;
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout is APCS),
     then subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
21254 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21255 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21256 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21259 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21261 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21264 if (live_regs_mask
)
21266 unsigned long dwarf_regs_mask
= live_regs_mask
;
21268 saved_regs
+= bit_count (live_regs_mask
) * 4;
21269 if (optimize_size
&& !frame_pointer_needed
21270 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
21279 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21281 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21282 if (frame
&& n
* 4 >= frame
)
21285 live_regs_mask
|= (1 << n
) - 1;
21286 saved_regs
+= frame
;
21291 && current_tune
->prefer_ldrd_strd
21292 && !optimize_function_for_size_p (cfun
))
21294 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21296 thumb2_emit_strd_push (live_regs_mask
);
21297 else if (TARGET_ARM
21298 && !TARGET_APCS_FRAME
21299 && !IS_INTERRUPT (func_type
))
21300 arm_emit_strd_push (live_regs_mask
);
21303 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21304 RTX_FRAME_RELATED_P (insn
) = 1;
21309 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21310 RTX_FRAME_RELATED_P (insn
) = 1;
21314 if (! IS_VOLATILE (func_type
))
21315 saved_regs
+= arm_save_coproc_regs ();
21317 if (frame_pointer_needed
&& TARGET_ARM
)
21319 /* Create the new frame pointer. */
21320 if (TARGET_APCS_FRAME
)
21322 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21323 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21324 RTX_FRAME_RELATED_P (insn
) = 1;
21326 if (IS_NESTED (func_type
))
21328 /* Recover the static chain register. */
21329 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21330 insn
= gen_rtx_REG (SImode
, 3);
21333 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21334 insn
= gen_frame_mem (SImode
, insn
);
21336 emit_set_insn (ip_rtx
, insn
);
21337 /* Add a USE to stop propagate_one_insn() from barfing. */
21338 emit_insn (gen_force_register_use (ip_rtx
));
21343 insn
= GEN_INT (saved_regs
- 4);
21344 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21345 stack_pointer_rtx
, insn
));
21346 RTX_FRAME_RELATED_P (insn
) = 1;
21350 if (flag_stack_usage_info
)
21351 current_function_static_stack_size
21352 = offsets
->outgoing_args
- offsets
->saved_args
;
21354 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21356 /* This add can produce multiple insns for a large constant, so we
21357 need to get tricky. */
21358 rtx_insn
*last
= get_last_insn ();
21360 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21361 - offsets
->outgoing_args
);
21363 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21367 last
= last
? NEXT_INSN (last
) : get_insns ();
21368 RTX_FRAME_RELATED_P (last
) = 1;
21370 while (last
!= insn
);
21372 /* If the frame pointer is needed, emit a special barrier that
21373 will prevent the scheduler from moving stores to the frame
21374 before the stack adjustment. */
21375 if (frame_pointer_needed
)
21376 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21377 hard_frame_pointer_rtx
));
21381 if (frame_pointer_needed
&& TARGET_THUMB2
)
21382 thumb_set_frame_pointer (offsets
);
21384 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21386 unsigned long mask
;
21388 mask
= live_regs_mask
;
21389 mask
&= THUMB2_WORK_REGS
;
21390 if (!IS_NESTED (func_type
))
21391 mask
|= (1 << IP_REGNUM
);
21392 arm_load_pic_register (mask
);
  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prologue.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
21400 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21401 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21402 && cfun
->can_throw_non_call_exceptions
))
21403 emit_insn (gen_blockage ());
21405 /* If the link register is being kept alive, with the return address in it,
21406 then make sure that it does not get reused by the ce2 pass. */
21407 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21408 cfun
->machine
->lr_save_eliminated
= 1;
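
/* For a varargs function the prologue above pushes the unnamed argument
   registers with the mask (0xf0 >> (args_to_push / 4)) & 0xf: 4 bytes of
   pretend arguments select {r3}, 8 bytes select {r2, r3}, 12 bytes
   {r1, r2, r3} and 16 bytes {r0, r1, r2, r3}.  As an illustration only
   (the helper name is invented), this sketch evaluates that expression.  */

static unsigned long
pretend_args_push_mask_sketch (unsigned int args_to_push)
{
  return (0xf0 >> (args_to_push / 4)) & 0xf;
}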
21411 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21413 arm_print_condition (FILE *stream
)
21415 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21417 /* Branch conversion is not implemented for Thumb-2. */
21420 output_operand_lossage ("predicated Thumb instruction");
21423 if (current_insn_predicate
!= NULL
)
21425 output_operand_lossage
21426 ("predicated instruction in conditional sequence");
21430 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21432 else if (current_insn_predicate
)
21434 enum arm_cond_code code
;
21438 output_operand_lossage ("predicated Thumb instruction");
21442 code
= get_arm_condition_code (current_insn_predicate
);
21443 fputs (arm_condition_codes
[code
], stream
);
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
21470 arm_print_operand (FILE *stream
, rtx x
, int code
)
21475 fputs (ASM_COMMENT_START
, stream
);
21479 fputs (user_label_prefix
, stream
);
21483 fputs (REGISTER_PREFIX
, stream
);
21487 arm_print_condition (stream
);
21491 /* Nothing in unified syntax, otherwise the current condition code. */
21492 if (!TARGET_UNIFIED_ASM
)
21493 arm_print_condition (stream
);
21497 /* The current condition code in unified syntax, otherwise nothing. */
21498 if (TARGET_UNIFIED_ASM
)
21499 arm_print_condition (stream
);
21503 /* The current condition code for a condition code setting instruction.
21504 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21505 if (TARGET_UNIFIED_ASM
)
21507 fputc('s', stream
);
21508 arm_print_condition (stream
);
21512 arm_print_condition (stream
);
21513 fputc('s', stream
);
21518 /* If the instruction is conditionally executed then print
21519 the current condition code, otherwise print 's'. */
21520 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21521 if (current_insn_predicate
)
21522 arm_print_condition (stream
);
21524 fputc('s', stream
);
    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
21537 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21538 r
= real_value_negate (&r
);
21539 fprintf (stream
, "%s", fp_const_from_val (&r
));
21543 /* An integer or symbol address without a preceding # sign. */
21545 switch (GET_CODE (x
))
21548 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21552 output_addr_const (stream
, x
);
21556 if (GET_CODE (XEXP (x
, 0)) == PLUS
21557 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21559 output_addr_const (stream
, x
);
21562 /* Fall through. */
21565 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21569 /* An integer that we want to print in HEX. */
21571 switch (GET_CODE (x
))
21574 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21578 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21583 if (CONST_INT_P (x
))
21586 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21587 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21591 putc ('~', stream
);
21592 output_addr_const (stream
, x
);
21597 /* Print the log2 of a CONST_INT. */
21601 if (!CONST_INT_P (x
)
21602 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21603 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21605 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21610 /* The low 16 bits of an immediate constant. */
21611 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21615 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21619 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21627 shift
= shift_op (x
, &val
);
21631 fprintf (stream
, ", %s ", shift
);
21633 arm_print_operand (stream
, XEXP (x
, 1), 0);
21635 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21640 /* An explanation of the 'Q', 'R' and 'H' register operands:
21642 In a pair of registers containing a DI or DF value the 'Q'
21643 operand returns the register number of the register containing
21644 the least significant part of the value. The 'R' operand returns
21645 the register number of the register containing the most
21646 significant part of the value.
21648 The 'H' operand returns the higher of the two register numbers.
21649 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21650 same as the 'Q' operand, since the most significant part of the
21651 value is held in the lower number register. The reverse is true
21652 on systems where WORDS_BIG_ENDIAN is false.
21654 The purpose of these operands is to distinguish between cases
21655 where the endian-ness of the values is important (for example
21656 when they are added together), and cases where the endian-ness
21657 is irrelevant, but the order of register operations is important.
21658 For example when loading a value from memory into a register
21659 pair, the endian-ness does not matter. Provided that the value
21660 from the lower memory address is put into the lower numbered
21661 register, and the value from the higher address is put into the
21662 higher numbered register, the load will work regardless of whether
21663 the value being loaded is big-wordian or little-wordian. The
21664 order of the two register loads can matter however, if the address
21665 of the memory location is actually held in one of the registers
21666 being overwritten by the load.
21668 The 'Q' and 'R' constraints are also available for 64-bit
21671 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21673 rtx part
= gen_lowpart (SImode
, x
);
21674 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21678 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21680 output_operand_lossage ("invalid operand for code '%c'", code
);
21684 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21688 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21690 machine_mode mode
= GET_MODE (x
);
21693 if (mode
== VOIDmode
)
21695 part
= gen_highpart_mode (SImode
, mode
, x
);
21696 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21700 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21702 output_operand_lossage ("invalid operand for code '%c'", code
);
21706 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21710 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21712 output_operand_lossage ("invalid operand for code '%c'", code
);
21716 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21720 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21722 output_operand_lossage ("invalid operand for code '%c'", code
);
21726 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21730 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21732 output_operand_lossage ("invalid operand for code '%c'", code
);
21736 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21740 asm_fprintf (stream
, "%r",
21741 REG_P (XEXP (x
, 0))
21742 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21746 asm_fprintf (stream
, "{%r-%r}",
21748 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21751 /* Like 'M', but writing doubleword vector registers, for use by Neon
21755 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21756 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21758 asm_fprintf (stream
, "{d%d}", regno
);
21760 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21765 /* CONST_TRUE_RTX means always -- that's the default. */
21766 if (x
== const_true_rtx
)
21769 if (!COMPARISON_P (x
))
21771 output_operand_lossage ("invalid operand for code '%c'", code
);
21775 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21780 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21781 want to do that. */
21782 if (x
== const_true_rtx
)
21784 output_operand_lossage ("instruction never executed");
21787 if (!COMPARISON_P (x
))
21789 output_operand_lossage ("invalid operand for code '%c'", code
);
21793 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21794 (get_arm_condition_code (x
))],
21804 /* Former Maverick support, removed after GCC-4.7. */
21805 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21810 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21811 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21812 /* Bad value for wCG register number. */
21814 output_operand_lossage ("invalid operand for code '%c'", code
);
21819 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21822 /* Print an iWMMXt control register name. */
21824 if (!CONST_INT_P (x
)
21826 || INTVAL (x
) >= 16)
21827 /* Bad value for wC register number. */
21829 output_operand_lossage ("invalid operand for code '%c'", code
);
21835 static const char * wc_reg_names
[16] =
21837 "wCID", "wCon", "wCSSF", "wCASF",
21838 "wC4", "wC5", "wC6", "wC7",
21839 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21840 "wC12", "wC13", "wC14", "wC15"
21843 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21847 /* Print the high single-precision register of a VFP double-precision
21851 machine_mode mode
= GET_MODE (x
);
21854 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21856 output_operand_lossage ("invalid operand for code '%c'", code
);
21861 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21863 output_operand_lossage ("invalid operand for code '%c'", code
);
21867 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21871 /* Print a VFP/Neon double precision or quad precision register name. */
21875 machine_mode mode
= GET_MODE (x
);
21876 int is_quad
= (code
== 'q');
21879 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21881 output_operand_lossage ("invalid operand for code '%c'", code
);
21886 || !IS_VFP_REGNUM (REGNO (x
)))
21888 output_operand_lossage ("invalid operand for code '%c'", code
);
21893 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21894 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21896 output_operand_lossage ("invalid operand for code '%c'", code
);
21900 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21901 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21905 /* These two codes print the low/high doubleword register of a Neon quad
21906 register, respectively. For pair-structure types, can also print
21907 low/high quadword registers. */
21911 machine_mode mode
= GET_MODE (x
);
21914 if ((GET_MODE_SIZE (mode
) != 16
21915 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21917 output_operand_lossage ("invalid operand for code '%c'", code
);
21922 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21924 output_operand_lossage ("invalid operand for code '%c'", code
);
21928 if (GET_MODE_SIZE (mode
) == 16)
21929 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21930 + (code
== 'f' ? 1 : 0));
21932 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21933 + (code
== 'f' ? 1 : 0));
21937 /* Print a VFPv3 floating-point constant, represented as an integer
21941 int index
= vfp3_const_double_index (x
);
21942 gcc_assert (index
!= -1);
21943 fprintf (stream
, "%d", index
);
21947 /* Print bits representing opcode features for Neon.
21949 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21950 and polynomials as unsigned.
21952 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21954 Bit 2 is 1 for rounding functions, 0 otherwise. */
21956 /* Identify the type as 's', 'u', 'p' or 'f'. */
21959 HOST_WIDE_INT bits
= INTVAL (x
);
21960 fputc ("uspf"[bits
& 3], stream
);
21964 /* Likewise, but signed and unsigned integers are both 'i'. */
21967 HOST_WIDE_INT bits
= INTVAL (x
);
21968 fputc ("iipf"[bits
& 3], stream
);
21972 /* As for 'T', but emit 'u' instead of 'p'. */
21975 HOST_WIDE_INT bits
= INTVAL (x
);
21976 fputc ("usuf"[bits
& 3], stream
);
21980 /* Bit 2: rounding (vs none). */
21983 HOST_WIDE_INT bits
= INTVAL (x
);
21984 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21988 /* Memory operand for vld1/vst1 instruction. */
21992 bool postinc
= FALSE
;
21993 rtx postinc_reg
= NULL
;
21994 unsigned align
, memsize
, align_bits
;
21996 gcc_assert (MEM_P (x
));
21997 addr
= XEXP (x
, 0);
21998 if (GET_CODE (addr
) == POST_INC
)
22001 addr
= XEXP (addr
, 0);
22003 if (GET_CODE (addr
) == POST_MODIFY
)
22005 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22006 addr
= XEXP (addr
, 0);
22008 asm_fprintf (stream
, "[%r", REGNO (addr
));
22010 /* We know the alignment of this access, so we can emit a hint in the
22011 instruction (for some alignments) as an aid to the memory subsystem
22013 align
= MEM_ALIGN (x
) >> 3;
22014 memsize
= MEM_SIZE (x
);
22016 /* Only certain alignment specifiers are supported by the hardware. */
22017 if (memsize
== 32 && (align
% 32) == 0)
22019 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22021 else if (memsize
>= 8 && (align
% 8) == 0)
22026 if (align_bits
!= 0)
22027 asm_fprintf (stream
, ":%d", align_bits
);
22029 asm_fprintf (stream
, "]");
22032 fputs("!", stream
);
22034 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22042 gcc_assert (MEM_P (x
));
22043 addr
= XEXP (x
, 0);
22044 gcc_assert (REG_P (addr
));
22045 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22049 /* Translate an S register number into a D register number and element index. */
22052 machine_mode mode
= GET_MODE (x
);
22055 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22057 output_operand_lossage ("invalid operand for code '%c'", code
);
22062 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22064 output_operand_lossage ("invalid operand for code '%c'", code
);
22068 regno
= regno
- FIRST_VFP_REGNUM
;
22069 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22074 gcc_assert (CONST_DOUBLE_P (x
));
22076 result
= vfp3_const_double_for_fract_bits (x
);
22078 result
= vfp3_const_double_for_bits (x
);
22079 fprintf (stream
, "#%d", result
);
22082 /* Register specifier for vld1.16/vst1.16. Translate the S register
22083 number into a D register number and element index. */
22086 machine_mode mode
= GET_MODE (x
);
22089 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22091 output_operand_lossage ("invalid operand for code '%c'", code
);
22096 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22098 output_operand_lossage ("invalid operand for code '%c'", code
);
regno = regno - FIRST_VFP_REGNUM;
fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
22110 output_operand_lossage ("missing operand");
22114 switch (GET_CODE (x
))
22117 asm_fprintf (stream
, "%r", REGNO (x
));
22121 output_memory_reference_mode
= GET_MODE (x
);
22122 output_address (XEXP (x
, 0));
22128 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22129 sizeof (fpstr
), 0, 1);
22130 fprintf (stream
, "#%s", fpstr
);
22135 gcc_assert (GET_CODE (x
) != NEG
);
22136 fputc ('#', stream
);
22137 if (GET_CODE (x
) == HIGH
)
22139 fputs (":lower16:", stream
);
22143 output_addr_const (stream
, x
);
22149 /* Target hook for printing a memory address. */
22151 arm_print_operand_address (FILE *stream
, rtx x
)
22155 int is_minus
= GET_CODE (x
) == MINUS
;
22158 asm_fprintf (stream
, "[%r]", REGNO (x
));
22159 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22161 rtx base
= XEXP (x
, 0);
22162 rtx index
= XEXP (x
, 1);
22163 HOST_WIDE_INT offset
= 0;
22165 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
/* Ensure that BASE is a register (one of them must be).
   Also ensure that the SP is not used as an index register.  */
22170 std::swap (base
, index
);
22172 switch (GET_CODE (index
))
22175 offset
= INTVAL (index
);
22178 asm_fprintf (stream
, "[%r, #%wd]",
22179 REGNO (base
), offset
);
22183 asm_fprintf (stream
, "[%r, %s%r]",
22184 REGNO (base
), is_minus
? "-" : "",
22194 asm_fprintf (stream
, "[%r, %s%r",
22195 REGNO (base
), is_minus
? "-" : "",
22196 REGNO (XEXP (index
, 0)));
22197 arm_print_operand (stream
, index
, 'S');
22198 fputs ("]", stream
);
22203 gcc_unreachable ();
22206 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22207 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22209 extern machine_mode output_memory_reference_mode
;
22211 gcc_assert (REG_P (XEXP (x
, 0)));
22213 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22214 asm_fprintf (stream
, "[%r, #%s%d]!",
22215 REGNO (XEXP (x
, 0)),
22216 GET_CODE (x
) == PRE_DEC
? "-" : "",
22217 GET_MODE_SIZE (output_memory_reference_mode
));
22219 asm_fprintf (stream
, "[%r], #%s%d",
22220 REGNO (XEXP (x
, 0)),
22221 GET_CODE (x
) == POST_DEC
? "-" : "",
22222 GET_MODE_SIZE (output_memory_reference_mode
));
22224 else if (GET_CODE (x
) == PRE_MODIFY
)
22226 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22227 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22228 asm_fprintf (stream
, "#%wd]!",
22229 INTVAL (XEXP (XEXP (x
, 1), 1)));
22231 asm_fprintf (stream
, "%r]!",
22232 REGNO (XEXP (XEXP (x
, 1), 1)));
22234 else if (GET_CODE (x
) == POST_MODIFY
)
22236 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22237 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22238 asm_fprintf (stream
, "#%wd",
22239 INTVAL (XEXP (XEXP (x
, 1), 1)));
22241 asm_fprintf (stream
, "%r",
22242 REGNO (XEXP (XEXP (x
, 1), 1)));
22244 else output_addr_const (stream
, x
);
22249 asm_fprintf (stream
, "[%r]", REGNO (x
));
22250 else if (GET_CODE (x
) == POST_INC
)
22251 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22252 else if (GET_CODE (x
) == PLUS
)
22254 gcc_assert (REG_P (XEXP (x
, 0)));
22255 if (CONST_INT_P (XEXP (x
, 1)))
22256 asm_fprintf (stream
, "[%r, #%wd]",
22257 REGNO (XEXP (x
, 0)),
22258 INTVAL (XEXP (x
, 1)));
22260 asm_fprintf (stream
, "[%r, %r]",
22261 REGNO (XEXP (x
, 0)),
22262 REGNO (XEXP (x
, 1)));
22265 output_addr_const (stream
, x
);
22269 /* Target hook for indicating whether a punctuation character for
22270 TARGET_PRINT_OPERAND is valid. */
22272 arm_print_operand_punct_valid_p (unsigned char code
)
22274 return (code
== '@' || code
== '|' || code
== '.'
22275 || code
== '(' || code
== ')' || code
== '#'
22276 || (TARGET_32BIT
&& (code
== '?'))
22277 || (TARGET_THUMB2
&& (code
== '!'))
22278 || (TARGET_THUMB
&& (code
== '_')));
22281 /* Target hook for assembling integer objects. The ARM version needs to
22282 handle word-sized values specially. */
22284 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22288 if (size
== UNITS_PER_WORD
&& aligned_p
)
22290 fputs ("\t.word\t", asm_out_file
);
22291 output_addr_const (asm_out_file
, x
);
22293 /* Mark symbols as position independent. We only do this in the
22294 .text segment, not in the .data segment. */
22295 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22296 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22298 /* See legitimize_pic_address for an explanation of the
22299 TARGET_VXWORKS_RTP check. */
22300 if (!arm_pic_data_is_text_relative
22301 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22302 fputs ("(GOT)", asm_out_file
);
22304 fputs ("(GOTOFF)", asm_out_file
);
22306 fputc ('\n', asm_out_file
);
22310 mode
= GET_MODE (x
);
22312 if (arm_vector_mode_supported_p (mode
))
22316 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22318 units
= CONST_VECTOR_NUNITS (x
);
22319 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
22321 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22322 for (i
= 0; i
< units
; i
++)
22324 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22326 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22329 for (i
= 0; i
< units
; i
++)
22331 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22332 REAL_VALUE_TYPE rval
;
22334 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
22337 (rval
, GET_MODE_INNER (mode
),
22338 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22344 return default_assemble_integer (x
, size
, aligned_p
);
22348 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22352 if (!TARGET_AAPCS_BASED
)
22355 default_named_section_asm_out_constructor
22356 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22360 /* Put these in the .init_array section, using a special relocation. */
22361 if (priority
!= DEFAULT_INIT_PRIORITY
)
22364 sprintf (buf
, "%s.%.5u",
22365 is_ctor
? ".init_array" : ".fini_array",
22367 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22374 switch_to_section (s
);
22375 assemble_align (POINTER_SIZE
);
22376 fputs ("\t.word\t", asm_out_file
);
22377 output_addr_const (asm_out_file
, symbol
);
22378 fputs ("(target1)\n", asm_out_file
);
22381 /* Add a function to the list of static constructors. */
22384 arm_elf_asm_constructor (rtx symbol
, int priority
)
22386 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22389 /* Add a function to the list of static destructors. */
22392 arm_elf_asm_destructor (rtx symbol
, int priority
)
22394 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22397 /* A finite state machine takes care of noticing whether or not instructions
22398 can be conditionally executed, and thus decrease execution time and code
22399 size by deleting branch instructions. The fsm is controlled by
22400 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22402 /* The state of the fsm controlling condition codes are:
22403 0: normal, do nothing special
22404 1: make ASM_OUTPUT_OPCODE not output this instruction
22405 2: make ASM_OUTPUT_OPCODE not output this instruction
22406 3: make instructions conditional
22407 4: make instructions conditional
22409 State transitions (state->state by whom under condition):
22410 0 -> 1 final_prescan_insn if the `target' is a label
22411 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22412 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22413 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22414 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22415 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22416 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22417 (the target insn is arm_target_insn).
22419 If the jump clobbers the conditions then we use states 2 and 4.
22421 A similar thing can be done with conditional return insns.
22423 XXX In case the `target' is an unconditional branch, this conditionalising
22424 of the instructions always reduces code size, but not always execution
22425 time. But then, I want to reduce the code size to somewhere near what
22426 /bin/cc produces. */
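/* Illustrative standalone sketch of the state machine described above
   (the enum and helper names are invented for the example; the real state
   is the integer arm_ccfsm_state driven by final_prescan_insn and
   ASM_OUTPUT_OPCODE).  */
enum ccfsm_state_sketch
{
  CCFSM_NORMAL = 0,		/* do nothing special */
  CCFSM_SKIP_TO_LABEL = 1,	/* suppress the conditional branch; target is a label */
  CCFSM_SKIP_TO_BRANCH = 2,	/* suppress the conditional branch; target is an unconditional branch */
  CCFSM_COND_TO_LABEL = 3,	/* conditionalise insns up to the target label */
  CCFSM_COND_TO_BRANCH = 4	/* conditionalise insns up to the target branch */
};

/* Transitions 1 -> 3 and 2 -> 4 happen once the suppressed branch has been
   skipped; 3 -> 0 when the target label is reached and 4 -> 0 when the
   target unconditional branch is reached.  */
static enum ccfsm_state_sketch
ccfsm_after_suppressed_branch_sketch (enum ccfsm_state_sketch s)
{
  if (s == CCFSM_SKIP_TO_LABEL)
    return CCFSM_COND_TO_LABEL;
  if (s == CCFSM_SKIP_TO_BRANCH)
    return CCFSM_COND_TO_BRANCH;
  return s;
}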
22428 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22429 instructions. When a COND_EXEC instruction is seen the subsequent
22430 instructions are scanned so that multiple conditional instructions can be
22431 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22432 specify the length and true/false mask for the IT block. These will be
22433 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
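/* Standalone sketch (function name invented for the example) of how a
   mask/length pair of the kind kept in arm_condexec_mask and
   arm_condexec_masklen maps onto an IT mnemonic: bit N set means insn N of
   the block uses the block's condition ('t'), clear means it uses the
   inverse ('e').  Assumes the usual <stdio.h> declarations, which
   system.h already provides here.  */
static void
print_it_mnemonic_sketch (FILE *stream, unsigned mask, unsigned len,
			  const char *cond)
{
  char pattern[5];		/* an IT block covers at most four insns */
  unsigned n;

  if (len > 4)
    len = 4;
  for (n = 0; n < len; n++)
    pattern[n] = (mask & (1u << n)) ? 't' : 'e';
  pattern[len] = '\0';

  /* For example mask 0xb (binary 1011), len 4, cond "ge" prints
     "ittet	ge".  */
  fprintf (stream, "i%s\t%s\n", pattern, cond);
}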
22435 /* Returns the index of the ARM condition code string in
22436 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22437 COMPARISON should be an rtx like `(eq (...) (...))'. */
22440 maybe_get_arm_condition_code (rtx comparison
)
22442 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22443 enum arm_cond_code code
;
22444 enum rtx_code comp_code
= GET_CODE (comparison
);
22446 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22447 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22448 XEXP (comparison
, 1));
22452 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22453 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22454 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22455 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22456 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22457 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22458 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22459 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22460 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22461 case CC_DLTUmode
: code
= ARM_CC
;
22464 if (comp_code
== EQ
)
22465 return ARM_INVERSE_CONDITION_CODE (code
);
22466 if (comp_code
== NE
)
22473 case NE
: return ARM_NE
;
22474 case EQ
: return ARM_EQ
;
22475 case GE
: return ARM_PL
;
22476 case LT
: return ARM_MI
;
22477 default: return ARM_NV
;
22483 case NE
: return ARM_NE
;
22484 case EQ
: return ARM_EQ
;
22485 default: return ARM_NV
;
22491 case NE
: return ARM_MI
;
22492 case EQ
: return ARM_PL
;
22493 default: return ARM_NV
;
22498 /* We can handle all cases except UNEQ and LTGT. */
22501 case GE
: return ARM_GE
;
22502 case GT
: return ARM_GT
;
22503 case LE
: return ARM_LS
;
22504 case LT
: return ARM_MI
;
22505 case NE
: return ARM_NE
;
22506 case EQ
: return ARM_EQ
;
22507 case ORDERED
: return ARM_VC
;
22508 case UNORDERED
: return ARM_VS
;
22509 case UNLT
: return ARM_LT
;
22510 case UNLE
: return ARM_LE
;
22511 case UNGT
: return ARM_HI
;
22512 case UNGE
: return ARM_PL
;
22513 /* UNEQ and LTGT do not have a representation. */
22514 case UNEQ
: /* Fall through. */
22515 case LTGT
: /* Fall through. */
22516 default: return ARM_NV
;
22522 case NE
: return ARM_NE
;
22523 case EQ
: return ARM_EQ
;
22524 case GE
: return ARM_LE
;
22525 case GT
: return ARM_LT
;
22526 case LE
: return ARM_GE
;
22527 case LT
: return ARM_GT
;
22528 case GEU
: return ARM_LS
;
22529 case GTU
: return ARM_CC
;
22530 case LEU
: return ARM_CS
;
22531 case LTU
: return ARM_HI
;
22532 default: return ARM_NV
;
22538 case LTU
: return ARM_CS
;
22539 case GEU
: return ARM_CC
;
22540 default: return ARM_NV
;
22546 case NE
: return ARM_NE
;
22547 case EQ
: return ARM_EQ
;
22548 case GEU
: return ARM_CS
;
22549 case GTU
: return ARM_HI
;
22550 case LEU
: return ARM_LS
;
22551 case LTU
: return ARM_CC
;
22552 default: return ARM_NV
;
22558 case GE
: return ARM_GE
;
22559 case LT
: return ARM_LT
;
22560 case GEU
: return ARM_CS
;
22561 case LTU
: return ARM_CC
;
22562 default: return ARM_NV
;
22568 case NE
: return ARM_NE
;
22569 case EQ
: return ARM_EQ
;
22570 case GE
: return ARM_GE
;
22571 case GT
: return ARM_GT
;
22572 case LE
: return ARM_LE
;
22573 case LT
: return ARM_LT
;
22574 case GEU
: return ARM_CS
;
22575 case GTU
: return ARM_HI
;
22576 case LEU
: return ARM_LS
;
22577 case LTU
: return ARM_CC
;
22578 default: return ARM_NV
;
22581 default: gcc_unreachable ();
22585 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22586 static enum arm_cond_code
22587 get_arm_condition_code (rtx comparison
)
22589 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22590 gcc_assert (code
!= ARM_NV
);
22594 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22597 thumb2_final_prescan_insn (rtx_insn
*insn
)
22599 rtx_insn
*first_insn
= insn
;
22600 rtx body
= PATTERN (insn
);
22602 enum arm_cond_code code
;
22607 /* max_insns_skipped in the tune was already taken into account in the
22608 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22609 just emit the IT blocks as we can. It does not make sense to split
22611 max
= MAX_INSN_PER_IT_BLOCK
;
22613 /* Remove the previous insn from the count of insns to be output. */
22614 if (arm_condexec_count
)
22615 arm_condexec_count
--;
22617 /* Nothing to do if we are already inside a conditional block. */
22618 if (arm_condexec_count
)
22621 if (GET_CODE (body
) != COND_EXEC
)
22624 /* Conditional jumps are implemented directly. */
22628 predicate
= COND_EXEC_TEST (body
);
22629 arm_current_cc
= get_arm_condition_code (predicate
);
22631 n
= get_attr_ce_count (insn
);
22632 arm_condexec_count
= 1;
22633 arm_condexec_mask
= (1 << n
) - 1;
22634 arm_condexec_masklen
= n
;
22635 /* See if subsequent instructions can be combined into the same block. */
22638 insn
= next_nonnote_insn (insn
);
22640 /* Jumping into the middle of an IT block is illegal, so a label or
22641 barrier terminates the block. */
22642 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22645 body
= PATTERN (insn
);
22646 /* USE and CLOBBER aren't really insns, so just skip them. */
22647 if (GET_CODE (body
) == USE
22648 || GET_CODE (body
) == CLOBBER
)
22651 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22652 if (GET_CODE (body
) != COND_EXEC
)
22654 /* Maximum number of conditionally executed instructions in a block. */
22655 n
= get_attr_ce_count (insn
);
22656 if (arm_condexec_masklen
+ n
> max
)
22659 predicate
= COND_EXEC_TEST (body
);
22660 code
= get_arm_condition_code (predicate
);
22661 mask
= (1 << n
) - 1;
22662 if (arm_current_cc
== code
)
22663 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22664 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22667 arm_condexec_count
++;
22668 arm_condexec_masklen
+= n
;
22670 /* A jump must be the last instruction in a conditional block. */
22674 /* Restore recog_data (getting the attributes of other insns can
22675 destroy this array, but final.c assumes that it remains intact
22676 across this call). */
22677 extract_constrain_insn_cached (first_insn
);
22681 arm_final_prescan_insn (rtx_insn
*insn
)
22683 /* BODY will hold the body of INSN. */
22684 rtx body
= PATTERN (insn
);
22686 /* This will be 1 if trying to repeat the trick, and things need to be
22687 reversed if it appears to fail. */
22690 /* If we start with a return insn, we only succeed if we find another one. */
22691 int seeking_return
= 0;
22692 enum rtx_code return_code
= UNKNOWN
;
22694 /* START_INSN will hold the insn from where we start looking. This is the
22695 first insn after the following code_label if REVERSE is true. */
22696 rtx_insn
*start_insn
= insn
;
22698 /* If in state 4, check if the target branch is reached, in order to
22699 change back to state 0. */
22700 if (arm_ccfsm_state
== 4)
22702 if (insn
== arm_target_insn
)
22704 arm_target_insn
= NULL
;
22705 arm_ccfsm_state
= 0;
22710 /* If in state 3, it is possible to repeat the trick, if this insn is an
22711 unconditional branch to a label, and immediately following this branch
22712 is the previous target label which is only used once, and the label this
22713 branch jumps to is not too far off. */
22714 if (arm_ccfsm_state
== 3)
22716 if (simplejump_p (insn
))
22718 start_insn
= next_nonnote_insn (start_insn
);
22719 if (BARRIER_P (start_insn
))
22721 /* XXX Isn't this always a barrier? */
22722 start_insn
= next_nonnote_insn (start_insn
);
22724 if (LABEL_P (start_insn
)
22725 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22726 && LABEL_NUSES (start_insn
) == 1)
22731 else if (ANY_RETURN_P (body
))
22733 start_insn
= next_nonnote_insn (start_insn
);
22734 if (BARRIER_P (start_insn
))
22735 start_insn
= next_nonnote_insn (start_insn
);
22736 if (LABEL_P (start_insn
)
22737 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22738 && LABEL_NUSES (start_insn
) == 1)
22741 seeking_return
= 1;
22742 return_code
= GET_CODE (body
);
22751 gcc_assert (!arm_ccfsm_state
|| reverse
);
22752 if (!JUMP_P (insn
))
/* This jump might be paralleled with a clobber of the condition codes;
   the jump should always come first.  */
22757 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22758 body
= XVECEXP (body
, 0, 0);
22761 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22762 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22765 int fail
= FALSE
, succeed
= FALSE
;
22766 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22767 int then_not_else
= TRUE
;
22768 rtx_insn
*this_insn
= start_insn
;
22771 /* Register the insn jumped to. */
22774 if (!seeking_return
)
22775 label
= XEXP (SET_SRC (body
), 0);
22777 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22778 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22779 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22781 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22782 then_not_else
= FALSE
;
22784 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22786 seeking_return
= 1;
22787 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22789 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22791 seeking_return
= 1;
22792 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22793 then_not_else
= FALSE
;
22796 gcc_unreachable ();
22798 /* See how many insns this branch skips, and what kind of insns. If all
22799 insns are okay, and the label or unconditional branch to the same
22800 label is not too far away, succeed. */
22801 for (insns_skipped
= 0;
22802 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22806 this_insn
= next_nonnote_insn (this_insn
);
22810 switch (GET_CODE (this_insn
))
22813 /* Succeed if it is the target label, otherwise fail since
22814 control falls in from somewhere else. */
22815 if (this_insn
== label
)
22817 arm_ccfsm_state
= 1;
22825 /* Succeed if the following insn is the target label.
22827 If return insns are used then the last insn in a function
22828 will be a barrier. */
22829 this_insn
= next_nonnote_insn (this_insn
);
22830 if (this_insn
&& this_insn
== label
)
22832 arm_ccfsm_state
= 1;
22840 /* The AAPCS says that conditional calls should not be
22841 used since they make interworking inefficient (the
22842 linker can't transform BL<cond> into BLX). That's
22843 only a problem if the machine has BLX. */
22850 /* Succeed if the following insn is the target label, or
22851 if the following two insns are a barrier and the
22853 this_insn
= next_nonnote_insn (this_insn
);
22854 if (this_insn
&& BARRIER_P (this_insn
))
22855 this_insn
= next_nonnote_insn (this_insn
);
22857 if (this_insn
&& this_insn
== label
22858 && insns_skipped
< max_insns_skipped
)
22860 arm_ccfsm_state
= 1;
22868 /* If this is an unconditional branch to the same label, succeed.
22869 If it is to another label, do nothing. If it is conditional,
22871 /* XXX Probably, the tests for SET and the PC are
22874 scanbody
= PATTERN (this_insn
);
22875 if (GET_CODE (scanbody
) == SET
22876 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22878 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22879 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22881 arm_ccfsm_state
= 2;
22884 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22887 /* Fail if a conditional return is undesirable (e.g. on a
22888 StrongARM), but still allow this if optimizing for size. */
22889 else if (GET_CODE (scanbody
) == return_code
22890 && !use_return_insn (TRUE
, NULL
)
22893 else if (GET_CODE (scanbody
) == return_code
)
22895 arm_ccfsm_state
= 2;
22898 else if (GET_CODE (scanbody
) == PARALLEL
)
22900 switch (get_attr_conds (this_insn
))
22910 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22915 /* Instructions using or affecting the condition codes make it
22917 scanbody
= PATTERN (this_insn
);
22918 if (!(GET_CODE (scanbody
) == SET
22919 || GET_CODE (scanbody
) == PARALLEL
)
22920 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
22930 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22931 arm_target_label
= CODE_LABEL_NUMBER (label
);
22934 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22936 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22938 this_insn
= next_nonnote_insn (this_insn
);
22939 gcc_assert (!this_insn
22940 || (!BARRIER_P (this_insn
)
22941 && !LABEL_P (this_insn
)));
/* Oh dear!  We ran off the end; give up.  */
22946 extract_constrain_insn_cached (insn
);
22947 arm_ccfsm_state
= 0;
22948 arm_target_insn
= NULL
;
22951 arm_target_insn
= this_insn
;
22954 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22957 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22959 if (reverse
|| then_not_else
)
22960 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
/* Restore recog_data (getting the attributes of other insns can
   destroy this array, but final.c assumes that it remains intact
   across this call).  */
22966 extract_constrain_insn_cached (insn
);
22970 /* Output IT instructions. */
22972 thumb2_asm_output_opcode (FILE * stream
)
22977 if (arm_condexec_mask
)
22979 for (n
= 0; n
< arm_condexec_masklen
; n
++)
22980 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
22982 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
22983 arm_condition_codes
[arm_current_cc
]);
22984 arm_condexec_mask
= 0;
22988 /* Returns true if REGNO is a valid register
22989 for holding a quantity of type MODE. */
22991 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
22993 if (GET_MODE_CLASS (mode
) == MODE_CC
)
22994 return (regno
== CC_REGNUM
22995 || (TARGET_HARD_FLOAT
&& TARGET_VFP
22996 && regno
== VFPCC_REGNUM
));
22998 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
/* For the Thumb we only allow values bigger than SImode in
   registers 0 - 6, so that there is always a second low
   register available to hold the upper part of the value.
   We probably ought to ensure that the register is the
   start of an even-numbered register pair.  */
23007 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23009 if (TARGET_HARD_FLOAT
&& TARGET_VFP
23010 && IS_VFP_REGNUM (regno
))
23012 if (mode
== SFmode
|| mode
== SImode
)
23013 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23015 if (mode
== DFmode
)
23016 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23018 /* VFP registers can hold HFmode values, but there is no point in
23019 putting them there unless we have hardware conversion insns. */
23020 if (mode
== HFmode
)
23021 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
23024 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23025 || (VALID_NEON_QREG_MODE (mode
)
23026 && NEON_REGNO_OK_FOR_QUAD (regno
))
23027 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23028 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23029 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23030 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23031 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23036 if (TARGET_REALLY_IWMMXT
)
23038 if (IS_IWMMXT_GR_REGNUM (regno
))
23039 return mode
== SImode
;
23041 if (IS_IWMMXT_REGNUM (regno
))
23042 return VALID_IWMMXT_REG_MODE (mode
);
23045 /* We allow almost any value to be stored in the general registers.
23046 Restrict doubleword quantities to even register pairs in ARM state
23047 so that we can use ldrd. Do not allow very large Neon structure
23048 opaque modes in general registers; they would use too many. */
23049 if (regno
<= LAST_ARM_REGNUM
)
23051 if (ARM_NUM_REGS (mode
) > 4)
23057 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23060 if (regno
== FRAME_POINTER_REGNUM
23061 || regno
== ARG_POINTER_REGNUM
)
23062 /* We only allow integers in the fake hard registers. */
23063 return GET_MODE_CLASS (mode
) == MODE_INT
;
23068 /* Implement MODES_TIEABLE_P. */
23071 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23073 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23076 /* We specifically want to allow elements of "structure" modes to
23077 be tieable to the structure. This more general condition allows
23078 other rarer situations too. */
23080 && (VALID_NEON_DREG_MODE (mode1
)
23081 || VALID_NEON_QREG_MODE (mode1
)
23082 || VALID_NEON_STRUCT_MODE (mode1
))
23083 && (VALID_NEON_DREG_MODE (mode2
)
23084 || VALID_NEON_QREG_MODE (mode2
)
23085 || VALID_NEON_STRUCT_MODE (mode2
)))
23091 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23092 not used in arm mode. */
23095 arm_regno_class (int regno
)
23097 if (regno
== PC_REGNUM
)
23102 if (regno
== STACK_POINTER_REGNUM
)
23104 if (regno
== CC_REGNUM
)
23111 if (TARGET_THUMB2
&& regno
< 8)
23114 if ( regno
<= LAST_ARM_REGNUM
23115 || regno
== FRAME_POINTER_REGNUM
23116 || regno
== ARG_POINTER_REGNUM
)
23117 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23119 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23120 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23122 if (IS_VFP_REGNUM (regno
))
23124 if (regno
<= D7_VFP_REGNUM
)
23125 return VFP_D0_D7_REGS
;
23126 else if (regno
<= LAST_LO_VFP_REGNUM
)
23127 return VFP_LO_REGS
;
23129 return VFP_HI_REGS
;
23132 if (IS_IWMMXT_REGNUM (regno
))
23133 return IWMMXT_REGS
;
23135 if (IS_IWMMXT_GR_REGNUM (regno
))
23136 return IWMMXT_GR_REGS
;
23141 /* Handle a special case when computing the offset
23142 of an argument from the frame pointer. */
23144 arm_debugger_arg_offset (int value
, rtx addr
)
23148 /* We are only interested if dbxout_parms() failed to compute the offset. */
23152 /* We can only cope with the case where the address is held in a register. */
23156 /* If we are using the frame pointer to point at the argument, then
23157 an offset of 0 is correct. */
23158 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23161 /* If we are using the stack pointer to point at the
23162 argument, then an offset of 0 is correct. */
23163 /* ??? Check this is consistent with thumb2 frame layout. */
23164 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23165 && REGNO (addr
) == SP_REGNUM
)
23168 /* Oh dear. The argument is pointed to by a register rather
23169 than being held in a register, or being stored at a known
23170 offset from the frame pointer. Since GDB only understands
23171 those two kinds of argument we must translate the address
23172 held in the register into an offset from the frame pointer.
23173 We do this by searching through the insns for the function
23174 looking to see where this register gets its value. If the
23175 register is initialized from the frame pointer plus an offset
23176 then we are in luck and we can continue, otherwise we give up.
23178 This code is exercised by producing debugging information
23179 for a function with arguments like this:
23181 double func (double a, double b, int c, double d) {return d;}
23183 Without this code the stab for parameter 'd' will be set to
23184 an offset of 0 from the frame pointer, rather than 8. */
/* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

   then...  */
23198 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23200 if ( NONJUMP_INSN_P (insn
)
23201 && GET_CODE (PATTERN (insn
)) == SET
23202 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23203 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23204 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23205 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23206 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23209 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23218 warning (0, "unable to compute real location of stacked parameter");
23219 value
= 8; /* XXX magic hack */
23225 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23227 static const char *
23228 arm_invalid_parameter_type (const_tree t
)
23230 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23231 return N_("function parameters cannot have __fp16 type");
23235 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23237 static const char *
23238 arm_invalid_return_type (const_tree t
)
23240 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23241 return N_("functions cannot return __fp16 type");
23245 /* Implement TARGET_PROMOTED_TYPE. */
23248 arm_promoted_type (const_tree t
)
23250 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23251 return float_type_node
;
23255 /* Implement TARGET_CONVERT_TO_TYPE.
23256 Specifically, this hook implements the peculiarity of the ARM
23257 half-precision floating-point C semantics that requires conversions between
23258 __fp16 to or from double to do an intermediate conversion to float. */
23261 arm_convert_to_type (tree type
, tree expr
)
23263 tree fromtype
= TREE_TYPE (expr
);
23264 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
23266 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
23267 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
23268 return convert (type
, convert (float_type_node
, expr
));
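/* Minimal standalone sketch of the semantics implemented above (not GCC
   internals; assumes a compiler that provides the ARM __fp16 extension):
   a conversion between double and __fp16 is performed through an
   intermediate float, mirroring the nested convert () calls.  */
static __fp16
fp16_from_double_sketch (double d)
{
  float f = (float) d;		/* first narrow to float ...  */
  return (__fp16) f;		/* ... then to half precision.  */
}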
23272 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23273 This simply adds HFmode as a supported mode; even though we don't
23274 implement arithmetic on this type directly, it's supported by
23275 optabs conversions, much the way the double-word arithmetic is
23276 special-cased in the default hook. */
23279 arm_scalar_mode_supported_p (machine_mode mode
)
23281 if (mode
== HFmode
)
23282 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23283 else if (ALL_FIXED_POINT_MODE_P (mode
))
23286 return default_scalar_mode_supported_p (mode
);
23289 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23291 neon_reinterpret (rtx dest
, rtx src
)
23293 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
23296 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23297 not to early-clobber SRC registers in the process.
23299 We assume that the operands described by SRC and DEST represent a
23300 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23301 number of components into which the copy has been decomposed. */
23303 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23307 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23308 || REGNO (operands
[0]) < REGNO (operands
[1]))
23310 for (i
= 0; i
< count
; i
++)
23312 operands
[2 * i
] = dest
[i
];
23313 operands
[2 * i
+ 1] = src
[i
];
23318 for (i
= 0; i
< count
; i
++)
23320 operands
[2 * i
] = dest
[count
- i
- 1];
23321 operands
[2 * i
+ 1] = src
[count
- i
- 1];
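/* Standalone illustration of the ordering rule above (names invented for
   the example; a plain array stands in for the hard register file): when
   an overlapping destination range starts above the source range, copying
   the components in reverse order avoids overwriting a source element
   before it has been read.  */
static void
copy_components_sketch (int *regs, unsigned dest, unsigned src,
			unsigned count)
{
  unsigned i;

  if (dest <= src)
    for (i = 0; i < count; i++)		/* forward order is safe */
      regs[dest + i] = regs[src + i];
  else
    for (i = count; i-- > 0;)		/* reverse order avoids the clobber */
      regs[dest + i] = regs[src + i];
}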
23326 /* Split operands into moves from op[1] + op[2] into op[0]. */
23329 neon_split_vcombine (rtx operands
[3])
23331 unsigned int dest
= REGNO (operands
[0]);
23332 unsigned int src1
= REGNO (operands
[1]);
23333 unsigned int src2
= REGNO (operands
[2]);
23334 machine_mode halfmode
= GET_MODE (operands
[1]);
23335 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
23336 rtx destlo
, desthi
;
23338 if (src1
== dest
&& src2
== dest
+ halfregs
)
23340 /* No-op move. Can't split to nothing; emit something. */
23341 emit_note (NOTE_INSN_DELETED
);
23345 /* Preserve register attributes for variable tracking. */
23346 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23347 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23348 GET_MODE_SIZE (halfmode
));
23350 /* Special case of reversed high/low parts. Use VSWP. */
23351 if (src2
== dest
&& src1
== dest
+ halfregs
)
23353 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
23354 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
23355 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23359 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23361 /* Try to avoid unnecessary moves if part of the result
23362 is in the right place already. */
23364 emit_move_insn (destlo
, operands
[1]);
23365 if (src2
!= dest
+ halfregs
)
23366 emit_move_insn (desthi
, operands
[2]);
23370 if (src2
!= dest
+ halfregs
)
23371 emit_move_insn (desthi
, operands
[2]);
23373 emit_move_insn (destlo
, operands
[1]);
23377 /* Return the number (counting from 0) of
23378 the least significant set bit in MASK. */
23381 number_of_first_bit_set (unsigned mask
)
23383 return ctz_hwi (mask
);
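/* Portable standalone sketch of the same operation (ctz_hwi is a
   GCC-internal helper): return the index of the least significant set
   bit, assuming MASK is nonzero.  */
static int
first_set_bit_sketch (unsigned long mask)
{
  int n = 0;

  while ((mask & 1) == 0)
    {
      mask >>= 1;
      n++;
    }
  return n;
}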
23386 /* Like emit_multi_reg_push, but allowing for a different set of
23387 registers to be described as saved. MASK is the set of registers
23388 to be saved; REAL_REGS is the set of registers to be described as
23389 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23392 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23394 unsigned long regno
;
23395 rtx par
[10], tmp
, reg
;
23399 /* Build the parallel of the registers actually being stored. */
23400 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23402 regno
= ctz_hwi (mask
);
23403 reg
= gen_rtx_REG (SImode
, regno
);
23406 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23408 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23413 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23414 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23415 tmp
= gen_frame_mem (BLKmode
, tmp
);
23416 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
23419 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23420 insn
= emit_insn (tmp
);
23422 /* Always build the stack adjustment note for unwind info. */
23423 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23424 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
23427 /* Build the parallel of the registers recorded as saved for unwind. */
23428 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23430 regno
= ctz_hwi (real_regs
);
23431 reg
= gen_rtx_REG (SImode
, regno
);
23433 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23434 tmp
= gen_frame_mem (SImode
, tmp
);
23435 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
23436 RTX_FRAME_RELATED_P (tmp
) = 1;
23444 RTX_FRAME_RELATED_P (par
[0]) = 1;
23445 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23448 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
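/* Standalone sketch of the bit-mask walk used in the function above
   (function name invented for the example): clearing the lowest set bit
   with mask &= mask - 1 visits each register in the mask exactly once,
   lowest-numbered first.  Assumes the usual <stdio.h> declarations, which
   system.h already provides here.  */
static void
print_reg_mask_sketch (FILE *f, unsigned long mask)
{
  for (; mask; mask &= mask - 1)
    fprintf (f, " r%d", (int) __builtin_ctzl (mask));
  fputc ('\n', f);
}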
23453 /* Emit code to push or pop registers to or from the stack. F is the
23454 assembly file. MASK is the registers to pop. */
23456 thumb_pop (FILE *f
, unsigned long mask
)
23459 int lo_mask
= mask
& 0xFF;
23460 int pushed_words
= 0;
23464 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23466 /* Special case. Do not generate a POP PC statement here, do it in
23468 thumb_exit (f
, -1);
23472 fprintf (f
, "\tpop\t{");
23474 /* Look at the low registers first. */
23475 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23479 asm_fprintf (f
, "%r", regno
);
23481 if ((lo_mask
& ~1) != 0)
23488 if (mask
& (1 << PC_REGNUM
))
23490 /* Catch popping the PC. */
23491 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
23492 || crtl
->calls_eh_return
)
/* The PC is never popped directly; instead
   it is popped into r3 and then BX is used.  */
23496 fprintf (f
, "}\n");
23498 thumb_exit (f
, -1);
23507 asm_fprintf (f
, "%r", PC_REGNUM
);
23511 fprintf (f
, "}\n");
23514 /* Generate code to return from a thumb function.
23515 If 'reg_containing_return_addr' is -1, then the return address is
23516 actually on the stack, at the stack pointer. */
23518 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23520 unsigned regs_available_for_popping
;
23521 unsigned regs_to_pop
;
23523 unsigned available
;
23527 int restore_a4
= FALSE
;
23529 /* Compute the registers we need to pop. */
23533 if (reg_containing_return_addr
== -1)
23535 regs_to_pop
|= 1 << LR_REGNUM
;
23539 if (TARGET_BACKTRACE
)
23541 /* Restore the (ARM) frame pointer and stack pointer. */
23542 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23546 /* If there is nothing to pop then just emit the BX instruction and
23548 if (pops_needed
== 0)
23550 if (crtl
->calls_eh_return
)
23551 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23553 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23556 /* Otherwise if we are not supporting interworking and we have not created
23557 a backtrace structure and the function was not entered in ARM mode then
23558 just pop the return address straight into the PC. */
23559 else if (!TARGET_INTERWORK
23560 && !TARGET_BACKTRACE
23561 && !is_called_in_ARM_mode (current_function_decl
)
23562 && !crtl
->calls_eh_return
)
23564 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23568 /* Find out how many of the (return) argument registers we can corrupt. */
23569 regs_available_for_popping
= 0;
23571 /* If returning via __builtin_eh_return, the bottom three registers
23572 all contain information needed for the return. */
23573 if (crtl
->calls_eh_return
)
/* We can deduce the registers used from the function's
   return value.  This is more reliable than examining
   df_regs_ever_live_p () because that will be set if the register is
   ever used in the function, not just if the register is used
   to hold a return value.  */
23583 if (crtl
->return_rtx
!= 0)
23584 mode
= GET_MODE (crtl
->return_rtx
);
23586 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23588 size
= GET_MODE_SIZE (mode
);
23592 /* In a void function we can use any argument register.
23593 In a function that returns a structure on the stack
23594 we can use the second and third argument registers. */
23595 if (mode
== VOIDmode
)
23596 regs_available_for_popping
=
23597 (1 << ARG_REGISTER (1))
23598 | (1 << ARG_REGISTER (2))
23599 | (1 << ARG_REGISTER (3));
23601 regs_available_for_popping
=
23602 (1 << ARG_REGISTER (2))
23603 | (1 << ARG_REGISTER (3));
23605 else if (size
<= 4)
23606 regs_available_for_popping
=
23607 (1 << ARG_REGISTER (2))
23608 | (1 << ARG_REGISTER (3));
23609 else if (size
<= 8)
23610 regs_available_for_popping
=
23611 (1 << ARG_REGISTER (3));
23614 /* Match registers to be popped with registers into which we pop them. */
23615 for (available
= regs_available_for_popping
,
23616 required
= regs_to_pop
;
23617 required
!= 0 && available
!= 0;
23618 available
&= ~(available
& - available
),
23619 required
&= ~(required
& - required
))
23622 /* If we have any popping registers left over, remove them. */
23624 regs_available_for_popping
&= ~available
;
23626 /* Otherwise if we need another popping register we can use
23627 the fourth argument register. */
23628 else if (pops_needed
)
23630 /* If we have not found any free argument registers and
23631 reg a4 contains the return address, we must move it. */
23632 if (regs_available_for_popping
== 0
23633 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
23635 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23636 reg_containing_return_addr
= LR_REGNUM
;
23638 else if (size
> 12)
23640 /* Register a4 is being used to hold part of the return value,
23641 but we have dire need of a free, low register. */
23644 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
23647 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
23649 /* The fourth argument register is available. */
23650 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
23656 /* Pop as many registers as we can. */
23657 thumb_pop (f
, regs_available_for_popping
);
23659 /* Process the registers we popped. */
23660 if (reg_containing_return_addr
== -1)
23662 /* The return address was popped into the lowest numbered register. */
23663 regs_to_pop
&= ~(1 << LR_REGNUM
);
23665 reg_containing_return_addr
=
23666 number_of_first_bit_set (regs_available_for_popping
);
23668 /* Remove this register for the mask of available registers, so that
23669 the return address will not be corrupted by further pops. */
23670 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
23673 /* If we popped other registers then handle them here. */
23674 if (regs_available_for_popping
)
23678 /* Work out which register currently contains the frame pointer. */
23679 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
23681 /* Move it into the correct place. */
23682 asm_fprintf (f
, "\tmov\t%r, %r\n",
23683 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
23685 /* (Temporarily) remove it from the mask of popped registers. */
23686 regs_available_for_popping
&= ~(1 << frame_pointer
);
23687 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
23689 if (regs_available_for_popping
)
23693 /* We popped the stack pointer as well,
23694 find the register that contains it. */
23695 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
23697 /* Move it into the stack register. */
23698 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
23700 /* At this point we have popped all necessary registers, so
23701 do not worry about restoring regs_available_for_popping
23702 to its correct value:
23704 assert (pops_needed == 0)
23705 assert (regs_available_for_popping == (1 << frame_pointer))
23706 assert (regs_to_pop == (1 << STACK_POINTER)) */
/* Since we have just moved the popped value into the frame
   pointer, the popping register is available for reuse, and
   we know that we still have the stack pointer left to pop.  */
23713 regs_available_for_popping
|= (1 << frame_pointer
);
23717 /* If we still have registers left on the stack, but we no longer have
23718 any registers into which we can pop them, then we must move the return
23719 address into the link register and make available the register that
23721 if (regs_available_for_popping
== 0 && pops_needed
> 0)
23723 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
23725 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
23726 reg_containing_return_addr
);
23728 reg_containing_return_addr
= LR_REGNUM
;
23731 /* If we have registers left on the stack then pop some more.
23732 We know that at most we will want to pop FP and SP. */
23733 if (pops_needed
> 0)
23738 thumb_pop (f
, regs_available_for_popping
);
23740 /* We have popped either FP or SP.
23741 Move whichever one it is into the correct register. */
23742 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
23743 move_to
= number_of_first_bit_set (regs_to_pop
);
23745 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
23747 regs_to_pop
&= ~(1 << move_to
);
23752 /* If we still have not popped everything then we must have only
23753 had one register available to us and we are now popping the SP. */
23754 if (pops_needed
> 0)
23758 thumb_pop (f
, regs_available_for_popping
);
23760 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
23762 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
23764 assert (regs_to_pop == (1 << STACK_POINTER))
23765 assert (pops_needed == 1)
23769 /* If necessary restore the a4 register. */
23772 if (reg_containing_return_addr
!= LR_REGNUM
)
23774 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23775 reg_containing_return_addr
= LR_REGNUM
;
23778 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
23781 if (crtl
->calls_eh_return
)
23782 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23784 /* Return to caller. */
23785 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23788 /* Scan INSN just before assembler is output for it.
23789 For Thumb-1, we track the status of the condition codes; this
23790 information is used in the cbranchsi4_insn pattern. */
23792 thumb1_final_prescan_insn (rtx_insn
*insn
)
23794 if (flag_print_asm_name
)
23795 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
23796 INSN_ADDRESSES (INSN_UID (insn
)));
23797 /* Don't overwrite the previous setter when we get to a cbranch. */
23798 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
23800 enum attr_conds conds
;
23802 if (cfun
->machine
->thumb1_cc_insn
)
23804 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
23805 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
23808 conds
= get_attr_conds (insn
);
23809 if (conds
== CONDS_SET
)
23811 rtx set
= single_set (insn
);
23812 cfun
->machine
->thumb1_cc_insn
= insn
;
23813 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
23814 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
23815 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
23816 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
23818 rtx src1
= XEXP (SET_SRC (set
), 1);
23819 if (src1
== const0_rtx
)
23820 cfun
->machine
->thumb1_cc_mode
= CCmode
;
23822 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
23824 /* Record the src register operand instead of dest because
23825 cprop_hardreg pass propagates src. */
23826 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
23829 else if (conds
!= CONDS_NOCOND
)
23830 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
23833 /* Check if unexpected far jump is used. */
23834 if (cfun
->machine
->lr_save_eliminated
23835 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
23836 internal_error("Unexpected thumb1 far jump");
static int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
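/* For example, 0x4400 (0x44 << 8) satisfies the test above, while
   0x40000001 does not: its two set bits are 30 positions apart and cannot
   fall inside a single shifted 8-bit window.  */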
23856 /* Returns nonzero if the current function contains,
23857 or might contain a far jump. */
23859 thumb_far_jump_used_p (void)
23862 bool far_jump
= false;
23863 unsigned int func_size
= 0;
23865 /* This test is only important for leaf functions. */
23866 /* assert (!leaf_function_p ()); */
23868 /* If we have already decided that far jumps may be used,
23869 do not bother checking again, and always return true even if
23870 it turns out that they are not being used. Once we have made
23871 the decision that far jumps are present (and that hence the link
23872 register will be pushed onto the stack) we cannot go back on it. */
23873 if (cfun
->machine
->far_jump_used
)
23876 /* If this function is not being called from the prologue/epilogue
23877 generation code then it must be being called from the
23878 INITIAL_ELIMINATION_OFFSET macro. */
23879 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
23881 /* In this case we know that we are being asked about the elimination
23882 of the arg pointer register. If that register is not being used,
23883 then there are no arguments on the stack, and we do not have to
23884 worry that a far jump might force the prologue to push the link
23885 register, changing the stack offsets. In this case we can just
23886 return false, since the presence of far jumps in the function will
23887 not affect stack offsets.
23889 If the arg pointer is live (or if it was live, but has now been
23890 eliminated and so set to dead) then we do have to test to see if
23891 the function might contain a far jump. This test can lead to some
23892 false negatives, since before reload is completed, the length of
23893 branch instructions is not known, so gcc defaults to returning their
23894 longest length, which in turn sets the far jump attribute to true.
23896 A false negative will not result in bad code being generated, but it
23897 will result in a needless push and pop of the link register. We
23898 hope that this does not occur too often.
23900 If we need doubleword stack alignment this could affect the other
23901 elimination offsets so we can't risk getting it wrong. */
23902 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
23903 cfun
->machine
->arg_pointer_live
= 1;
23904 else if (!cfun
->machine
->arg_pointer_live
)
23908 /* We should not change far_jump_used during or after reload, as there is
23909 no chance to change stack frame layout. */
23910 if (reload_in_progress
|| reload_completed
)
23913 /* Check to see if the function contains a branch
23914 insn with the far jump attribute set. */
23915 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23917 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
23921 func_size
+= get_attr_length (insn
);
23924 /* Attribute far_jump will always be true for thumb1 before
23925 shorten_branch pass. So checking far_jump attribute before
23926 shorten_branch isn't very useful.
23928 Following heuristic tries to estimate more accurately if a far jump
23929 may finally be used. The heuristic is very conservative as there is
23930 no chance to roll-back the decision of not to use far jump.
23932 Thumb1 long branch offset is -2048 to 2046. The worst case is each
23933 2-byte insn is associated with a 4 byte constant pool. Using
23934 function size 2048/3 as the threshold is conservative enough. */
23937 if ((func_size
* 3) >= 2048)
23939 /* Record the fact that we have decided that
23940 the function does use far jumps. */
23941 cfun
->machine
->far_jump_used
= 1;
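/* Standalone sketch of the heuristic above (function name invented for
   the example): assume the worst case of a 4-byte literal-pool entry per
   2-byte insn, so the code a branch may have to cross can be up to three
   times the summed insn length, and compare that against the -2048..2046
   byte branch range noted above.  */
static int
might_use_far_jump_sketch (unsigned int func_size)
{
  return (func_size * 3) >= 2048;
}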
23949 /* Return nonzero if FUNC must be entered in ARM mode. */
23951 is_called_in_ARM_mode (tree func
)
23953 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
23955 /* Ignore the problem about functions whose address is taken. */
23956 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
23960 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
23966 /* Given the stack offsets and register mask in OFFSETS, decide how
23967 many additional registers to push instead of subtracting a constant
23968 from SP. For epilogues the principle is the same except we use pop.
23969 FOR_PROLOGUE indicates which we're generating. */
23971 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
23973 HOST_WIDE_INT amount
;
23974 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
23975 /* Extract a mask of the ones we can give to the Thumb's push/pop
23977 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
23978 /* Then count how many other high registers will need to be pushed. */
23979 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
23980 int n_free
, reg_base
, size
;
23982 if (!for_prologue
&& frame_pointer_needed
)
23983 amount
= offsets
->locals_base
- offsets
->saved_regs
;
23985 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
23987 /* If the stack frame size is 512 exactly, we can save one load
23988 instruction, which should make this a win even when optimizing
23990 if (!optimize_size
&& amount
!= 512)
23993 /* Can't do this if there are high registers to push. */
23994 if (high_regs_pushed
!= 0)
23997 /* Shouldn't do it in the prologue if no registers would normally
23998 be pushed at all. In the epilogue, also allow it if we'll have
23999 a pop insn for the PC. */
24002 || TARGET_BACKTRACE
24003 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24004 || TARGET_INTERWORK
24005 || crtl
->args
.pretend_args_size
!= 0))
24008 /* Don't do this if thumb_expand_prologue wants to emit instructions
24009 between the push and the stack frame allocation. */
24011 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24012 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24019 size
= arm_size_return_regs ();
24020 reg_base
= ARM_NUM_INTS (size
);
24021 live_regs_mask
>>= reg_base
;
24024 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24025 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24027 live_regs_mask
>>= 1;
24033 gcc_assert (amount
/ 4 * 4 == amount
);
24035 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24036 return (amount
- 508) / 4;
24037 if (amount
<= n_free
* 4)
24042 /* The bits which aren't usefully expanded as rtl. */
24044 thumb1_unexpanded_epilogue (void)
24046 arm_stack_offsets
*offsets
;
24048 unsigned long live_regs_mask
= 0;
24049 int high_regs_pushed
= 0;
24051 int had_to_push_lr
;
24054 if (cfun
->machine
->return_used_this_function
!= 0)
24057 if (IS_NAKED (arm_current_func_type ()))
24060 offsets
= arm_get_frame_offsets ();
24061 live_regs_mask
= offsets
->saved_regs_mask
;
24062 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24064 /* If we can deduce the registers used from the function's return value.
24065 This is more reliable that examining df_regs_ever_live_p () because that
24066 will be set if the register is ever used in the function, not just if
24067 the register is used to hold a return value. */
24068 size
= arm_size_return_regs ();
24070 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24073 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24074 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24077 /* The prolog may have pushed some high registers to use as
24078 work registers. e.g. the testsuite file:
24079 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24080 compiles to produce:
24081 push {r4, r5, r6, r7, lr}
24085 as part of the prolog. We have to undo that pushing here. */
24087 if (high_regs_pushed
)
24089 unsigned long mask
= live_regs_mask
& 0xff;
24092 /* The available low registers depend on the size of the value we are
24100 /* Oh dear! We have no low registers into which we can pop
24103 ("no low registers available for popping high registers");
24105 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24106 if (live_regs_mask
& (1 << next_hi_reg
))
24109 while (high_regs_pushed
)
24111 /* Find lo register(s) into which the high register(s) can
24113 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24115 if (mask
& (1 << regno
))
24116 high_regs_pushed
--;
24117 if (high_regs_pushed
== 0)
24121 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24123 /* Pop the values into the low register(s). */
24124 thumb_pop (asm_out_file
, mask
);
24126 /* Move the value(s) into the high registers. */
24127 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24129 if (mask
& (1 << regno
))
24131 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24134 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24135 if (live_regs_mask
& (1 << next_hi_reg
))
24140 live_regs_mask
&= ~0x0f00;
24143 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24144 live_regs_mask
&= 0xff;
24146 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24148 /* Pop the return address into the PC. */
24149 if (had_to_push_lr
)
24150 live_regs_mask
|= 1 << PC_REGNUM
;
24152 /* Either no argument registers were pushed or a backtrace
24153 structure was created which includes an adjusted stack
24154 pointer, so just pop everything. */
24155 if (live_regs_mask
)
24156 thumb_pop (asm_out_file
, live_regs_mask
);
24158 /* We have either just popped the return address into the
24159 PC or it was kept in LR for the entire function.
24160 Note that thumb_pop has already called thumb_exit if the
24161 PC was in the list. */
24162 if (!had_to_push_lr
)
24163 thumb_exit (asm_out_file
, LR_REGNUM
);
24167 /* Pop everything but the return address. */
24168 if (live_regs_mask
)
24169 thumb_pop (asm_out_file
, live_regs_mask
);
24171 if (had_to_push_lr
)
24175 /* We have no free low regs, so save one. */
24176 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24180 /* Get the return address into a temporary register. */
24181 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24185 /* Move the return address to lr. */
24186 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24188 /* Restore the low register. */
24189 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24194 regno
= LAST_ARG_REGNUM
;
24199 /* Remove the argument registers that were pushed onto the stack. */
24200 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24201 SP_REGNUM
, SP_REGNUM
,
24202 crtl
->args
.pretend_args_size
);
24204 thumb_exit (asm_out_file
, regno
);
24210 /* Functions to save and restore machine-specific function data. */
24211 static struct machine_function
*
24212 arm_init_machine_status (void)
24214 struct machine_function
*machine
;
24215 machine
= ggc_cleared_alloc
<machine_function
> ();
24217 #if ARM_FT_UNKNOWN != 0
24218 machine
->func_type
= ARM_FT_UNKNOWN
;
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}

/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

        default:
          gcc_unreachable ();
        }
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }
}
24305 /* Generate the function's prologue. */
24308 thumb1_expand_prologue (void)
24312 HOST_WIDE_INT amount
;
24313 arm_stack_offsets
*offsets
;
24314 unsigned long func_type
;
24316 unsigned long live_regs_mask
;
24317 unsigned long l_mask
;
24318 unsigned high_regs_pushed
= 0;
24320 func_type
= arm_current_func_type ();
24322 /* Naked functions don't have prologues. */
24323 if (IS_NAKED (func_type
))
24326 if (IS_INTERRUPT (func_type
))
24328 error ("interrupt Service Routines cannot be coded in Thumb mode");
24332 if (is_called_in_ARM_mode (current_function_decl
))
24333 emit_insn (gen_prologue_thumb1_interwork ());
24335 offsets
= arm_get_frame_offsets ();
24336 live_regs_mask
= offsets
->saved_regs_mask
;
24338 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24339 l_mask
= live_regs_mask
& 0x40ff;
24340 /* Then count how many other high registers will need to be pushed. */
24341 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24343 if (crtl
->args
.pretend_args_size
)
24345 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24347 if (cfun
->machine
->uses_anonymous_args
)
24349 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24350 unsigned long mask
;
24352 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24353 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24355 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24359 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24360 stack_pointer_rtx
, x
));
24362 RTX_FRAME_RELATED_P (insn
) = 1;
24365 if (TARGET_BACKTRACE
)
24367 HOST_WIDE_INT offset
= 0;
24368 unsigned work_register
;
24369 rtx work_reg
, x
, arm_hfp_rtx
;
24371 /* We have been asked to create a stack backtrace structure.
24372 The code looks like this:
24376 0 sub SP, #16 Reserve space for 4 registers.
24377 2 push {R7} Push low registers.
24378 4 add R7, SP, #20 Get the stack pointer before the push.
24379 6 str R7, [SP, #8] Store the stack pointer
24380 (before reserving the space).
24381 8 mov R7, PC Get hold of the start of this code + 12.
24382 10 str R7, [SP, #16] Store it.
24383 12 mov R7, FP Get hold of the current frame pointer.
24384 14 str R7, [SP, #4] Store it.
24385 16 mov R7, LR Get hold of the current return address.
24386 18 str R7, [SP, #12] Store it.
24387 20 add R7, SP, #16 Point at the start of the
24388 backtrace structure.
24389 22 mov FP, R7 Put this value into the frame pointer. */
24391 work_register
= thumb_find_work_register (live_regs_mask
);
24392 work_reg
= gen_rtx_REG (SImode
, work_register
);
24393 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24395 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24396 stack_pointer_rtx
, GEN_INT (-16)));
24397 RTX_FRAME_RELATED_P (insn
) = 1;
24401 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24402 RTX_FRAME_RELATED_P (insn
) = 1;
24404 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24407 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24408 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24410 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24411 x
= gen_frame_mem (SImode
, x
);
24412 emit_move_insn (x
, work_reg
);
24414 /* Make sure that the instruction fetching the PC is in the right place
24415 to calculate "start of backtrace creation code + 12". */
24416 /* ??? The stores using the common WORK_REG ought to be enough to
24417 prevent the scheduler from doing anything weird. Failing that
24418 we could always move all of the following into an UNSPEC_VOLATILE. */
24421 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24422 emit_move_insn (work_reg
, x
);
24424 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24425 x
= gen_frame_mem (SImode
, x
);
24426 emit_move_insn (x
, work_reg
);
24428 emit_move_insn (work_reg
, arm_hfp_rtx
);
24430 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24431 x
= gen_frame_mem (SImode
, x
);
24432 emit_move_insn (x
, work_reg
);
24436 emit_move_insn (work_reg
, arm_hfp_rtx
);
24438 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24439 x
= gen_frame_mem (SImode
, x
);
24440 emit_move_insn (x
, work_reg
);
24442 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24443 emit_move_insn (work_reg
, x
);
24445 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24446 x
= gen_frame_mem (SImode
, x
);
24447 emit_move_insn (x
, work_reg
);
24450 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24451 emit_move_insn (work_reg
, x
);
24453 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24454 x
= gen_frame_mem (SImode
, x
);
24455 emit_move_insn (x
, work_reg
);
24457 x
= GEN_INT (offset
+ 12);
24458 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24460 emit_move_insn (arm_hfp_rtx
, work_reg
);
24462 /* Optimization: If we are not pushing any low registers but we are going
24463 to push some high registers then delay our first push. This will just
24464 be a push of LR and we can combine it with the push of the first high
24466 else if ((l_mask
& 0xff) != 0
24467 || (high_regs_pushed
== 0 && l_mask
))
24469 unsigned long mask
= l_mask
;
24470 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24471 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24472 RTX_FRAME_RELATED_P (insn
) = 1;
24475 if (high_regs_pushed
)
24477 unsigned pushable_regs
;
24478 unsigned next_hi_reg
;
24479 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24480 : crtl
->args
.info
.nregs
;
24481 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24483 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24484 if (live_regs_mask
& (1 << next_hi_reg
))
      /* Here we need to mask out registers used for passing arguments, even
         if they could otherwise be pushed.  Otherwise we might stash a high
         register in an argument register and clobber an incoming argument.  */
24490 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
24492 if (pushable_regs
== 0)
24493 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24495 while (high_regs_pushed
> 0)
24497 unsigned long real_regs_mask
= 0;
24499 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
24501 if (pushable_regs
& (1 << regno
))
24503 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24504 gen_rtx_REG (SImode
, next_hi_reg
));
24506 high_regs_pushed
--;
24507 real_regs_mask
|= (1 << next_hi_reg
);
24509 if (high_regs_pushed
)
24511 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24513 if (live_regs_mask
& (1 << next_hi_reg
))
24518 pushable_regs
&= ~((1 << regno
) - 1);
24524 /* If we had to find a work register and we have not yet
24525 saved the LR then add it to the list of regs to push. */
24526 if (l_mask
== (1 << LR_REGNUM
))
24528 pushable_regs
|= l_mask
;
24529 real_regs_mask
|= l_mask
;
24533 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
24534 RTX_FRAME_RELATED_P (insn
) = 1;
24538 /* Load the pic register before setting the frame pointer,
24539 so we can use r7 as a temporary work register. */
24540 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24541 arm_load_pic_register (live_regs_mask
);
24543 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24544 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24545 stack_pointer_rtx
);
24547 if (flag_stack_usage_info
)
24548 current_function_static_stack_size
24549 = offsets
->outgoing_args
- offsets
->saved_args
;
24551 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24552 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
24557 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24558 GEN_INT (- amount
)));
24559 RTX_FRAME_RELATED_P (insn
) = 1;
24565 /* The stack decrement is too big for an immediate value in a single
24566 insn. In theory we could issue multiple subtracts, but after
24567 three of them it becomes more space efficient to place the full
24568 value in the constant pool and load into a register. (Also the
24569 ARM debugger really likes to see only one stack decrement per
24570 function). So instead we look for a scratch register into which
24571 we can load the decrement, and then we subtract this from the
24572 stack pointer. Unfortunately on the thumb the only available
24573 scratch registers are the argument registers, and we cannot use
24574 these as they may hold arguments to the function. Instead we
24575 attempt to locate a call preserved register which is used by this
24576 function. If we can find one, then we know that it will have
24577 been pushed at the start of the prologue and so we can corrupt
24579 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
24580 if (live_regs_mask
& (1 << regno
))
24583 gcc_assert(regno
<= LAST_LO_REGNUM
);
24585 reg
= gen_rtx_REG (SImode
, regno
);
24587 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
24589 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24590 stack_pointer_rtx
, reg
));
24592 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
24593 plus_constant (Pmode
, stack_pointer_rtx
,
24595 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
24596 RTX_FRAME_RELATED_P (insn
) = 1;
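          /* Illustrative sketch (added for clarity, not from the original
             source): with a call-saved low register, say r7, live in this
             function and a 1024-byte frame, the code above emits roughly

                 ldr	r7, .Lc		@ .Lc is a literal-pool entry holding -1024
                 add	sp, sp, r7

             instead of a long chain of small "sub sp, #..." adjustments.  */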
24600 if (frame_pointer_needed
)
24601 thumb_set_frame_pointer (offsets
);
24603 /* If we are profiling, make sure no instructions are scheduled before
24604 the call to mcount. Similarly if the user has requested no
24605 scheduling in the prolog. Similarly if we want non-call exceptions
24606 using the EABI unwinder, to prevent faulting instructions from being
24607 swapped with a stack adjustment. */
24608 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
24609 || (arm_except_unwind_info (&global_options
) == UI_TARGET
24610 && cfun
->can_throw_non_call_exceptions
))
24611 emit_insn (gen_blockage ());
24613 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
24614 if (live_regs_mask
& 0xff)
24615 cfun
->machine
->lr_save_eliminated
= 0;
/* Generate pattern *pop_multiple_with_stack_update_and_return if a single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is whether a single register or
   multiple registers are to be popped on return.  */
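/* For example (illustrative, not part of the original source): a function
   that saved {r4, r5, lr} returns through a single "pop {r4, r5, pc}",
   whereas a simple_return with nothing saved collapses to "bx lr".  */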
24624 thumb2_expand_return (bool simple_return
)
24627 unsigned long saved_regs_mask
;
24628 arm_stack_offsets
*offsets
;
24630 offsets
= arm_get_frame_offsets ();
24631 saved_regs_mask
= offsets
->saved_regs_mask
;
24633 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
24634 if (saved_regs_mask
& (1 << i
))
24637 if (!simple_return
&& saved_regs_mask
)
24641 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
24642 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
24643 rtx addr
= gen_rtx_MEM (SImode
,
24644 gen_rtx_POST_INC (SImode
,
24645 stack_pointer_rtx
));
24646 set_mem_alias_set (addr
, get_frame_alias_set ());
24647 XVECEXP (par
, 0, 0) = ret_rtx
;
24648 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
24649 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
24650 emit_jump_insn (par
);
24654 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
24655 saved_regs_mask
|= (1 << PC_REGNUM
);
24656 arm_emit_multi_reg_pop (saved_regs_mask
);
24661 emit_jump_insn (simple_return_rtx
);
24666 thumb1_expand_epilogue (void)
24668 HOST_WIDE_INT amount
;
24669 arm_stack_offsets
*offsets
;
  /* Naked functions don't have epilogues.  */
24673 if (IS_NAKED (arm_current_func_type ()))
24676 offsets
= arm_get_frame_offsets ();
24677 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24679 if (frame_pointer_needed
)
24681 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
24682 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24684 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
24686 gcc_assert (amount
>= 0);
24689 emit_insn (gen_blockage ());
24692 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24693 GEN_INT (amount
)));
24696 /* r3 is always free in the epilogue. */
24697 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
24699 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
24700 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
24704 /* Emit a USE (stack_pointer_rtx), so that
24705 the stack adjustment will not be deleted. */
24706 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24708 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
24709 emit_insn (gen_blockage ());
24711 /* Emit a clobber for each insn that will be restored in the epilogue,
24712 so that flow2 will get register lifetimes correct. */
24713 for (regno
= 0; regno
< 13; regno
++)
24714 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
24715 emit_clobber (gen_rtx_REG (SImode
, regno
));
24717 if (! df_regs_ever_live_p (LR_REGNUM
))
24718 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
24721 /* Epilogue code for APCS frame. */
24723 arm_expand_epilogue_apcs_frame (bool really_return
)
24725 unsigned long func_type
;
24726 unsigned long saved_regs_mask
;
24729 int floats_from_frame
= 0;
24730 arm_stack_offsets
*offsets
;
24732 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
24733 func_type
= arm_current_func_type ();
24735 /* Get frame offsets for ARM. */
24736 offsets
= arm_get_frame_offsets ();
24737 saved_regs_mask
= offsets
->saved_regs_mask
;
24739 /* Find the offset of the floating-point save area in the frame. */
24741 = (offsets
->saved_args
24742 + arm_compute_static_chain_stack_bytes ()
24745 /* Compute how many core registers saved and how far away the floats are. */
24746 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
24747 if (saved_regs_mask
& (1 << i
))
24750 floats_from_frame
+= 4;
24753 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
24756 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
24758 /* The offset is from IP_REGNUM. */
24759 int saved_size
= arm_get_vfp_saved_size ();
24760 if (saved_size
> 0)
24763 floats_from_frame
+= saved_size
;
24764 insn
= emit_insn (gen_addsi3 (ip_rtx
,
24765 hard_frame_pointer_rtx
,
24766 GEN_INT (-floats_from_frame
)));
24767 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
24768 ip_rtx
, hard_frame_pointer_rtx
);
24771 /* Generate VFP register multi-pop. */
24772 start_reg
= FIRST_VFP_REGNUM
;
24774 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
24775 /* Look for a case where a reg does not need restoring. */
24776 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
24777 && (!df_regs_ever_live_p (i
+ 1)
24778 || call_used_regs
[i
+ 1]))
24780 if (start_reg
!= i
)
24781 arm_emit_vfp_multi_reg_pop (start_reg
,
24782 (i
- start_reg
) / 2,
24783 gen_rtx_REG (SImode
,
24788 /* Restore the remaining regs that we have discovered (or possibly
24789 even all of them, if the conditional in the for loop never
24791 if (start_reg
!= i
)
24792 arm_emit_vfp_multi_reg_pop (start_reg
,
24793 (i
- start_reg
) / 2,
24794 gen_rtx_REG (SImode
, IP_REGNUM
));
24799 /* The frame pointer is guaranteed to be non-double-word aligned, as
24800 it is set to double-word-aligned old_stack_pointer - 4. */
24802 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
24804 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
24805 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
24807 rtx addr
= gen_frame_mem (V2SImode
,
24808 plus_constant (Pmode
, hard_frame_pointer_rtx
,
24810 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
24811 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24812 gen_rtx_REG (V2SImode
, i
),
24818 /* saved_regs_mask should contain IP which contains old stack pointer
24819 at the time of activation creation. Since SP and IP are adjacent registers,
24820 we can restore the value directly into SP. */
24821 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
24822 saved_regs_mask
&= ~(1 << IP_REGNUM
);
24823 saved_regs_mask
|= (1 << SP_REGNUM
);
24825 /* There are two registers left in saved_regs_mask - LR and PC. We
24826 only need to restore LR (the return address), but to
24827 save time we can load it directly into PC, unless we need a
24828 special function exit sequence, or we are not really returning. */
24830 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
24831 && !crtl
->calls_eh_return
)
24832 /* Delete LR from the register mask, so that LR on
24833 the stack is loaded into the PC in the register mask. */
24834 saved_regs_mask
&= ~(1 << LR_REGNUM
);
24836 saved_regs_mask
&= ~(1 << PC_REGNUM
);
24838 num_regs
= bit_count (saved_regs_mask
);
24839 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
24842 emit_insn (gen_blockage ());
24843 /* Unwind the stack to just below the saved registers. */
24844 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24845 hard_frame_pointer_rtx
,
24846 GEN_INT (- 4 * num_regs
)));
24848 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
24849 stack_pointer_rtx
, hard_frame_pointer_rtx
);
24852 arm_emit_multi_reg_pop (saved_regs_mask
);
24854 if (IS_INTERRUPT (func_type
))
24856 /* Interrupt handlers will have pushed the
24857 IP onto the stack, so restore it now. */
24859 rtx addr
= gen_rtx_MEM (SImode
,
24860 gen_rtx_POST_INC (SImode
,
24861 stack_pointer_rtx
));
24862 set_mem_alias_set (addr
, get_frame_alias_set ());
24863 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
24864 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24865 gen_rtx_REG (SImode
, IP_REGNUM
),
24869 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
24872 if (crtl
->calls_eh_return
)
24873 emit_insn (gen_addsi3 (stack_pointer_rtx
,
24875 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
24877 if (IS_STACKALIGN (func_type
))
24878 /* Restore the original stack pointer. Before prologue, the stack was
24879 realigned and the original stack pointer saved in r0. For details,
24880 see comment in arm_expand_prologue. */
24881 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
24883 emit_jump_insn (simple_return_rtx
);
24886 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24887 function is not a sibcall. */
24889 arm_expand_epilogue (bool really_return
)
24891 unsigned long func_type
;
24892 unsigned long saved_regs_mask
;
24896 arm_stack_offsets
*offsets
;
24898 func_type
= arm_current_func_type ();
24900 /* Naked functions don't have epilogue. Hence, generate return pattern, and
24901 let output_return_instruction take care of instruction emission if any. */
24902 if (IS_NAKED (func_type
)
24903 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
24906 emit_jump_insn (simple_return_rtx
);
24910 /* If we are throwing an exception, then we really must be doing a
24911 return, so we can't tail-call. */
24912 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
24914 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
24916 arm_expand_epilogue_apcs_frame (really_return
);
24920 /* Get frame offsets for ARM. */
24921 offsets
= arm_get_frame_offsets ();
24922 saved_regs_mask
= offsets
->saved_regs_mask
;
24923 num_regs
= bit_count (saved_regs_mask
);
24925 if (frame_pointer_needed
)
24928 /* Restore stack pointer if necessary. */
24931 /* In ARM mode, frame pointer points to first saved register.
24932 Restore stack pointer to last saved register. */
24933 amount
= offsets
->frame
- offsets
->saved_regs
;
24935 /* Force out any pending memory operations that reference stacked data
24936 before stack de-allocation occurs. */
24937 emit_insn (gen_blockage ());
24938 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24939 hard_frame_pointer_rtx
,
24940 GEN_INT (amount
)));
24941 arm_add_cfa_adjust_cfa_note (insn
, amount
,
24943 hard_frame_pointer_rtx
);
24945 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24947 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24951 /* In Thumb-2 mode, the frame pointer points to the last saved
24953 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24956 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
24957 hard_frame_pointer_rtx
,
24958 GEN_INT (amount
)));
24959 arm_add_cfa_adjust_cfa_note (insn
, amount
,
24960 hard_frame_pointer_rtx
,
24961 hard_frame_pointer_rtx
);
24964 /* Force out any pending memory operations that reference stacked data
24965 before stack de-allocation occurs. */
24966 emit_insn (gen_blockage ());
24967 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
24968 hard_frame_pointer_rtx
));
24969 arm_add_cfa_adjust_cfa_note (insn
, 0,
24971 hard_frame_pointer_rtx
);
24972 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24974 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24979 /* Pop off outgoing args and local frame to adjust stack pointer to
24980 last saved register. */
24981 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24985 /* Force out any pending memory operations that reference stacked data
24986 before stack de-allocation occurs. */
24987 emit_insn (gen_blockage ());
24988 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24990 GEN_INT (amount
)));
24991 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
24992 stack_pointer_rtx
, stack_pointer_rtx
);
24993 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24995 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24999 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
25001 /* Generate VFP register multi-pop. */
25002 int end_reg
= LAST_VFP_REGNUM
+ 1;
25004 /* Scan the registers in reverse order. We need to match
25005 any groupings made in the prologue and generate matching
25006 vldm operations. The need to match groups is because,
25007 unlike pop, vldm can only do consecutive regs. */
25008 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25009 /* Look for a case where a reg does not need restoring. */
25010 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25011 && (!df_regs_ever_live_p (i
+ 1)
25012 || call_used_regs
[i
+ 1]))
25014 /* Restore the regs discovered so far (from reg+2 to
25016 if (end_reg
> i
+ 2)
25017 arm_emit_vfp_multi_reg_pop (i
+ 2,
25018 (end_reg
- (i
+ 2)) / 2,
25019 stack_pointer_rtx
);
25023 /* Restore the remaining regs that we have discovered (or possibly
25024 even all of them, if the conditional in the for loop never
25026 if (end_reg
> i
+ 2)
25027 arm_emit_vfp_multi_reg_pop (i
+ 2,
25028 (end_reg
- (i
+ 2)) / 2,
25029 stack_pointer_rtx
);
25033 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25034 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25037 rtx addr
= gen_rtx_MEM (V2SImode
,
25038 gen_rtx_POST_INC (SImode
,
25039 stack_pointer_rtx
));
25040 set_mem_alias_set (addr
, get_frame_alias_set ());
25041 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25042 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25043 gen_rtx_REG (V2SImode
, i
),
25045 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25046 stack_pointer_rtx
, stack_pointer_rtx
);
25049 if (saved_regs_mask
)
25052 bool return_in_pc
= false;
25054 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25055 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25056 && !IS_STACKALIGN (func_type
)
25058 && crtl
->args
.pretend_args_size
== 0
25059 && saved_regs_mask
& (1 << LR_REGNUM
)
25060 && !crtl
->calls_eh_return
)
25062 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25063 saved_regs_mask
|= (1 << PC_REGNUM
);
25064 return_in_pc
= true;
25067 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25069 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25070 if (saved_regs_mask
& (1 << i
))
25072 rtx addr
= gen_rtx_MEM (SImode
,
25073 gen_rtx_POST_INC (SImode
,
25074 stack_pointer_rtx
));
25075 set_mem_alias_set (addr
, get_frame_alias_set ());
25077 if (i
== PC_REGNUM
)
25079 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25080 XVECEXP (insn
, 0, 0) = ret_rtx
;
25081 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
25082 gen_rtx_REG (SImode
, i
),
25084 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25085 insn
= emit_jump_insn (insn
);
25089 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25091 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25092 gen_rtx_REG (SImode
, i
),
25094 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25096 stack_pointer_rtx
);
25103 && current_tune
->prefer_ldrd_strd
25104 && !optimize_function_for_size_p (cfun
))
25107 thumb2_emit_ldrd_pop (saved_regs_mask
);
25108 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25109 arm_emit_ldrd_pop (saved_regs_mask
);
25111 arm_emit_multi_reg_pop (saved_regs_mask
);
25114 arm_emit_multi_reg_pop (saved_regs_mask
);
25117 if (return_in_pc
== true)
25121 if (crtl
->args
.pretend_args_size
)
25124 rtx dwarf
= NULL_RTX
;
25126 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25128 GEN_INT (crtl
->args
.pretend_args_size
)));
25130 RTX_FRAME_RELATED_P (tmp
) = 1;
25132 if (cfun
->machine
->uses_anonymous_args
)
          /* Restore the pretend args.  Refer to arm_expand_prologue for how
             the pretend args are saved on the stack.  */
25136 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25137 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25138 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25139 if (saved_regs_mask
& (1 << i
))
25141 rtx reg
= gen_rtx_REG (SImode
, i
);
25142 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25145 REG_NOTES (tmp
) = dwarf
;
25147 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
25148 stack_pointer_rtx
, stack_pointer_rtx
);
25151 if (!really_return
)
25154 if (crtl
->calls_eh_return
)
25155 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25157 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25159 if (IS_STACKALIGN (func_type
))
25160 /* Restore the original stack pointer. Before prologue, the stack was
25161 realigned and the original stack pointer saved in r0. For details,
25162 see comment in arm_expand_prologue. */
25163 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
25165 emit_jump_insn (simple_return_rtx
);
25168 /* Implementation of insn prologue_thumb1_interwork. This is the first
25169 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25172 thumb1_output_interwork (void)
25175 FILE *f
= asm_out_file
;
25177 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25178 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25180 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25182 /* Generate code sequence to switch us into Thumb mode. */
25183 /* The .code 32 directive has already been emitted by
25184 ASM_DECLARE_FUNCTION_NAME. */
25185 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25186 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25188 /* Generate a label, so that the debugger will notice the
25189 change in instruction sets. This label is also used by
25190 the assembler to bypass the ARM code when this function
25191 is called from a Thumb encoded function elsewhere in the
25192 same file. Hence the definition of STUB_NAME here must
25193 agree with the definition in gas/config/tc-arm.c. */
25195 #define STUB_NAME ".real_start_of"
25197 fprintf (f
, "\t.code\t16\n");
25199 if (arm_dllexport_name_p (name
))
25200 name
= arm_strip_name_encoding (name
);
25202 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25203 fprintf (f
, "\t.thumb_func\n");
25204 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
25209 /* Handle the case of a double word load into a low register from
25210 a computed memory address. The computed address may involve a
25211 register which is overwritten by the load. */
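/* Illustrative example (added, not part of the original source): for a
   DImode load into r0:r1 from the address r0 + r2, the REG+REG case below
   emits

	add	r1, r0, r2
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]

   so the base register r0 can be overwritten without losing the address.  */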
25213 thumb_load_double_from_address (rtx
*operands
)
25221 gcc_assert (REG_P (operands
[0]));
25222 gcc_assert (MEM_P (operands
[1]));
25224 /* Get the memory address. */
25225 addr
= XEXP (operands
[1], 0);
25227 /* Work out how the memory address is computed. */
25228 switch (GET_CODE (addr
))
25231 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25233 if (REGNO (operands
[0]) == REGNO (addr
))
25235 output_asm_insn ("ldr\t%H0, %2", operands
);
25236 output_asm_insn ("ldr\t%0, %1", operands
);
25240 output_asm_insn ("ldr\t%0, %1", operands
);
25241 output_asm_insn ("ldr\t%H0, %2", operands
);
25246 /* Compute <address> + 4 for the high order load. */
25247 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25249 output_asm_insn ("ldr\t%0, %1", operands
);
25250 output_asm_insn ("ldr\t%H0, %2", operands
);
25254 arg1
= XEXP (addr
, 0);
25255 arg2
= XEXP (addr
, 1);
25257 if (CONSTANT_P (arg1
))
25258 base
= arg2
, offset
= arg1
;
25260 base
= arg1
, offset
= arg2
;
25262 gcc_assert (REG_P (base
));
25264 /* Catch the case of <address> = <reg> + <reg> */
25265 if (REG_P (offset
))
25267 int reg_offset
= REGNO (offset
);
25268 int reg_base
= REGNO (base
);
25269 int reg_dest
= REGNO (operands
[0]);
25271 /* Add the base and offset registers together into the
25272 higher destination register. */
25273 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25274 reg_dest
+ 1, reg_base
, reg_offset
);
25276 /* Load the lower destination register from the address in
25277 the higher destination register. */
25278 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25279 reg_dest
, reg_dest
+ 1);
25281 /* Load the higher destination register from its own address
25283 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25284 reg_dest
+ 1, reg_dest
+ 1);
25288 /* Compute <address> + 4 for the high order load. */
25289 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25291 /* If the computed address is held in the low order register
25292 then load the high order register first, otherwise always
25293 load the low order register first. */
25294 if (REGNO (operands
[0]) == REGNO (base
))
25296 output_asm_insn ("ldr\t%H0, %2", operands
);
25297 output_asm_insn ("ldr\t%0, %1", operands
);
25301 output_asm_insn ("ldr\t%0, %1", operands
);
25302 output_asm_insn ("ldr\t%H0, %2", operands
);
25308 /* With no registers to worry about we can just load the value
25310 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25312 output_asm_insn ("ldr\t%H0, %2", operands
);
25313 output_asm_insn ("ldr\t%0, %1", operands
);
25317 gcc_unreachable ();
25324 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
25331 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25334 operands
[4] = operands
[5];
25337 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
25338 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
25342 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25343 std::swap (operands
[4], operands
[5]);
25344 if (REGNO (operands
[5]) > REGNO (operands
[6]))
25345 std::swap (operands
[5], operands
[6]);
25346 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25347 std::swap (operands
[4], operands
[5]);
25349 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
25350 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
25354 gcc_unreachable ();
25360 /* Output a call-via instruction for thumb state. */
25362 thumb_call_via_reg (rtx reg
)
25364 int regno
= REGNO (reg
);
25367 gcc_assert (regno
< LR_REGNUM
);
25369 /* If we are in the normal text section we can use a single instance
25370 per compilation unit. If we are doing function sections, then we need
25371 an entry per section, since we can't rely on reachability. */
25372 if (in_section
== text_section
)
25374 thumb_call_reg_needed
= 1;
25376 if (thumb_call_via_label
[regno
] == NULL
)
25377 thumb_call_via_label
[regno
] = gen_label_rtx ();
25378 labelp
= thumb_call_via_label
+ regno
;
25382 if (cfun
->machine
->call_via
[regno
] == NULL
)
25383 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
25384 labelp
= cfun
->machine
->call_via
+ regno
;
25387 output_asm_insn ("bl\t%a0", labelp
);
25391 /* Routines for generating rtl. */
25393 thumb_expand_movmemqi (rtx
*operands
)
25395 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
25396 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
25397 HOST_WIDE_INT len
= INTVAL (operands
[2]);
25398 HOST_WIDE_INT offset
= 0;
25402 emit_insn (gen_movmem12b (out
, in
, out
, in
));
25408 emit_insn (gen_movmem8b (out
, in
, out
, in
));
25414 rtx reg
= gen_reg_rtx (SImode
);
25415 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
25416 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
25423 rtx reg
= gen_reg_rtx (HImode
);
25424 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
25425 plus_constant (Pmode
, in
,
25427 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
25436 rtx reg
= gen_reg_rtx (QImode
);
25437 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
25438 plus_constant (Pmode
, in
,
25440 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
25447 thumb_reload_out_hi (rtx
*operands
)
25449 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
25452 /* Handle reading a half-word from memory during reload. */
25454 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
25456 gcc_unreachable ();
25459 /* Return the length of a function name prefix
25460 that starts with the character 'c'. */
25462 arm_get_strip_length (int c
)
25466 ARM_NAME_ENCODING_LENGTHS
25471 /* Return a pointer to a function's name with any
25472 and all prefix encodings stripped from it. */
25474 arm_strip_name_encoding (const char *name
)
25478 while ((skip
= arm_get_strip_length (* name
)))
25484 /* If there is a '*' anywhere in the name's prefix, then
25485 emit the stripped name verbatim, otherwise prepend an
25486 underscore if leading underscores are being used. */
25488 arm_asm_output_labelref (FILE *stream
, const char *name
)
25493 while ((skip
= arm_get_strip_length (* name
)))
25495 verbatim
|= (*name
== '*');
25500 fputs (name
, stream
);
25502 asm_fprintf (stream
, "%U%s", name
);
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
25523 arm_file_start (void)
25527 if (TARGET_UNIFIED_ASM
)
25528 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
25532 const char *fpu_name
;
25533 if (arm_selected_arch
)
25535 /* armv7ve doesn't support any extensions. */
25536 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
          /* Keep backward compatibility for assemblers
             which don't support armv7ve.  */
25540 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
25541 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
25542 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
25543 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
25544 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
25548 const char* pos
= strchr (arm_selected_arch
->name
, '+');
25552 gcc_assert (strlen (arm_selected_arch
->name
)
25553 <= sizeof (buf
) / sizeof (*pos
));
25554 strncpy (buf
, arm_selected_arch
->name
,
25555 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
25556 buf
[pos
- arm_selected_arch
->name
] = '\0';
25557 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
25558 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
25561 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
25564 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
25565 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
25568 const char* truncated_name
25569 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
25570 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
25573 if (TARGET_SOFT_FLOAT
)
25575 fpu_name
= "softvfp";
25579 fpu_name
= arm_fpu_desc
->name
;
25580 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
25582 if (TARGET_HARD_FLOAT
)
25583 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25584 if (TARGET_HARD_FLOAT_ABI
)
25585 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25588 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
25590 /* Some of these attributes only apply when the corresponding features
25591 are used. However we don't have any easy way of figuring this out.
25592 Conservatively record the setting that would have been used. */
25594 if (flag_rounding_math
)
25595 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25597 if (!flag_unsafe_math_optimizations
)
25599 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25600 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25602 if (flag_signaling_nans
)
25603 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25605 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25606 flag_finite_math_only
? 1 : 3);
25608 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25609 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25610 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25611 flag_short_enums
? 1 : 2);
25613 /* Tag_ABI_optimization_goals. */
25616 else if (optimize
>= 2)
25622 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
25624 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25627 if (arm_fp16_format
)
25628 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25629 (int) arm_fp16_format
);
25631 if (arm_lang_output_object_attributes_hook
)
25632 arm_lang_output_object_attributes_hook();
25635 default_file_start ();
25639 arm_file_end (void)
25643 if (NEED_INDICATE_EXEC_STACK
)
25644 /* Add .note.GNU-stack. */
25645 file_end_indicate_exec_stack ();
25647 if (! thumb_call_reg_needed
)
25650 switch_to_section (text_section
);
25651 asm_fprintf (asm_out_file
, "\t.code 16\n");
25652 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
25654 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
25656 rtx label
= thumb_call_via_label
[regno
];
25660 targetm
.asm_out
.internal_label (asm_out_file
, "L",
25661 CODE_LABEL_NUMBER (label
));
25662 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */
25675 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
25677 if (optimize
> 0 && TREE_CONSTANT (decl
))
25678 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
25680 default_encode_section_info (decl
, rtl
, first
);
25682 #endif /* !ARM_PE */
25685 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
25687 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
25688 && !strcmp (prefix
, "L"))
25690 arm_ccfsm_state
= 0;
25691 arm_target_insn
= NULL
;
25693 default_internal_label (stream
, prefix
, labelno
);
25696 /* Output code to add DELTA to the first argument, and then jump
25697 to FUNCTION. Used for C++ multiple inheritance. */
25699 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
25700 HOST_WIDE_INT delta
,
25701 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
25704 static int thunk_label
= 0;
25707 int mi_delta
= delta
;
25708 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
25710 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
25713 mi_delta
= - mi_delta
;
25715 final_start_function (emit_barrier (), file
, 1);
25719 int labelno
= thunk_label
++;
25720 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
      /* Thunks are entered in ARM mode when available.  */
25722 if (TARGET_THUMB1_ONLY
)
25724 /* push r3 so we can use it as a temporary. */
25725 /* TODO: Omit this save if r3 is not used. */
25726 fputs ("\tpush {r3}\n", file
);
25727 fputs ("\tldr\tr3, ", file
);
25731 fputs ("\tldr\tr12, ", file
);
25733 assemble_name (file
, label
);
25734 fputc ('\n', file
);
25737 /* If we are generating PIC, the ldr instruction below loads
25738 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25739 the address of the add + 8, so we have:
25741 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25744 Note that we have "+ 1" because some versions of GNU ld
25745 don't set the low bit of the result for R_ARM_REL32
25746 relocations against thumb function symbols.
25747 On ARMv6M this is +4, not +8. */
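          /* Worked through (added for clarity, not in the original source):
             the two .LTHUNKPCn terms cancel, so r12 ends up holding
             target + 1; the set low bit makes the final "bx r12" enter the
             target function in Thumb state.  */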
25748 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
25749 assemble_name (file
, labelpc
);
25750 fputs (":\n", file
);
25751 if (TARGET_THUMB1_ONLY
)
25753 /* This is 2 insns after the start of the thunk, so we know it
25754 is 4-byte aligned. */
25755 fputs ("\tadd\tr3, pc, r3\n", file
);
25756 fputs ("\tmov r12, r3\n", file
);
25759 fputs ("\tadd\tr12, pc, r12\n", file
);
25761 else if (TARGET_THUMB1_ONLY
)
25762 fputs ("\tmov r12, r3\n", file
);
25764 if (TARGET_THUMB1_ONLY
)
25766 if (mi_delta
> 255)
25768 fputs ("\tldr\tr3, ", file
);
25769 assemble_name (file
, label
);
25770 fputs ("+4\n", file
);
25771 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
25772 mi_op
, this_regno
, this_regno
);
25774 else if (mi_delta
!= 0)
25776 /* Thumb1 unified syntax requires s suffix in instruction name when
25777 one of the operands is immediate. */
25778 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
25779 mi_op
, this_regno
, this_regno
,
25785 /* TODO: Use movw/movt for large constants when available. */
25786 while (mi_delta
!= 0)
25788 if ((mi_delta
& (3 << shift
)) == 0)
25792 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
25793 mi_op
, this_regno
, this_regno
,
25794 mi_delta
& (0xff << shift
));
25795 mi_delta
&= ~(0xff << shift
);
25802 if (TARGET_THUMB1_ONLY
)
25803 fputs ("\tpop\t{r3}\n", file
);
25805 fprintf (file
, "\tbx\tr12\n");
25806 ASM_OUTPUT_ALIGN (file
, 2);
25807 assemble_name (file
, label
);
25808 fputs (":\n", file
);
25811 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25812 rtx tem
= XEXP (DECL_RTL (function
), 0);
25813 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25814 pipeline offset is four rather than eight. Adjust the offset
25816 tem
= plus_constant (GET_MODE (tem
), tem
,
25817 TARGET_THUMB1_ONLY
? -3 : -7);
25818 tem
= gen_rtx_MINUS (GET_MODE (tem
),
25820 gen_rtx_SYMBOL_REF (Pmode
,
25821 ggc_strdup (labelpc
)));
25822 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
25825 /* Output ".word .LTHUNKn". */
25826 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
25828 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
25829 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
25833 fputs ("\tb\t", file
);
25834 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
25835 if (NEED_PLT_RELOC
)
25836 fputs ("(PLT)", file
);
25837 fputc ('\n', file
);
25840 final_end_function ();
25844 arm_emit_vector_const (FILE *file
, rtx x
)
25847 const char * pattern
;
25849 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
25851 switch (GET_MODE (x
))
25853 case V2SImode
: pattern
= "%08x"; break;
25854 case V4HImode
: pattern
= "%04x"; break;
25855 case V8QImode
: pattern
= "%02x"; break;
25856 default: gcc_unreachable ();
25859 fprintf (file
, "0x");
25860 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
25864 element
= CONST_VECTOR_ELT (x
, i
);
25865 fprintf (file
, pattern
, INTVAL (element
));
25871 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25872 HFmode constant pool entries are actually loaded with ldr. */
25874 arm_emit_fp16_const (rtx c
)
25879 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
25880 bits
= real_to_target (NULL
, &r
, HFmode
);
25881 if (WORDS_BIG_ENDIAN
)
25882 assemble_zeros (2);
25883 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
25884 if (!WORDS_BIG_ENDIAN
)
25885 assemble_zeros (2);
25889 arm_output_load_gr (rtx
*operands
)
25896 if (!MEM_P (operands
[1])
25897 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
25898 || !REG_P (reg
= XEXP (sum
, 0))
25899 || !CONST_INT_P (offset
= XEXP (sum
, 1))
25900 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
25901 return "wldrw%?\t%0, %1";
25903 /* Fix up an out-of-range load of a GR register. */
25904 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
25905 wcgr
= operands
[0];
25907 output_asm_insn ("ldr%?\t%0, %1", operands
);
25909 operands
[0] = wcgr
;
25911 output_asm_insn ("tmcr%?\t%0, %1", operands
);
25912 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
25917 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25919 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25920 named arg and all anonymous args onto the stack.
25921 XXX I know the prologue shouldn't be pushing registers, but it is faster
25925 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
25929 int second_time ATTRIBUTE_UNUSED
)
25931 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
25934 cfun
->machine
->uses_anonymous_args
= 1;
25935 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
25937 nregs
= pcum
->aapcs_ncrn
;
25938 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
25942 nregs
= pcum
->nregs
;
25944 if (nregs
< NUM_ARG_REGS
)
25945 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
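  /* Illustrative example (added, not from the original source): for
     "int f (int a, ...)" only r0 carries a named argument, so nregs == 1
     and *pretend_size becomes (NUM_ARG_REGS - 1) * UNITS_PER_WORD, i.e.
     12 bytes, telling the prologue to push r1-r3.  */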
25948 /* We can't rely on the caller doing the proper promotion when
25949 using APCS or ATPCS. */
25952 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
25954 return !TARGET_AAPCS_BASED
;
25957 static machine_mode
25958 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
25960 int *punsignedp ATTRIBUTE_UNUSED
,
25961 const_tree fntype ATTRIBUTE_UNUSED
,
25962 int for_return ATTRIBUTE_UNUSED
)
25964 if (GET_MODE_CLASS (mode
) == MODE_INT
25965 && GET_MODE_SIZE (mode
) < 4)
25971 /* AAPCS based ABIs use short enums by default. */
25974 arm_default_short_enums (void)
25976 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
25980 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25983 arm_align_anon_bitfield (void)
25985 return TARGET_AAPCS_BASED
;
25989 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25992 arm_cxx_guard_type (void)
25994 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
25998 /* The EABI says test the least significant bit of a guard variable. */
26001 arm_cxx_guard_mask_bit (void)
26003 return TARGET_AAPCS_BASED
;
26007 /* The EABI specifies that all array cookies are 8 bytes long. */
26010 arm_get_cookie_size (tree type
)
26014 if (!TARGET_AAPCS_BASED
)
26015 return default_cxx_get_cookie_size (type
);
26017 size
= build_int_cst (sizetype
, 8);
26022 /* The EABI says that array cookies should also contain the element size. */
26025 arm_cookie_has_size (void)
26027 return TARGET_AAPCS_BASED
;
26031 /* The EABI says constructors and destructors should return a pointer to
26032 the object constructed/destroyed. */
26035 arm_cxx_cdtor_returns_this (void)
26037 return TARGET_AAPCS_BASED
;
26040 /* The EABI says that an inline function may never be the key
26044 arm_cxx_key_method_may_be_inline (void)
26046 return !TARGET_AAPCS_BASED
;
26050 arm_cxx_determine_class_data_visibility (tree decl
)
26052 if (!TARGET_AAPCS_BASED
26053 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26056 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26057 is exported. However, on systems without dynamic vague linkage,
26058 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26059 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26060 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26062 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26063 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26067 arm_cxx_class_data_always_comdat (void)
26069 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26070 vague linkage if the class has no key function. */
26071 return !TARGET_AAPCS_BASED
;
26075 /* The EABI says __aeabi_atexit should be used to register static
26079 arm_cxx_use_aeabi_atexit (void)
26081 return TARGET_AAPCS_BASED
;
26086 arm_set_return_address (rtx source
, rtx scratch
)
26088 arm_stack_offsets
*offsets
;
26089 HOST_WIDE_INT delta
;
26091 unsigned long saved_regs
;
26093 offsets
= arm_get_frame_offsets ();
26094 saved_regs
= offsets
->saved_regs_mask
;
26096 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26097 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26100 if (frame_pointer_needed
)
26101 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26104 /* LR will be the first saved register. */
26105 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26110 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26111 GEN_INT (delta
& ~4095)));
26116 addr
= stack_pointer_rtx
;
26118 addr
= plus_constant (Pmode
, addr
, delta
);
26120 /* The store needs to be marked as frame related in order to prevent
26121 DSE from deleting it as dead if it is based on fp. */
26122 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26123 RTX_FRAME_RELATED_P (insn
) = 1;
26124 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26130 thumb_set_return_address (rtx source
, rtx scratch
)
26132 arm_stack_offsets
*offsets
;
26133 HOST_WIDE_INT delta
;
26134 HOST_WIDE_INT limit
;
26137 unsigned long mask
;
26141 offsets
= arm_get_frame_offsets ();
26142 mask
= offsets
->saved_regs_mask
;
26143 if (mask
& (1 << LR_REGNUM
))
26146 /* Find the saved regs. */
26147 if (frame_pointer_needed
)
26149 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26150 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26156 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26159 /* Allow for the stack frame. */
26160 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26162 /* The link register is always the first saved register. */
26165 /* Construct the address. */
26166 addr
= gen_rtx_REG (SImode
, reg
);
26169 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26170 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26174 addr
= plus_constant (Pmode
, addr
, delta
);
26176 /* The store needs to be marked as frame related in order to prevent
26177 DSE from deleting it as dead if it is based on fp. */
26178 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26179 RTX_FRAME_RELATED_P (insn
) = 1;
26180 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26183 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26186 /* Implements target hook vector_mode_supported_p. */
26188 arm_vector_mode_supported_p (machine_mode mode
)
26190 /* Neon also supports V2SImode, etc. listed in the clause below. */
26191 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26192 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
26195 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26196 && ((mode
== V2SImode
)
26197 || (mode
== V4HImode
)
26198 || (mode
== V8QImode
)))
26201 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26202 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26203 || mode
== V2HAmode
))
26209 /* Implements target hook array_mode_supported_p. */
26212 arm_array_mode_supported_p (machine_mode mode
,
26213 unsigned HOST_WIDE_INT nelems
)
26216 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26217 && (nelems
>= 2 && nelems
<= 4))
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */
26227 static machine_mode
26228 arm_preferred_simd_mode (machine_mode mode
)
26234 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26236 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26238 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26240 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26242 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26249 if (TARGET_REALLY_IWMMXT
)
26265 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26267 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26268 using r0-r4 for function arguments, r7 for the stack frame and don't have
26269 enough left over to do doubleword arithmetic. For Thumb-2 all the
26270 potentially problematic instructions accept high registers so this is not
26271 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26272 that require many low registers. */
26274 arm_class_likely_spilled_p (reg_class_t rclass
)
26276 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
26277 || rclass
== CC_REG
)
26283 /* Implements target hook small_register_classes_for_mode_p. */
26285 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
26287 return TARGET_THUMB1
;
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
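/* Illustrative consequence (added, not part of the original source): because
   the mask is 255 for SImode, a variable shift written as "x << (n & 255)"
   can be simplified to "x << n", while no such folding is valid for DImode.  */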
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
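/* Illustrative mapping (added, not part of the original source): s0, the
   first VFP register, maps to DWARF register 64, while a double-only
   register such as d16 falls in the 256 + N range (256 + 16 = 272).  */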
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
26332 arm_dwarf_register_span (rtx rtl)
26340   regno = REGNO (rtl);
26341   if (!IS_VFP_REGNUM (regno))
26344   /* XXX FIXME: The EABI defines two VFP register ranges:
26345 	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26347      The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26348      corresponding D register.  Until GDB supports this, we shall use the
26349      legacy encodings.  We also use these encodings for D0-D15 for
26350      compatibility with older debuggers.  */
26351   mode = GET_MODE (rtl);
26352   if (GET_MODE_SIZE (mode) < 8)
26355   if (VFP_REGNO_OK_FOR_SINGLE (regno))
26357       nregs = GET_MODE_SIZE (mode) / 4;
26358       for (i = 0; i < nregs; i += 2)
26359 	if (TARGET_BIG_END)
26361 	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26362 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26366 	    parts[i] = gen_rtx_REG (SImode, regno + i);
26367 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26372       nregs = GET_MODE_SIZE (mode) / 8;
26373       for (i = 0; i < nregs; i++)
26374 	parts[i] = gen_rtx_REG (DImode, regno + i);
26377   return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
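/* As an illustration of the span built above (hypothetical register
   choice): a DFmode value living in the single-precision bank, say d8
   alias s16/s17, is described as two SImode pieces,
     little-endian:  (parallel [(reg:SI s16) (reg:SI s17)])
   with the pieces swapped for TARGET_BIG_END, while d16 and above fail
   VFP_REGNO_OK_FOR_SINGLE and are described as whole DImode pieces.  */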
26380 #if ARM_UNWIND_INFO
26381 /* Emit unwind directives for a store-multiple instruction or stack pointer
26382 push during alignment.
26383 These should only ever be generated by the function prologue code, so
26384 expect them to have a particular form.
26385 The store-multiple instruction sometimes pushes pc as the last register,
26386 although it should not be tracked into unwind information, or for -Os
26387    sometimes pushes some dummy registers before the first register that needs
26388 to be tracked in unwind information; such dummy registers are there just
26389 to avoid separate stack adjustment, and will not be restored in the
26393 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
26396 HOST_WIDE_INT offset
;
26397 HOST_WIDE_INT nregs
;
26401 unsigned padfirst
= 0, padlast
= 0;
26404 e
= XVECEXP (p
, 0, 0);
26405 gcc_assert (GET_CODE (e
) == SET
);
26407 /* First insn will adjust the stack pointer. */
26408 gcc_assert (GET_CODE (e
) == SET
26409 && REG_P (SET_DEST (e
))
26410 && REGNO (SET_DEST (e
)) == SP_REGNUM
26411 && GET_CODE (SET_SRC (e
)) == PLUS
);
26413 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
26414 nregs
= XVECLEN (p
, 0) - 1;
26415 gcc_assert (nregs
);
26417 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
26420 /* For -Os dummy registers can be pushed at the beginning to
26421 avoid separate stack pointer adjustment. */
26422 e
= XVECEXP (p
, 0, 1);
26423 e
= XEXP (SET_DEST (e
), 0);
26424 if (GET_CODE (e
) == PLUS
)
26425 padfirst
= INTVAL (XEXP (e
, 1));
26426 gcc_assert (padfirst
== 0 || optimize_size
);
26427 /* The function prologue may also push pc, but not annotate it as it is
26428 never restored. We turn this into a stack pointer adjustment. */
26429 e
= XVECEXP (p
, 0, nregs
);
26430 e
= XEXP (SET_DEST (e
), 0);
26431 if (GET_CODE (e
) == PLUS
)
26432 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
26434 padlast
= offset
- 4;
26435 gcc_assert (padlast
== 0 || padlast
== 4);
26437 fprintf (asm_out_file
, "\t.pad #4\n");
26439 fprintf (asm_out_file
, "\t.save {");
26441 else if (IS_VFP_REGNUM (reg
))
26444 fprintf (asm_out_file
, "\t.vsave {");
26447 /* Unknown register type. */
26448 gcc_unreachable ();
26450 /* If the stack increment doesn't match the size of the saved registers,
26451 something has gone horribly wrong. */
26452 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
26456 /* The remaining insns will describe the stores. */
26457 for (i
= 1; i
<= nregs
; i
++)
26459 /* Expect (set (mem <addr>) (reg)).
26460 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26461 e
= XVECEXP (p
, 0, i
);
26462 gcc_assert (GET_CODE (e
) == SET
26463 && MEM_P (SET_DEST (e
))
26464 && REG_P (SET_SRC (e
)));
26466 reg
= REGNO (SET_SRC (e
));
26467 gcc_assert (reg
>= lastreg
);
26470 fprintf (asm_out_file
, ", ");
26471 /* We can't use %r for vfp because we need to use the
26472 double precision register names. */
26473 if (IS_VFP_REGNUM (reg
))
26474 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
26476 asm_fprintf (asm_out_file
, "%r", reg
);
26478 #ifdef ENABLE_CHECKING
26479 /* Check that the addresses are consecutive. */
26480 e
= XEXP (SET_DEST (e
), 0);
26481 if (GET_CODE (e
) == PLUS
)
26482 gcc_assert (REG_P (XEXP (e
, 0))
26483 && REGNO (XEXP (e
, 0)) == SP_REGNUM
26484 && CONST_INT_P (XEXP (e
, 1))
26485 && offset
== INTVAL (XEXP (e
, 1)));
26489 && REGNO (e
) == SP_REGNUM
);
26490 offset
+= reg_size
;
26493 fprintf (asm_out_file
, "}\n");
26495 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
26498 /* Emit unwind directives for a SET. */
26501 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
26509 switch (GET_CODE (e0
))
26512 /* Pushing a single register. */
26513 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
26514 || !REG_P (XEXP (XEXP (e0
, 0), 0))
26515 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
26518 asm_fprintf (asm_out_file
, "\t.save ");
26519 if (IS_VFP_REGNUM (REGNO (e1
)))
26520 asm_fprintf(asm_out_file
, "{d%d}\n",
26521 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
26523 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
26527 if (REGNO (e0
) == SP_REGNUM
)
26529 /* A stack increment. */
26530 if (GET_CODE (e1
) != PLUS
26531 || !REG_P (XEXP (e1
, 0))
26532 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
26533 || !CONST_INT_P (XEXP (e1
, 1)))
26536 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
26537 -INTVAL (XEXP (e1
, 1)));
26539 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
26541 HOST_WIDE_INT offset
;
26543 if (GET_CODE (e1
) == PLUS
)
26545 if (!REG_P (XEXP (e1
, 0))
26546 || !CONST_INT_P (XEXP (e1
, 1)))
26548 reg
= REGNO (XEXP (e1
, 0));
26549 offset
= INTVAL (XEXP (e1
, 1));
26550 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
26551 HARD_FRAME_POINTER_REGNUM
, reg
,
26554 else if (REG_P (e1
))
26557 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
26558 HARD_FRAME_POINTER_REGNUM
, reg
);
26563 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
26565 /* Move from sp to reg. */
26566 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
26568 else if (GET_CODE (e1
) == PLUS
26569 && REG_P (XEXP (e1
, 0))
26570 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
26571 && CONST_INT_P (XEXP (e1
, 1)))
26573 /* Set reg to offset from sp. */
26574 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
26575 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
26587 /* Emit unwind directives for the given insn. */
26590 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
26593 bool handled_one
= false;
26595 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
26598 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
26599 && (TREE_NOTHROW (current_function_decl
)
26600 || crtl
->all_throwers_are_sibcalls
))
26603 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
26606 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
26608 switch (REG_NOTE_KIND (note
))
26610 case REG_FRAME_RELATED_EXPR
:
26611 pat
= XEXP (note
, 0);
26614 case REG_CFA_REGISTER
:
26615 pat
= XEXP (note
, 0);
26618 pat
= PATTERN (insn
);
26619 if (GET_CODE (pat
) == PARALLEL
)
26620 pat
= XVECEXP (pat
, 0, 0);
26623 /* Only emitted for IS_STACKALIGN re-alignment. */
26628 src
= SET_SRC (pat
);
26629 dest
= SET_DEST (pat
);
26631 gcc_assert (src
== stack_pointer_rtx
);
26632 reg
= REGNO (dest
);
26633 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26636 handled_one
= true;
26639 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26640 to get correct dwarf information for shrink-wrap. We should not
26641 emit unwind information for it because these are used either for
26642 pretend arguments or notes to adjust sp and restore registers from
26644 case REG_CFA_DEF_CFA
:
26645 case REG_CFA_ADJUST_CFA
:
26646 case REG_CFA_RESTORE
:
26649 case REG_CFA_EXPRESSION
:
26650 case REG_CFA_OFFSET
:
26651 /* ??? Only handling here what we actually emit. */
26652 gcc_unreachable ();
26660 pat
= PATTERN (insn
);
26663 switch (GET_CODE (pat
))
26666 arm_unwind_emit_set (asm_out_file
, pat
);
26670 /* Store multiple. */
26671 arm_unwind_emit_sequence (asm_out_file
, pat
);
26680 /* Output a reference from a function exception table to the type_info
26681 object X. The EABI specifies that the symbol should be relocated by
26682 an R_ARM_TARGET2 relocation. */
26685 arm_output_ttype (rtx x)
26687   fputs ("\t.word\t", asm_out_file);
26688   output_addr_const (asm_out_file, x);
26689   /* Use special relocations for symbol references.  */
26690   if (!CONST_INT_P (x))
26691     fputs ("(TARGET2)", asm_out_file);
26692   fputc ('\n', asm_out_file);
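/* For example (hypothetical type_info symbol), a symbolic entry is
   emitted as
	.word	_ZTI3Foo(TARGET2)
   so the linker applies an R_ARM_TARGET2 relocation, whereas a CONST_INT
   entry is emitted as a plain ".word" with no relocation suffix.  */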
26697 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26700 arm_asm_emit_except_personality (rtx personality)
26702   fputs ("\t.personality\t", asm_out_file);
26703   output_addr_const (asm_out_file, personality);
26704   fputc ('\n', asm_out_file);
26707 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
26710 arm_asm_init_sections (void)
26712   exception_section = get_unnamed_section (0, output_section_asm_op,
26715 #endif /* ARM_UNWIND_INFO */
26717 /* Output unwind directives for the start/end of a function. */
26720 arm_output_fn_unwind (FILE * f, bool prologue)
26722   if (arm_except_unwind_info (&global_options) != UI_TARGET)
26726       fputs ("\t.fnstart\n", f);
26729       /* If this function will never be unwound, then mark it as such.
26730          The same condition is used in arm_unwind_emit to suppress
26731 	 the frame annotations.  */
26732       if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26733 	  && (TREE_NOTHROW (current_function_decl)
26734 	      || crtl->all_throwers_are_sibcalls))
26735 	fputs ("\t.cantunwind\n", f);
26737     fputs ("\t.fnend\n", f);
26742 arm_emit_tls_decoration (FILE *fp
, rtx x
)
26744 enum tls_reloc reloc
;
26747 val
= XVECEXP (x
, 0, 0);
26748 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
26750 output_addr_const (fp
, val
);
26755 fputs ("(tlsgd)", fp
);
26758 fputs ("(tlsldm)", fp
);
26761 fputs ("(tlsldo)", fp
);
26764 fputs ("(gottpoff)", fp
);
26767 fputs ("(tpoff)", fp
);
26770 fputs ("(tlsdesc)", fp
);
26773 gcc_unreachable ();
26782       fputs (" + (. - ", fp);
26783       output_addr_const (fp, XVECEXP (x, 0, 2));
26784       /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26785       fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26786       output_addr_const (fp, XVECEXP (x, 0, 3));
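/* Putting the pieces above together, a PC-relative TLS operand is printed
   in the shape (label and offset values here are hypothetical):
	x(tlsgd) + (. - .LPIC0 - 8)
   where ".LPIC0" and "8" come from operands 2 and 3 of the UNSPEC, and the
   sign before the final term is "+" only for TLS_DESCSEQ, as tested above.  */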
26796 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26799 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
26801 gcc_assert (size
== 4);
26802 fputs ("\t.word\t", file
);
26803 output_addr_const (file
, x
);
26804 fputs ("(tlsldo)", file
);
26807 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26810 arm_output_addr_const_extra (FILE *fp
, rtx x
)
26812 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
26813 return arm_emit_tls_decoration (fp
, x
);
26814 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
26817 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
26819 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
26820 assemble_name_raw (fp
, label
);
26824 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
26826 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
26830 output_addr_const (fp
, XVECEXP (x
, 0, 0));
26834 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
26836 output_addr_const (fp
, XVECEXP (x
, 0, 0));
26840 output_addr_const (fp
, XVECEXP (x
, 0, 1));
26844 else if (GET_CODE (x
) == CONST_VECTOR
)
26845 return arm_emit_vector_const (fp
, x
);
26850 /* Output assembly for a shift instruction.
26851 SET_FLAGS determines how the instruction modifies the condition codes.
26852 0 - Do not set condition codes.
26853 1 - Set condition codes.
26854 2 - Use smallest instruction. */
26856 arm_output_shift(rtx
* operands
, int set_flags
)
26859 static const char flag_chars
[3] = {'?', '.', '!'};
26864 c
= flag_chars
[set_flags
];
26865 if (TARGET_UNIFIED_ASM
)
26867 shift
= shift_op(operands
[3], &val
);
26871 operands
[2] = GEN_INT(val
);
26872 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
26875 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
26878 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
26879 output_asm_insn (pattern
, operands
);
26883 /* Output assembly for a WMMX immediate shift instruction. */
26885 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
26887 int shift
= INTVAL (operands
[2]);
26889 machine_mode opmode
= GET_MODE (operands
[0]);
26891 gcc_assert (shift
>= 0);
26893   /* If the shift value in the register versions is > 63 (for D qualifier),
26894      31 (for W qualifier) or 15 (for H qualifier), the count is out of
	 range and is handled specially below.  */
26895   if (((opmode == V4HImode) && (shift > 15))
26896       || ((opmode == V2SImode) && (shift > 31))
26897       || ((opmode == DImode) && (shift > 63)))
26901 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
26902 output_asm_insn (templ
, operands
);
26903 if (opmode
== DImode
)
26905 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
26906 output_asm_insn (templ
, operands
);
26911 /* The destination register will contain all zeros. */
26912 sprintf (templ
, "wzero\t%%0");
26913 output_asm_insn (templ
, operands
);
26918 if ((opmode
== DImode
) && (shift
> 32))
26920 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
26921 output_asm_insn (templ
, operands
);
26922 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
26923 output_asm_insn (templ
, operands
);
26927 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
26928 output_asm_insn (templ
, operands
);
26933 /* Output assembly for a WMMX tinsr instruction. */
26935 arm_output_iwmmxt_tinsr (rtx
*operands
)
26937 int mask
= INTVAL (operands
[3]);
26940 int units
= mode_nunits
[GET_MODE (operands
[0])];
26941 gcc_assert ((mask
& (mask
- 1)) == 0);
26942 for (i
= 0; i
< units
; ++i
)
26944 if ((mask
& 0x01) == 1)
26950 gcc_assert (i
< units
);
26952 switch (GET_MODE (operands
[0]))
26955 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
26958 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
26961 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
26964 gcc_unreachable ();
26967 output_asm_insn (templ
, operands
);
26972 /* Output a Thumb-1 casesi dispatch sequence. */
26974 thumb1_output_casesi (rtx
*operands
)
26976 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
26978 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
26980 switch (GET_MODE(diff_vec
))
26983 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
26984 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26986 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
26987 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26989 return "bl\t%___gnu_thumb1_case_si";
26991 gcc_unreachable ();
26995 /* Output a Thumb-2 casesi instruction. */
26997 thumb2_output_casesi (rtx
*operands
)
26999 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27001 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27003 output_asm_insn ("cmp\t%0, %1", operands
);
27004 output_asm_insn ("bhi\t%l3", operands
);
27005 switch (GET_MODE(diff_vec
))
27008 return "tbb\t[%|pc, %0]";
27010 return "tbh\t[%|pc, %0, lsl #1]";
27014 output_asm_insn ("adr\t%4, %l2", operands
);
27015 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27016 output_asm_insn ("add\t%4, %4, %5", operands
);
27021 output_asm_insn ("adr\t%4, %l2", operands
);
27022 return "ldr\t%|pc, [%4, %0, lsl #2]";
27025 gcc_unreachable ();
27029 /* Most ARM cores are single issue, but some newer ones can dual issue.
27030 The scheduler descriptions rely on this being correct. */
27032 arm_issue_rate (void)
27062 arm_mangle_type (const_tree type)
27064   /* The ARM ABI documents (10th October 2008) say that "__va_list"
27065      has to be mangled as if it is in the "std" namespace.  */
27066   if (TARGET_AAPCS_BASED
27067       && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27068     return "St9__va_list";
27070   /* Half-precision float.  */
27071   if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27074   /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27076   if (TYPE_NAME (type) != NULL)
27077     return arm_mangle_builtin_type (type);
27079   /* Use the default mangling.  */
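/* Mangling example (hypothetical function name): on an AAPCS target a
   parameter of type va_list uses the "St9__va_list" spelling above, so
   "void f (va_list)" mangles as "_Z1fSt9__va_list".  The 16-bit REAL_TYPE
   case covers __fp16 and returns its ABI-defined mangling.  */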
27083 /* Order of allocation of core registers for Thumb: this allocation is
27084 written over the corresponding initial entries of the array
27085 initialized with REG_ALLOC_ORDER. We allocate all low registers
27086 first. Saving and restoring a low register is usually cheaper than
27087 using a call-clobbered high register. */
27089 static const int thumb_core_reg_alloc_order
[] =
27091 3, 2, 1, 0, 4, 5, 6, 7,
27092 14, 12, 8, 9, 10, 11
27095 /* Adjust register allocation order when compiling for Thumb. */
27098 arm_order_regs_for_local_alloc (void)
27100 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27101 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27103 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27104 sizeof (thumb_core_reg_alloc_order
));
27107 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27110 arm_frame_pointer_required (void)
27112 return (cfun
->has_nonlocal_label
27113 || SUBTARGET_FRAME_POINTER_REQUIRED
27114 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
27117 /* Only thumb1 can't support conditional execution, so return true if
27118 the target is not thumb1. */
27120 arm_have_conditional_execution (void)
27122 return !TARGET_THUMB1
;
27125 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27126 static HOST_WIDE_INT
27127 arm_vector_alignment (const_tree type)
27129   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27131   if (TARGET_AAPCS_BASED)
27132     align = MIN (align, 64);
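/* Example: a 128-bit NEON vector type has TYPE_SIZE 128, so it would
   normally be given 16-byte alignment; under AAPCS the hook above caps
   that at 64 bits (8 bytes), as the ABI requires.  */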
27137 static unsigned int
27138 arm_autovectorize_vector_sizes (void)
27140 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27144 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27146 /* Vectors which aren't in packed structures will not be less aligned than
27147 the natural alignment of their element type, so this is safe. */
27148 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27151 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27155 arm_builtin_support_vector_misalignment (machine_mode mode
,
27156 const_tree type
, int misalignment
,
27159 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27161 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27166 /* If the misalignment is unknown, we should be able to handle the access
27167 so long as it is not to a member of a packed data structure. */
27168 if (misalignment
== -1)
27171 /* Return true if the misalignment is a multiple of the natural alignment
27172 of the vector's element type. This is probably always going to be
27173 true in practice, since we've already established that this isn't a
27175 return ((misalignment
% align
) == 0);
27178 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27183 arm_conditional_register_usage (void)
27187 if (TARGET_THUMB1
&& optimize_size
)
27189 /* When optimizing for size on Thumb-1, it's better not
27190 to use the HI regs, because of the overhead of
27192 for (regno
= FIRST_HI_REGNUM
;
27193 regno
<= LAST_HI_REGNUM
; ++regno
)
27194 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27197 /* The link register can be clobbered by any branch insn,
27198 but we have no way to track that at present, so mark
27199 it as unavailable. */
27201 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27203 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
27205 /* VFPv3 registers are disabled when earlier VFP
27206 versions are selected due to the definition of
27207 LAST_VFP_REGNUM. */
27208 for (regno
= FIRST_VFP_REGNUM
;
27209 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27211 fixed_regs
[regno
] = 0;
27212 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27213 || regno
>= FIRST_VFP_REGNUM
+ 32;
27217 if (TARGET_REALLY_IWMMXT
)
27219 regno
= FIRST_IWMMXT_GR_REGNUM
;
27220 /* The 2002/10/09 revision of the XScale ABI has wCG0
27221 and wCG1 as call-preserved registers. The 2002/11/21
27222 revision changed this so that all wCG registers are
27223 scratch registers. */
27224 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27225 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27226 fixed_regs
[regno
] = 0;
27227 /* The XScale ABI has wR0 - wR9 as scratch registers,
27228 the rest as call-preserved registers. */
27229 for (regno
= FIRST_IWMMXT_REGNUM
;
27230 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27232 fixed_regs
[regno
] = 0;
27233 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27237 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27239 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27240 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27242 else if (TARGET_APCS_STACK
)
27244 fixed_regs
[10] = 1;
27245 call_used_regs
[10] = 1;
27247 /* -mcaller-super-interworking reserves r11 for calls to
27248 _interwork_r11_call_via_rN(). Making the register global
27249 is an easy way of ensuring that it remains valid for all
27251 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27252 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27254 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27255 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27256 if (TARGET_CALLER_INTERWORKING
)
27257 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27259 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27263 arm_preferred_rename_class (reg_class_t rclass
)
27265 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27266 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27267 and code size can be reduced. */
27268 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27274 /* Compute the attribute "length" of insn "*push_multi".
27275    So this function MUST be kept in sync with that insn pattern.  */
27277 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
27279   int i, regno, hi_reg;
27280   int num_saves = XVECLEN (parallel_op, 0);
27290   regno = REGNO (first_op);
27291   hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27292   for (i = 1; i < num_saves && !hi_reg; i++)
27294       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27295       hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27303 /* Compute the number of instructions emitted by output_move_double. */
27305 arm_count_output_move_double_insns (rtx
*operands
)
27309 /* output_move_double may modify the operands array, so call it
27310 here on a copy of the array. */
27311 ops
[0] = operands
[0];
27312 ops
[1] = operands
[1];
27313 output_move_double (ops
, false, &count
);
27318 vfp3_const_double_for_fract_bits (rtx operand
)
27320 REAL_VALUE_TYPE r0
;
27322 if (!CONST_DOUBLE_P (operand
))
27325 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
27326 if (exact_real_inverse (DFmode
, &r0
))
27328 if (exact_real_truncate (DFmode
, &r0
))
27330 HOST_WIDE_INT value
= real_to_integer (&r0
);
27331 value
= value
& 0xffffffff;
27332 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
27333 return int_log2 (value
);
27340 vfp3_const_double_for_bits (rtx operand
)
27342 REAL_VALUE_TYPE r0
;
27344 if (!CONST_DOUBLE_P (operand
))
27347 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
27348 if (exact_real_truncate (DFmode
, &r0
))
27350 HOST_WIDE_INT value
= real_to_integer (&r0
);
27351 value
= value
& 0xffffffff;
27352 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
27353 return int_log2 (value
);
27359 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27362 arm_pre_atomic_barrier (enum memmodel model)
27364   if (need_atomic_barrier_p (model, true))
27365     emit_insn (gen_memory_barrier ());
27369 arm_post_atomic_barrier (enum memmodel model)
27371   if (need_atomic_barrier_p (model, false))
27372     emit_insn (gen_memory_barrier ());
27375 /* Emit the load-exclusive and store-exclusive instructions.
27376 Use acquire and release versions if necessary. */
27379 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
27381 rtx (*gen
) (rtx
, rtx
);
27387 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
27388 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
27389 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
27390 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
27392 gcc_unreachable ();
27399 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
27400 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
27401 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
27402 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
27404 gcc_unreachable ();
27408 emit_insn (gen (rval
, mem
));
27412 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
27415 rtx (*gen
) (rtx
, rtx
, rtx
);
27421 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
27422 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
27423 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
27424 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
27426 gcc_unreachable ();
27433 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
27434 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
27435 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
27436 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
27438 gcc_unreachable ();
27442 emit_insn (gen (bval
, rval
, mem
));
27445 /* Mark the previous jump instruction as unlikely. */
27448 emit_unlikely_jump (rtx insn)
27450   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27452   insn = emit_jump_insn (insn);
27453   add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27456 /* Expand a compare and swap pattern. */
27459 arm_expand_compare_and_swap (rtx operands
[])
27461 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
27463 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
27465 bval
= operands
[0];
27466 rval
= operands
[1];
27468 oldval
= operands
[3];
27469 newval
= operands
[4];
27470 is_weak
= operands
[5];
27471 mod_s
= operands
[6];
27472 mod_f
= operands
[7];
27473 mode
= GET_MODE (mem
);
27475 /* Normally the succ memory model must be stronger than fail, but in the
27476 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27477 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27479 if (TARGET_HAVE_LDACQ
27480 && INTVAL (mod_f
) == MEMMODEL_ACQUIRE
27481 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
27482 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
27488 /* For narrow modes, we're going to perform the comparison in SImode,
27489 so do the zero-extension now. */
27490 rval
= gen_reg_rtx (SImode
);
27491 oldval
= convert_modes (SImode
, mode
, oldval
, true);
27495 /* Force the value into a register if needed. We waited until after
27496 the zero-extension above to do this properly. */
27497 if (!arm_add_operand (oldval
, SImode
))
27498 oldval
= force_reg (SImode
, oldval
);
27502 if (!cmpdi_operand (oldval
, mode
))
27503 oldval
= force_reg (mode
, oldval
);
27507 gcc_unreachable ();
27512 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
27513 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
27514 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
27515 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
27517 gcc_unreachable ();
27520 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
27522 if (mode
== QImode
|| mode
== HImode
)
27523 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
27525 /* In all cases, we arrange for success to be signaled by Z set.
27526 This arrangement allows for the boolean result to be used directly
27527 in a subsequent branch, post optimization. */
27528 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
27529 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
27530 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
27533 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27534 another memory store between the load-exclusive and store-exclusive can
27535 reset the monitor from Exclusive to Open state. This means we must wait
27536 until after reload to split the pattern, lest we get a register spill in
27537 the middle of the atomic sequence. */
27540 arm_split_compare_and_swap (rtx operands
[])
27542 rtx rval
, mem
, oldval
, newval
, scratch
;
27544 enum memmodel mod_s
, mod_f
;
27546 rtx_code_label
*label1
, *label2
;
27549 rval
= operands
[0];
27551 oldval
= operands
[2];
27552 newval
= operands
[3];
27553 is_weak
= (operands
[4] != const0_rtx
);
27554 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
27555 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
27556 scratch
= operands
[7];
27557 mode
= GET_MODE (mem
);
27559 bool use_acquire
= TARGET_HAVE_LDACQ
27560 && !(mod_s
== MEMMODEL_RELAXED
27561 || mod_s
== MEMMODEL_CONSUME
27562 || mod_s
== MEMMODEL_RELEASE
);
27564 bool use_release
= TARGET_HAVE_LDACQ
27565 && !(mod_s
== MEMMODEL_RELAXED
27566 || mod_s
== MEMMODEL_CONSUME
27567 || mod_s
== MEMMODEL_ACQUIRE
);
27569 /* Checks whether a barrier is needed and emits one accordingly. */
27570 if (!(use_acquire
|| use_release
))
27571 arm_pre_atomic_barrier (mod_s
);
27576 label1
= gen_label_rtx ();
27577 emit_label (label1
);
27579 label2
= gen_label_rtx ();
27581 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
27583 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
27584 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27585 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
27586 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
27587 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
27589 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
27591 /* Weak or strong, we want EQ to be true for success, so that we
27592 match the flags that we got from the compare above. */
27593 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
27594 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
27595 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
27599 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27600 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
27601 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
27602 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
27605 if (mod_f
!= MEMMODEL_RELAXED
)
27606 emit_label (label2
);
27608 /* Checks whether a barrier is needed and emits one accordingly. */
27609 if (!(use_acquire
|| use_release
))
27610 arm_post_atomic_barrier (mod_s
);
27612   if (mod_f == MEMMODEL_RELAXED)
27613     emit_label (label2);
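/* The split above produces a loop of roughly this shape for the strong
   variant (register names are arbitrary):

	.L1:	ldrex	r0, [r2]
		cmp	r0, r3		@ rval == oldval ?
		bne	.L2		@ fail: skip the store
		strex	r1, r4, [r2]	@ try to store newval
		cmp	r1, #0
		bne	.L1		@ reservation lost: retry
	.L2:

   For the weak variant the backward branch to .L1 is omitted.  A user-level
   view of what this implements is sketched below (hypothetical helper, not
   built): success is reported through the Z flag, so the boolean result can
   feed a branch directly.  */
#if 0
static int
cas_example (int *p, int expected, int desired)
{
  return __atomic_compare_exchange_n (p, &expected, desired, /*weak=*/0,
				      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}
#endif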
27617 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
27618 rtx value
, rtx model_rtx
, rtx cond
)
27620 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
27621 machine_mode mode
= GET_MODE (mem
);
27622 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
27623 rtx_code_label
*label
;
27626 bool use_acquire
= TARGET_HAVE_LDACQ
27627 && !(model
== MEMMODEL_RELAXED
27628 || model
== MEMMODEL_CONSUME
27629 || model
== MEMMODEL_RELEASE
);
27631 bool use_release
= TARGET_HAVE_LDACQ
27632 && !(model
== MEMMODEL_RELAXED
27633 || model
== MEMMODEL_CONSUME
27634 || model
== MEMMODEL_ACQUIRE
);
27636 /* Checks whether a barrier is needed and emits one accordingly. */
27637 if (!(use_acquire
|| use_release
))
27638 arm_pre_atomic_barrier (model
);
27640 label
= gen_label_rtx ();
27641 emit_label (label
);
27644 new_out
= gen_lowpart (wmode
, new_out
);
27646 old_out
= gen_lowpart (wmode
, old_out
);
27649 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
27651 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
27660 x
= gen_rtx_AND (wmode
, old_out
, value
);
27661 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
27662 x
= gen_rtx_NOT (wmode
, new_out
);
27663 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
27667 if (CONST_INT_P (value
))
27669 value
= GEN_INT (-INTVAL (value
));
27675 if (mode
== DImode
)
27677 /* DImode plus/minus need to clobber flags. */
27678 /* The adddi3 and subdi3 patterns are incorrectly written so that
27679 they require matching operands, even when we could easily support
27680 three operands. Thankfully, this can be fixed up post-splitting,
27681 as the individual add+adc patterns do accept three operands and
27682 post-reload cprop can make these moves go away. */
27683 emit_move_insn (new_out
, old_out
);
27685 x
= gen_adddi3 (new_out
, new_out
, value
);
27687 x
= gen_subdi3 (new_out
, new_out
, value
);
27694 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
27695 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
27699 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
27702 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27703 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
27705 /* Checks whether a barrier is needed and emits one accordingly. */
27706 if (!(use_acquire
|| use_release
))
27707 arm_post_atomic_barrier (model
);
27710 #define MAX_VECT_LEN 16
27712 struct expand_vec_perm_d
27714 rtx target
, op0
, op1
;
27715 unsigned char perm
[MAX_VECT_LEN
];
27716 machine_mode vmode
;
27717 unsigned char nelt
;
27722 /* Generate a variable permutation. */
27725 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
27727 machine_mode vmode
= GET_MODE (target
);
27728 bool one_vector_p
= rtx_equal_p (op0
, op1
);
27730 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
27731 gcc_checking_assert (GET_MODE (op0
) == vmode
);
27732 gcc_checking_assert (GET_MODE (op1
) == vmode
);
27733 gcc_checking_assert (GET_MODE (sel
) == vmode
);
27734 gcc_checking_assert (TARGET_NEON
);
27738 if (vmode
== V8QImode
)
27739 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
27741 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
27747 if (vmode
== V8QImode
)
27749 pair
= gen_reg_rtx (V16QImode
);
27750 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
27751 pair
= gen_lowpart (TImode
, pair
);
27752 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
27756 pair
= gen_reg_rtx (OImode
);
27757 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
27758 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
27764 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27766   machine_mode vmode = GET_MODE (target);
27767   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27768   bool one_vector_p = rtx_equal_p (op0, op1);
27769   rtx rmask[MAX_VECT_LEN], mask;
27771   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
27772      numbering of elements for big-endian, we must reverse the order.  */
27773   gcc_checking_assert (!BYTES_BIG_ENDIAN);
27775   /* The VTBL instruction does not use a modulo index, so we must take care
27776      of that ourselves.  */
27777   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27778   for (i = 0; i < nelt; ++i)
27780   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27781   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27783   arm_expand_vec_perm_1 (target, op0, op1, sel);
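/* Worked example of the masking above (hypothetical selector): for
   V8QImode with two distinct operands, nelt is 8 and the mask is 15, so a
   selector element of 17 is reduced to 1 and picks element 1 of op0.  This
   restores the modulo semantics VEC_PERM_EXPR requires; a raw VTBL would
   instead yield zero for such an out-of-range index.  */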
27786 /* Generate or test for an insn that supports a constant permutation. */
27788 /* Recognize patterns for the VUZP insns. */
27791 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
27793 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
27794 rtx out0
, out1
, in0
, in1
, x
;
27795 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
27797 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
27800 /* Note that these are little-endian tests. Adjust for big-endian later. */
27801 if (d
->perm
[0] == 0)
27803 else if (d
->perm
[0] == 1)
27807 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27809 for (i
= 0; i
< nelt
; i
++)
27811 unsigned elt
= (i
* 2 + odd
) & mask
;
27812 if (d
->perm
[i
] != elt
)
27822 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
27823 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
27824 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
27825 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
27826 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
27827 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
27828 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
27829 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
27831 gcc_unreachable ();
27836 if (BYTES_BIG_ENDIAN
)
27838 x
= in0
, in0
= in1
, in1
= x
;
27843 out1
= gen_reg_rtx (d
->vmode
);
27845 x
= out0
, out0
= out1
, out1
= x
;
27847   emit_insn (gen (out0, in0, in1, out1));
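/* Example of the selectors matched above: for V4SImode with two input
   vectors, the permutation {0, 2, 4, 6} (odd == 0) extracts the even lanes
   and {1, 3, 5, 7} (odd == 1) the odd lanes.  VUZP writes both halves, so
   a fresh register is created for the output that was not requested.  */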
27851 /* Recognize patterns for the VZIP insns. */
27854 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
27856 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
27857 rtx out0
, out1
, in0
, in1
, x
;
27858 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
27860 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
27863 /* Note that these are little-endian tests. Adjust for big-endian later. */
27865 if (d
->perm
[0] == high
)
27867 else if (d
->perm
[0] == 0)
27871 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27873 for (i
= 0; i
< nelt
/ 2; i
++)
27875 unsigned elt
= (i
+ high
) & mask
;
27876 if (d
->perm
[i
* 2] != elt
)
27878 elt
= (elt
+ nelt
) & mask
;
27879 if (d
->perm
[i
* 2 + 1] != elt
)
27889 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
27890 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
27891 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
27892 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
27893 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
27894 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
27895 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
27896 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
27898 gcc_unreachable ();
27903 if (BYTES_BIG_ENDIAN
)
27905 x
= in0
, in0
= in1
, in1
= x
;
27910 out1
= gen_reg_rtx (d
->vmode
);
27912 x
= out0
, out0
= out1
, out1
= x
;
27914 emit_insn (gen (out0
, in0
, in1
, out1
));
27918 /* Recognize patterns for the VREV insns. */
27921 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
27923 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
27924 rtx (*gen
)(rtx
, rtx
);
27926 if (!d
->one_vector_p
)
27935 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
27936 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
27944 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
27945 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
27946 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
27947 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
27955 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
27956 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
27957 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
27958 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
27959 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
27960 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
27961 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
27962 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
27971 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
27972 for (j
= 0; j
<= diff
; j
+= 1)
27974 /* This is guaranteed to be true as the value of diff
27975 is 7, 3, 1 and we should have enough elements in the
27976 queue to generate this. Getting a vector mask with a
27977 value of diff other than these values implies that
27978 something is wrong by the time we get here. */
27979 gcc_assert (i
+ j
< nelt
);
27980 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
27988 emit_insn (gen (d
->target
, d
->op0
));
27992 /* Recognize patterns for the VTRN insns. */
27995 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
27997 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
27998 rtx out0
, out1
, in0
, in1
, x
;
27999 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28001 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28004 /* Note that these are little-endian tests. Adjust for big-endian later. */
28005 if (d
->perm
[0] == 0)
28007 else if (d
->perm
[0] == 1)
28011 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28013 for (i
= 0; i
< nelt
; i
+= 2)
28015 if (d
->perm
[i
] != i
+ odd
)
28017 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28027 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28028 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28029 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28030 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28031 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
28032 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
28033 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
28034 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
28036 gcc_unreachable ();
28041 if (BYTES_BIG_ENDIAN
)
28043 x
= in0
, in0
= in1
, in1
= x
;
28048 out1
= gen_reg_rtx (d
->vmode
);
28050 x
= out0
, out0
= out1
, out1
= x
;
28052 emit_insn (gen (out0
, in0
, in1
, out1
));
28056 /* Recognize patterns for the VEXT insns. */
28059 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
28061 unsigned int i
, nelt
= d
->nelt
;
28062 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
28065 unsigned int location
;
28067 unsigned int next
= d
->perm
[0] + 1;
28069 /* TODO: Handle GCC's numbering of elements for big-endian. */
28070 if (BYTES_BIG_ENDIAN
)
28073 /* Check if the extracted indexes are increasing by one. */
28074 for (i
= 1; i
< nelt
; next
++, i
++)
28076 /* If we hit the most significant element of the 2nd vector in
28077 the previous iteration, no need to test further. */
28078 if (next
== 2 * nelt
)
28081 /* If we are operating on only one vector: it could be a
28082 rotation. If there are only two elements of size < 64, let
28083 arm_evpc_neon_vrev catch it. */
28084 if (d
->one_vector_p
&& (next
== nelt
))
28086 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
28092 if (d
->perm
[i
] != next
)
28096 location
= d
->perm
[0];
28100 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
28101 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
28102 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
28103 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
28104 case V2SImode
: gen
= gen_neon_vextv2si
; break;
28105 case V4SImode
: gen
= gen_neon_vextv4si
; break;
28106 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
28107 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
28108 case V2DImode
: gen
= gen_neon_vextv2di
; break;
28117 offset
= GEN_INT (location
);
28118 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
28122 /* The NEON VTBL instruction is a fully variable permutation that's even
28123 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28124 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28125 can do slightly better by expanding this as a constant where we don't
28126 have to apply a mask. */
28129 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
28131 rtx rperm
[MAX_VECT_LEN
], sel
;
28132 machine_mode vmode
= d
->vmode
;
28133 unsigned int i
, nelt
= d
->nelt
;
28135 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28136 numbering of elements for big-endian, we must reverse the order. */
28137 if (BYTES_BIG_ENDIAN
)
28143 /* Generic code will try constant permutation twice. Once with the
28144 original mode and again with the elements lowered to QImode.
28145 So wait and don't do the selector expansion ourselves. */
28146 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
28149 for (i
= 0; i
< nelt
; ++i
)
28150 rperm
[i
] = GEN_INT (d
->perm
[i
]);
28151 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
28152 sel
= force_reg (vmode
, sel
);
28154 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
28159 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
28161 /* Check if the input mask matches vext before reordering the
28164 if (arm_evpc_neon_vext (d
))
28167 /* The pattern matching functions above are written to look for a small
28168 number to begin the sequence (0, 1, N/2). If we begin with an index
28169 from the second operand, we can swap the operands. */
28170 if (d
->perm
[0] >= d
->nelt
)
28172 unsigned i
, nelt
= d
->nelt
;
28175 for (i
= 0; i
< nelt
; ++i
)
28176 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
28185 if (arm_evpc_neon_vuzp (d
))
28187 if (arm_evpc_neon_vzip (d
))
28189 if (arm_evpc_neon_vrev (d
))
28191 if (arm_evpc_neon_vtrn (d
))
28193 return arm_evpc_neon_vtbl (d
);
28198 /* Expand a vec_perm_const pattern. */
28201 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28203 struct expand_vec_perm_d d
;
28204 int i
, nelt
, which
;
28210 d
.vmode
= GET_MODE (target
);
28211 gcc_assert (VECTOR_MODE_P (d
.vmode
));
28212 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
28213 d
.testing_p
= false;
28215 for (i
= which
= 0; i
< nelt
; ++i
)
28217 rtx e
= XVECEXP (sel
, 0, i
);
28218 int ei
= INTVAL (e
) & (2 * nelt
- 1);
28219 which
|= (ei
< nelt
? 1 : 2);
28229 d
.one_vector_p
= false;
28230 if (!rtx_equal_p (op0
, op1
))
28233 /* The elements of PERM do not suggest that only the first operand
28234 is used, but both operands are identical. Allow easier matching
28235 of the permutation by folding the permutation into the single
28239 for (i
= 0; i
< nelt
; ++i
)
28240 d
.perm
[i
] &= nelt
- 1;
28242 d
.one_vector_p
= true;
28247 d
.one_vector_p
= true;
28251 return arm_expand_vec_perm_const_1 (&d
);
28254 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28257 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
28258 const unsigned char *sel
)
28260 struct expand_vec_perm_d d
;
28261 unsigned int i
, nelt
, which
;
28265 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
28266 d
.testing_p
= true;
28267 memcpy (d
.perm
, sel
, nelt
);
28269 /* Categorize the set of elements in the selector. */
28270 for (i
= which
= 0; i
< nelt
; ++i
)
28272 unsigned char e
= d
.perm
[i
];
28273 gcc_assert (e
< 2 * nelt
);
28274 which
|= (e
< nelt
? 1 : 2);
28277 /* For all elements from second vector, fold the elements to first. */
28279 for (i
= 0; i
< nelt
; ++i
)
28282 /* Check whether the mask can be applied to the vector type. */
28283 d
.one_vector_p
= (which
!= 3);
28285 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
28286 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
28287 if (!d
.one_vector_p
)
28288 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
28291 ret
= arm_expand_vec_perm_const_1 (&d
);
28298 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
28300 /* If we are soft float and we do not have ldrd
28301 then all auto increment forms are ok. */
28302 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
28307 /* Post increment and Pre Decrement are supported for all
28308 instruction forms except for vector forms. */
28311 if (VECTOR_MODE_P (mode
))
28313 if (code
!= ARM_PRE_DEC
)
28323 /* Without LDRD and mode size greater than
28324 word size, there is no point in auto-incrementing
28325 because ldm and stm will not have these forms. */
28326 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
28329 /* Vector and floating point modes do not support
28330 these auto increment forms. */
28331 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
28344 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28345 on ARM, since we know that shifts by negative amounts are no-ops.
28346 Additionally, the default expansion code is not available or suitable
28347 for post-reload insn splits (this can occur when the register allocator
28348 chooses not to do a shift in NEON).
28350 This function is used in both initial expand and post-reload splits, and
28351 handles all kinds of 64-bit shifts.
28353 Input requirements:
28354 - It is safe for the input and output to be the same register, but
28355 early-clobber rules apply for the shift amount and scratch registers.
28356 - Shift by register requires both scratch registers. In all other cases
28357 the scratch registers may be NULL.
28358 - Ashiftrt by a register also clobbers the CC register. */
28360 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
28361 rtx amount
, rtx scratch1
, rtx scratch2
)
28363 rtx out_high
= gen_highpart (SImode
, out
);
28364 rtx out_low
= gen_lowpart (SImode
, out
);
28365 rtx in_high
= gen_highpart (SImode
, in
);
28366 rtx in_low
= gen_lowpart (SImode
, in
);
28369 in = the register pair containing the input value.
28370 out = the destination register pair.
28371 up = the high- or low-part of each pair.
28372 down = the opposite part to "up".
28373 In a shift, we can consider bits to shift from "up"-stream to
28374 "down"-stream, so in a left-shift "up" is the low-part and "down"
28375 is the high-part of each register pair. */
28377 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
28378 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
28379 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
28380 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
28382 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
28384 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
28385 && GET_MODE (out
) == DImode
);
28387 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
28388 && GET_MODE (in
) == DImode
);
28390 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
28391 && GET_MODE (amount
) == SImode
)
28392 || CONST_INT_P (amount
)));
28393 gcc_assert (scratch1
== NULL
28394 || (GET_CODE (scratch1
) == SCRATCH
)
28395 || (GET_MODE (scratch1
) == SImode
28396 && REG_P (scratch1
)));
28397 gcc_assert (scratch2
== NULL
28398 || (GET_CODE (scratch2
) == SCRATCH
)
28399 || (GET_MODE (scratch2
) == SImode
28400 && REG_P (scratch2
)));
28401 gcc_assert (!REG_P (out
) || !REG_P (amount
)
28402 || !HARD_REGISTER_P (out
)
28403 || (REGNO (out
) != REGNO (amount
)
28404 && REGNO (out
) + 1 != REGNO (amount
)));
28406 /* Macros to make following code more readable. */
28407 #define SUB_32(DEST,SRC) \
28408 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28409 #define RSB_32(DEST,SRC) \
28410 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28411 #define SUB_S_32(DEST,SRC) \
28412 gen_addsi3_compare0 ((DEST), (SRC), \
28414 #define SET(DEST,SRC) \
28415 gen_rtx_SET (SImode, (DEST), (SRC))
28416 #define SHIFT(CODE,SRC,AMOUNT) \
28417 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28418 #define LSHIFT(CODE,SRC,AMOUNT) \
28419 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28420 SImode, (SRC), (AMOUNT))
28421 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28422 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28423 SImode, (SRC), (AMOUNT))
28425 gen_rtx_IOR (SImode, (A), (B))
28426 #define BRANCH(COND,LABEL) \
28427 gen_arm_cond_branch ((LABEL), \
28428 gen_rtx_ ## COND (CCmode, cc_reg, \
28432 /* Shifts by register and shifts by constant are handled separately. */
28433 if (CONST_INT_P (amount
))
28435 /* We have a shift-by-constant. */
28437       /* First, handle out-of-range shift amounts.
28438 	 In both cases we try to match the result that an ARM instruction in a
28439 	 shift-by-register would give.  This helps reduce execution
28440 	 differences between optimization levels, but it won't stop other
28441 	 parts of the compiler doing different things.  This is "undefined
28442 	 behaviour", in any case.  */
28443       if (INTVAL (amount) <= 0)
28444 	emit_insn (gen_movdi (out, in));
28445       else if (INTVAL (amount) >= 64)
28447 	  if (code == ASHIFTRT)
28449 rtx const31_rtx
= GEN_INT (31);
28450 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
28451 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
28454 emit_insn (gen_movdi (out
, const0_rtx
));
28457 /* Now handle valid shifts. */
28458 else if (INTVAL (amount
) < 32)
28460 /* Shifts by a constant less than 32. */
28461 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
28463 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28464 emit_insn (SET (out_down
,
28465 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
28467 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
28471 /* Shifts by a constant greater than 31. */
28472 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
28474 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
28475 if (code
== ASHIFTRT
)
28476 emit_insn (gen_ashrsi3 (out_up
, in_up
,
28479 emit_insn (SET (out_up
, const0_rtx
));
28484 /* We have a shift-by-register. */
28485 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
28487 /* This alternative requires the scratch registers. */
28488 gcc_assert (scratch1
&& REG_P (scratch1
));
28489 gcc_assert (scratch2
&& REG_P (scratch2
));
28491 /* We will need the values "amount-32" and "32-amount" later.
28492 Swapping them around now allows the later code to be more general. */
28496 emit_insn (SUB_32 (scratch1
, amount
));
28497 emit_insn (RSB_32 (scratch2
, amount
));
28500 emit_insn (RSB_32 (scratch1
, amount
));
28501 /* Also set CC = amount > 32. */
28502 emit_insn (SUB_S_32 (scratch2
, amount
));
28505 emit_insn (RSB_32 (scratch1
, amount
));
28506 emit_insn (SUB_32 (scratch2
, amount
));
28509 gcc_unreachable ();
28512 /* Emit code like this:
28515 out_down = in_down << amount;
28516 out_down = (in_up << (amount - 32)) | out_down;
28517 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28518 out_up = in_up << amount;
28521 out_down = in_down >> amount;
28522 out_down = (in_up << (32 - amount)) | out_down;
28524 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28525 out_up = in_up << amount;
28528 out_down = in_down >> amount;
28529 out_down = (in_up << (32 - amount)) | out_down;
28531 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28532 out_up = in_up << amount;
28534 The ARM and Thumb2 variants are the same but implemented slightly
28535 differently. If this were only called during expand we could just
28536 use the Thumb2 case and let combine do the right thing, but this
28537 can also be called from post-reload splitters. */
28539 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28541 if (!TARGET_THUMB2
)
28543 /* Emit code for ARM mode. */
28544 emit_insn (SET (out_down
,
28545 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
28546 if (code
== ASHIFTRT
)
28548 rtx_code_label
*done_label
= gen_label_rtx ();
28549 emit_jump_insn (BRANCH (LT
, done_label
));
28550 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
28552 emit_label (done_label
);
28555 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
28560 /* Emit code for Thumb2 mode.
28561 Thumb2 can't do shift and or in one insn. */
28562 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
28563 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
28565 if (code
== ASHIFTRT
)
28567 rtx_code_label
*done_label
= gen_label_rtx ();
28568 emit_jump_insn (BRANCH (LT
, done_label
));
28569 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
28570 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
28571 emit_label (done_label
);
28575 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
28576 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
28580 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
28595 /* Return true if *COMPARISON is a valid comparison operation, and
28596    force the operands into a form that is valid for it.  */
28598 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28600 enum rtx_code code
= GET_CODE (*comparison
);
28602 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
28603 ? GET_MODE (*op2
) : GET_MODE (*op1
);
28605 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
28607 if (code
== UNEQ
|| code
== LTGT
)
28610 code_int
= (int)code
;
28611 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
28612 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
28617 if (!arm_add_operand (*op1
, mode
))
28618 *op1
= force_reg (mode
, *op1
);
28619 if (!arm_add_operand (*op2
, mode
))
28620 *op2
= force_reg (mode
, *op2
);
28624 if (!cmpdi_operand (*op1
, mode
))
28625 *op1
= force_reg (mode
, *op1
);
28626 if (!cmpdi_operand (*op2
, mode
))
28627 *op2
= force_reg (mode
, *op2
);
28632 if (!arm_float_compare_operand (*op1
, mode
))
28633 *op1
= force_reg (mode
, *op1
);
28634 if (!arm_float_compare_operand (*op2
, mode
))
28635 *op2
= force_reg (mode
, *op2
);
28645 /* Maximum number of instructions to set block of memory. */
28647 arm_block_set_max_insns (void)
28649 if (optimize_function_for_size_p (cfun
))
28652 return current_tune
->max_insns_inline_memset
;
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
                                 unsigned HOST_WIDE_INT length,
                                 unsigned HOST_WIDE_INT align,
                                 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
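/* Standalone sketch (hypothetical helper, not from the original source)
   of the instruction-count estimate above.  CONST_COST stands in for
   arm_const_inline_cost / arm_const_double_inline_cost, which need an rtx;
   here it is simply passed in as a number.  */
static int
model_non_vect_set_cost (unsigned long length, unsigned long align,
                         int unaligned_p, int use_strd_p, int const_cost,
                         int have_unaligned_access)
{
  /* Store count for a 0-7 byte tail, using strb/strh/str.  */
  static const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
  int num = const_cost;

  if (unaligned_p)
    num += length / align + length % align;          /* ALIGN-sized stores */
  else if (use_strd_p)
    num += (length >> 3) + leftover[length & 7];     /* strd + tail */
  else
    num += (length >> 2) + leftover[length & 3];     /* str + tail */

  /* A trailing strh+strb pair can become one unaligned str by shifting
     the address back one byte.  */
  if (have_unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return num;
}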
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
                             unsigned HOST_WIDE_INT align,
                             machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;
  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num++;

  return (num <= arm_block_set_max_insns ());
}
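/* Standalone sketch (hypothetical helper) of the vectorized cost estimate
   above, with the vector width passed as NELT bytes (8 for V8QI, 16 for
   V16QI) instead of a machine_mode.  */
static int
model_vect_set_cost (unsigned long length, unsigned long align,
                     unsigned int nelt)
{
  int unaligned_p = (align & 3) != 0;
  int num = 1;                          /* load of the constant vector */

  num += (length + nelt - 1) / nelt;    /* vector stores */
  if (!unaligned_p && (length & 3) != 0)
    num++;                              /* address adjustment for the tail */
  if (!unaligned_p && nelt == 16)
    num++;                              /* extra vst1 for the first 16 bytes */
  return num;
}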
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
                              unsigned HOST_WIDE_INT length,
                              unsigned HOST_WIDE_INT value,
                              unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, 0);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
        emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
    }

  /* If at least nelt_v8 bytes are left over, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
        {
          reg = gen_lowpart (V8QImode, reg);
          mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
        }
      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
                                              + (nelt_mode - nelt_v8))));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
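/* Illustrative sketch (hypothetical helper, not part of the compiler):
   a byte-level model of the strategy used above -- store full 16-byte
   chunks, then cover the tail with one final full-width store whose
   address is moved back so it overlaps bytes already written ("shifting
   bytes back").  Assumes LENGTH >= 16, matching the length >= nelt_mode
   requirement above.  */
static void
model_block_set_v16 (unsigned char *dst, unsigned long length,
                     unsigned char value)
{
  unsigned char chunk[16];
  unsigned long i, j;

  /* Model of the vector register holding 16 copies of VALUE.  */
  for (j = 0; j < 16; j++)
    chunk[j] = value;

  /* Whole 16-byte stores.  */
  for (i = 0; i + 16 <= length; i += 16)
    for (j = 0; j < 16; j++)
      dst[i + j] = chunk[j];

  /* Tail: one final full-width store ending exactly at LENGTH, which
     overlaps bytes already written by the loop above.  */
  if (i < length)
    for (j = 0; j < 16; j++)
      dst[length - 16 + j] = chunk[j];
}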
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
                            unsigned HOST_WIDE_INT length,
                            unsigned HOST_WIDE_INT value,
                            unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
        {
          emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
          mem = adjust_automodify_address (dstbase, mode, dst, 0);
          /* We are shifting bytes back, set the alignment accordingly.  */
          if ((length & 0x3) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 4);
          else if ((length & 0x1) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 2);
          else
            set_mem_align (mem, BITS_PER_UNIT);

          emit_insn (gen_movmisalignv16qi (mem, reg));
          return true;
        }
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      mem = adjust_automodify_address (dstbase, mode,
                                       addr, i - UNITS_PER_WORD);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
        set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT * 2);
      else
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
                                  unsigned HOST_WIDE_INT length,
                                  unsigned HOST_WIDE_INT value,
                                  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
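/* Illustrative sketch (hypothetical helper): byte-level model of the
   strh/strb strategy above for ALIGN == 1 or 2 -- replicate VALUE into a
   halfword when 2-byte stores are allowed, store in ALIGN-sized steps,
   then finish the possible odd byte.  */
static void
model_unaligned_non_vect_set (unsigned char *dst, unsigned long length,
                              unsigned char value, unsigned int align)
{
  unsigned long i;

  if (align == 2)
    {
      /* Halfword stores of VALUE replicated into both bytes.  */
      for (i = 0; i + 2 <= length; i += 2)
        {
          dst[i] = value;
          dst[i + 1] = value;
        }
    }
  else
    /* Byte stores only.  */
    for (i = 0; i + 1 <= length; i++)
      dst[i] = value;

  /* Single byte leftover (only possible when ALIGN == 2).  */
  if (i + 1 == length)
    dst[i] = value;
}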
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
                                unsigned HOST_WIDE_INT length,
                                unsigned HOST_WIDE_INT value,
                                unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
                && TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, false, use_strd_p))
    {
      if (!use_strd_p)
        return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                            align, false, use_strd_p))
        return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
        {
          addr = plus_constant (Pmode, dst, i);
          mem = adjust_automodify_address (dstbase, DImode, addr, i);
          emit_move_insn (mem, reg);
        }
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge the last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
         compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
                    unsigned HOST_WIDE_INT length,
                    unsigned HOST_WIDE_INT value,
                    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}
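/* Illustrative sketch (hypothetical helper): with the offset returned
   above, the generic AddressSanitizer mapping typically computes the
   shadow byte address as (addr >> 3) + offset, i.e. plus 1 << 29 on ARM.
   This models that address computation; it is not compiler code.  */
static unsigned long
model_asan_shadow_addr (unsigned long addr)
{
  return (addr >> 3) + ((unsigned long) 1 << 29);
}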
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */
static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
        {
          decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
          if (decl_op1
              && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
              && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
            {
              if ((TREE_CODE (decl_op1) == VAR_DECL
                   || TREE_CODE (decl_op1) == CONST_DECL)
                  && (TREE_CODE (decl_op0) == VAR_DECL
                      || TREE_CODE (decl_op0) == CONST_DECL))
                return (get_variable_section (decl_op1, false)
                        != get_variable_section (decl_op0, false));

              if (TREE_CODE (decl_op1) == LABEL_DECL
                  && TREE_CODE (decl_op0) == LABEL_DECL)
                return (DECL_CONTEXT (decl_op1)
                        != DECL_CONTEXT (decl_op0));
            }

          return true;
        }
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* If MEM is in the form of [base+offset], extract the two parts
   of the address and set them to BASE and OFFSET, otherwise return
   FALSE after clearing BASE and OFFSET.  */
static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* If INSN is a load or store of an address in the form of [base+offset],
   extract the two parts and set them to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */
static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */
static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
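/* Standalone sketch (hypothetical helper) of the ordering keys computed
   above: loads sort ahead of stores, then smaller base register numbers,
   then smaller (non-negative) offsets.  The base register number and the
   offset are passed as plain integers instead of rtx operands.  */
static void
model_fusion_priority (int max_pri, int is_load, int base_regno,
                       int off_val, int *fusion_pri, int *pri)
{
  int tmp = max_pri - 1;

  *fusion_pri = is_load ? tmp - 1 : tmp - 2;    /* loads first */
  tmp /= 2;
  tmp -= (base_regno & 0xff) << 20;             /* smaller base first */
  if (off_val >= 0)                             /* smaller offset first */
    tmp -= (off_val & 0xfffff);
  else
    tmp += (-off_val) & 0xfffff;
  *pri = tmp;
}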
29337 #include "gt-arm.h"