/* Output routines for GCC for ARM.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
34 #include "stringpool.h"
40 #include "diagnostic-core.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
61 #include "target-globals.h"
63 #include "tm-constrs.h"
66 /* This file should be included last. */
67 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
                             unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned feature_count (const arm_feature_set*);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
                               machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
135 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
136 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
                                               machine_mode, int *,
                                               const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
                                      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
                                        unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
                                             const unsigned char *sel);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                           tree vectype,
                                           int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
                                   enum vect_cost_for_stmt kind,
                                   struct _stmt_vec_info *stmt_info,
                                   tree vectype, int misalign,
                                   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                                         bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
                                                int reloc);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL, 0, 0, false, false, false, NULL, false }
};
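
/* Illustrative sketch of how these attributes are spelled in user code,
   using the standard GCC attribute syntax; the function names below are
   only placeholders:

     void far_away (void) __attribute__ ((long_call));
     void uart_handler (void) __attribute__ ((isr ("IRQ")));
     double vadd (double, double) __attribute__ ((pcs ("aapcs-vfp")));
     void reset (void) __attribute__ ((naked));  */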
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#endif /* ARM_UNWIND_INFO */
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
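
/* That is, an anchor covers offsets -4088 through +4095 inclusive,
   a span of 4095 - (-4088) + 1 = 8184 bytes = 1023 * 8.  */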
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
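
/* Thumb-2 IT blocks normally cover up to four conditional instructions;
   with -mrestrict-it (reflecting ARMv8-A's deprecation of more complex
   IT blocks) only a single instruction is allowed per block.  */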
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
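
/* The hooks selected by the TARGET_* macros above are gathered into the
   global target vector here; hooks that were not overridden keep the
   defaults provided by target-def.h.  */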
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
arm_feature_set insn_flags = ARM_FSET_EMPTY;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
arm_feature_set tune_flags = ARM_FSET_EMPTY;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
/* Nonzero if this chip supports the ARM 6K extensions.  */
/* Nonzero if this chip supports the ARM 6KZ extensions.  */
/* Nonzero if instructions present in ARMv6-M can be used.  */
/* Nonzero if this chip supports the ARM 7 extensions.  */

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
/* Nonzero if instructions present in ARMv8 can be used.  */
/* Nonzero if this chip supports the ARMv8.1 extensions.  */
/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */

/* Nonzero if chip supports Thumb 2.  */

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
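
/* That is, the mask of low registers (r0-r7) that may serve as scratch/work
   registers in Thumb-2, excluding the Thumb hard frame pointer, the stack
   pointer, the program counter and the PIC register.  */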
/* Initialization code.  */

const char *const name;
enum processor_type core;
enum base_architecture base_arch;
const arm_feature_set flags;
const struct tune_params *const tune;
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
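
/* The prefetch triple is { num_slots, l1_size, l1_line_size }, filling the
   prefetch-related fields of tune_params; -1 marks a value as unknown, and
   ARM_PREFETCH_NOT_BENEFICIAL marks prefetching as not worthwhile.  */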
/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  1,  /* vec_unalign_load_cost.  */
  1,  /* vec_unalign_store_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =

  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (2),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (3),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A9.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (4),  /* extend.  */
  COSTS_N_INSNS (4),  /* extend_add.  */

  COSTS_N_INSNS (2),  /* load.  */
  COSTS_N_INSNS (2),  /* load_sign_extend.  */
  COSTS_N_INSNS (2),  /* ldrd.  */
  COSTS_N_INSNS (2),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),  /* loadf.  */
  COSTS_N_INSNS (5),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (2),  /* store.  */
  COSTS_N_INSNS (2),  /* strd.  */
  COSTS_N_INSNS (2),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* storef.  */
  COSTS_N_INSNS (1),  /* stored.  */
  COSTS_N_INSNS (1),  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */

  COSTS_N_INSNS (14), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (1),  /* fpconst.  */
  COSTS_N_INSNS (1),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (24), /* div.  */
  COSTS_N_INSNS (5),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (1),  /* fpconst.  */
  COSTS_N_INSNS (1),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa8_extra_costs =

  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  0,                  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  0,                  /* log_shift_reg.  */
  0,                  /* extend_arith.  */
  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),  /* simple.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A8.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_add.  */

  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (1),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* loadf.  */
  COSTS_N_INSNS (1),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (1),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* storef.  */
  COSTS_N_INSNS (1),  /* stored.  */
  COSTS_N_INSNS (1),  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */

  COSTS_N_INSNS (36), /* div.  */
  COSTS_N_INSNS (11), /* mult.  */
  COSTS_N_INSNS (20), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (9),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (6),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (8),  /* toint.  */
  COSTS_N_INSNS (8),  /* fromint.  */
  COSTS_N_INSNS (8)   /* roundint.  */

  COSTS_N_INSNS (64), /* div.  */
  COSTS_N_INSNS (16), /* mult.  */
  COSTS_N_INSNS (25), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (9),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (6),  /* compare.  */
  COSTS_N_INSNS (6),  /* widen.  */
  COSTS_N_INSNS (6),  /* narrow.  */
  COSTS_N_INSNS (8),  /* toint.  */
  COSTS_N_INSNS (8),  /* fromint.  */
  COSTS_N_INSNS (8)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table cortexa5_extra_costs =

  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */
  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (7)   /* idiv.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_add.  */

  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (6),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (4),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  COSTS_N_INSNS (1),  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */

  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (3),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (30), /* div.  */
  COSTS_N_INSNS (6),  /* mult.  */
  COSTS_N_INSNS (10), /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa7_extra_costs =

  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */
  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (7)   /* idiv.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_add.  */

  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (2),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  COSTS_N_INSNS (1),  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */

  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (3),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (30), /* div.  */
  COSTS_N_INSNS (6),  /* mult.  */
  COSTS_N_INSNS (10), /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table cortexa12_extra_costs =

  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */
  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (2),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (18)  /* idiv.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (3),  /* extend.  */
  COSTS_N_INSNS (3),  /* extend_add.  */

  COSTS_N_INSNS (3),  /* load.  */
  COSTS_N_INSNS (3),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (3),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),  /* loadf.  */
  COSTS_N_INSNS (3),  /* loadd.  */
  0,                  /* load_unaligned.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  0,                  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */

  COSTS_N_INSNS (17), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */

  COSTS_N_INSNS (31), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa15_extra_costs =

  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  true                /* non_exec_costs_exec.  */

  COSTS_N_INSNS (2),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (2),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (18)  /* idiv.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (3),  /* extend.  */
  COSTS_N_INSNS (3),  /* extend_add.  */

  COSTS_N_INSNS (3),  /* load.  */
  COSTS_N_INSNS (3),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (4),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),  /* loadf.  */
  COSTS_N_INSNS (4),  /* loadd.  */
  0,                  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  0,                  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */

  COSTS_N_INSNS (17), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (5),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */

  COSTS_N_INSNS (31), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */

  COSTS_N_INSNS (1)   /* alu.  */
const struct cpu_cost_table v7m_extra_costs =

  0,                  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* non_exec.  */
  false               /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),  /* simple.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (3),  /* extend_add.  */
  COSTS_N_INSNS (8)   /* idiv.  */

  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* extend_add.  */

  COSTS_N_INSNS (2),  /* load.  */
  0,                  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (2),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  1,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (3),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (2),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (2),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  1,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (3),  /* stored.  */
  COSTS_N_INSNS (1),  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */

  COSTS_N_INSNS (7),  /* div.  */
  COSTS_N_INSNS (2),  /* mult.  */
  COSTS_N_INSNS (5),  /* mult_addsub.  */
  COSTS_N_INSNS (3),  /* fma.  */
  COSTS_N_INSNS (1),  /* addsub.  */

  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (5),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */

  COSTS_N_INSNS (1)   /* alu.  */
const struct tune_params arm_slowmul_tune =

  arm_slowmul_rtx_costs,
  NULL,  /* Insn extra costs.  */
  NULL,  /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,     /* Constant limit.  */
  5,     /* Max cond insns.  */
  8,     /* Memset max inline.  */
  1,     /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF

const struct tune_params arm_fastmul_tune =

  arm_fastmul_rtx_costs,
  NULL,  /* Insn extra costs.  */
  NULL,  /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,     /* Constant limit.  */
  5,     /* Max cond insns.  */
  8,     /* Memset max inline.  */
  1,     /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =

  arm_fastmul_rtx_costs,
  NULL,  /* Insn extra costs.  */
  NULL,  /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,     /* Constant limit.  */
  3,     /* Max cond insns.  */
  8,     /* Memset max inline.  */
  1,     /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
const struct tune_params arm_xscale_tune =

  arm_xscale_rtx_costs,
  NULL,  /* Insn extra costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,     /* Constant limit.  */
  3,     /* Max cond insns.  */
  8,     /* Memset max inline.  */
  1,     /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF

const struct tune_params arm_9e_tune =

  NULL,  /* Insn extra costs.  */
  NULL,  /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,     /* Constant limit.  */
  5,     /* Max cond insns.  */
  8,     /* Memset max inline.  */
  1,     /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF

const struct tune_params arm_marvell_pj4_tune =

  NULL,  /* Insn extra costs.  */
  NULL,  /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,     /* Constant limit.  */
  5,     /* Max cond insns.  */
  8,     /* Memset max inline.  */
  2,     /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,	/* Insn extra costs.  */
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  5,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  1,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  &generic_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  5,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  2,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  arm_9e_rtx_costs,
  &cortexa8_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  5,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  2,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a7_tune =
{
  arm_9e_rtx_costs,
  &cortexa7_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  5,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  2,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a15_tune =
{
  arm_9e_rtx_costs,
  &cortexa15_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  2,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  3,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_cortex_a35_tune =
{
  arm_9e_rtx_costs,
  &cortexa53_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  5,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  1,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a53_tune =
{
  arm_9e_rtx_costs,
  &cortexa53_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  5,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  2,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a57_tune =
{
  arm_9e_rtx_costs,
  &cortexa57_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  2,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  3,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_exynosm1_tune =
{
  arm_9e_rtx_costs,
  &exynosm1_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  2,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  3,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_xgene1_tune =
{
  arm_9e_rtx_costs,
  &xgene1_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  2,	/* Max cond insns.  */
  32,	/* Memset max inline.  */
  4,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_qdf24xx_tune =
{
  arm_9e_rtx_costs,
  &qdf24xx_extra_costs,
  NULL,	/* Scheduler cost adjustment.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  1,	/* Constant limit.  */
  2,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  4,	/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL (0, -1, 64),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  &cortexa5_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  1,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  2,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  5,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  2,	/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a12_tune =
{
  arm_9e_rtx_costs,
  &cortexa12_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  1,	/* Constant limit.  */
  2,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  2,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a73_tune =
{
  arm_9e_rtx_costs,
  &cortexa57_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  1,	/* Constant limit.  */
  2,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  2,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  arm_9e_rtx_costs,
  &v7m_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  2,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  1,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  arm_9e_rtx_costs,
  &v7m_extra_costs,
  NULL,	/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,	/* Constant limit.  */
  1,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  2,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  arm_9e_rtx_costs,
  NULL,	/* Insn extra costs.  */
  NULL,	/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  1,	/* Constant limit.  */
  5,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  1,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  NULL,	/* Insn extra costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,	/* Constant limit.  */
  5,	/* Max cond insns.  */
  8,	/* Memset max inline.  */
  2,	/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
   FLAGS, &arm_##COSTS##_tune},
#include "arm-cores.def"
  {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */
#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
  {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */
char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
/* Available values for -mfpu=.  */

const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, REV, VFP_REGS, FEATURES) \
  { NAME, REV, VFP_REGS, FEATURES },
#include "arm-fpus.def"
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
/* Return the number of features in feature-set SET.  */
static unsigned
feature_count (const arm_feature_set * set)
{
  return (bit_count (ARM_FSET_CPU1 (*set))
	  + bit_count (ARM_FSET_CPU2 (*set)));
}
typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
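/* Each arm_fixed_mode_set pairs a machine mode with the short mode name that
   gets spliced into the "__gnu_*" libfunc names built by the helpers below.  */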
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
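/* For instance, a call such as
     arm_set_fixed_optab_libfunc (ssadd_optab, mode, "ssadd", name, 3);
   registers a libfunc named "__gnu_ssadd<name>3" for MODE, following the
   sprintf formats above; the mode/name pairs come from the tables below.  */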
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
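  /* For example, a plain DImode division "a / b" can simply call
     __aeabi_ldivmod: the quotient comes back in {r0, r1}, exactly where an
     ordinary division routine would put it, while the remainder (returned in
     {r2, r3} per the run-time ABI) is ignored, so the same entry point serves
     both sdiv_optab and sdivmod_optab.  */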
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */

  const arm_fixed_mode_set fixed_arith_modes[] =
  {
    /* { machine_mode, name } entries for each supported fixed-point
       arithmetic mode.  */
  };

  const arm_fixed_mode_set fixed_conv_modes[] =
  {
    /* { machine_mode, name } entries for each mode involved in
       fixed-point conversions.  */
  };

  unsigned int i, j;
  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				   "cmp", fixed_arith_modes[i].name, 2);
    }
  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
	if (i == j
	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	  continue;

	arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfract_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (fractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
      }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;
  const struct arm_fpu_desc *fpu_desc = &all_fpus[opts->x_arm_fpu_index];

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && ARM_FPU_FSET_HAS (fpu_desc->features, FPU_FL_NEON))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
	  || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");

  /* We only support pure-code on Thumb-2 M-profile targets.  */
  if (target_pure_code
      && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
    error ("-mpure-code only supports non-pic code on armv7-m targets");
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
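      /* 127 is the largest 7-bit byte offset; Thumb-1 word loads reach
	 offsets 0, 4, ..., 124 of that range (a 5-bit immediate scaled
	 by 4).  */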
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
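      /* Checking the arithmetic above: 248 + 1 + 4095 = 4344 = 8 * 543,
	 so anchor blocks stay a multiple of eight bytes apart.  */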
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2)
	max_insns_skipped = arm_restrict_it ? 1 : 4;
    }
  else
    /* When -mrestrict-it is in use tone down the if-conversion.  */
    max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
      ? 1 : current_tune->max_insns_skipped;
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;
static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  if (opts->x_align_functions <= 0)
    opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
      && opts->x_optimize_size ? 2 : 4;
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_override_options_after_change_1 (&global_options);
}
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with, eg, -march=armv3.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* Need to remember initial values so combinations of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
       i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6 && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  arm_selected_arch = NULL;
  arm_selected_cpu = NULL;
  arm_selected_tune = NULL;

  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
      arm_selected_tune = &all_cores[(int) arm_cpu_option];
    }

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
	  arm_feature_set selected_flags;

	  ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
			arm_selected_arch->flags);
	  ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
	  /* Check for conflict between mcpu and march.  */
	  if (!ARM_FSET_IS_EMPTY (selected_flags))
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      arm_feature_set sought = ARM_FSET_EMPTY;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switch that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
	  ARM_FSET_ADD_CPU1 (sought, FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
	}

      if (!ARM_FSET_IS_EMPTY (sought)
	  && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		{
		  arm_feature_set required = ARM_FSET_EMPTY;
		  ARM_FSET_UNION (required, sought, insn_flags);
		  if (ARM_FSET_CPU_SUBSET (required, sel->flags))
		    {
		      unsigned count;
		      arm_feature_set flags;

		      ARM_FSET_INTER (flags, sel->flags, insn_flags);
		      count = feature_count (&flags);

		      if (count >= current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}
	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
  arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
  arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
  arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
  arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
  arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
  arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
  arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
  arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
  arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
  arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
  arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1);
  arm_arch8_2 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_2);
  arm_arch_thumb1 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB);
  arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
  arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);

  arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
  arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
  arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
  arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
  arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
  arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
  arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
  arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
  arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
  arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
  arm_fp16_inst = ARM_FSET_HAS_CPU2 (insn_flags, FL2_FP16INST);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible.");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
3422 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3423 if (fix_cm3_ldrd
== 2)
3425 if (arm_selected_cpu
->core
== cortexm3
)
3431 /* Hot/Cold partitioning is not currently supported, since we can't
3432 handle literal pool placement in that case. */
3433 if (flag_reorder_blocks_and_partition
)
3435 inform (input_location
,
3436 "-freorder-blocks-and-partition not supported on this architecture");
3437 flag_reorder_blocks_and_partition
= 0;
3438 flag_reorder_blocks
= 1;
3442 /* Hoisting PIC address calculations more aggressively provides a small,
3443 but measurable, size reduction for PIC code. Therefore, we decrease
3444 the bar for unrestricted expression hoisting to the cost of PIC address
3445 calculation, which is 2 instructions. */
3446 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3447 global_options
.x_param_values
,
3448 global_options_set
.x_param_values
);
3450 /* ARM EABI defaults to strict volatile bitfields. */
3451 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3452 && abi_version_at_least(2))
3453 flag_strict_volatile_bitfields
= 1;
3455 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3456 have deemed it beneficial (signified by setting
3457 prefetch.num_slots to 1 or more). */
3458 if (flag_prefetch_loop_arrays
< 0
3461 && current_tune
->prefetch
.num_slots
> 0)
3462 flag_prefetch_loop_arrays
= 1;
3464 /* Set up parameters to be used in prefetching algorithm. Do not
3465 override the defaults unless we are tuning for a core we have
3466 researched values for. */
3467 if (current_tune
->prefetch
.num_slots
> 0)
3468 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3469 current_tune
->prefetch
.num_slots
,
3470 global_options
.x_param_values
,
3471 global_options_set
.x_param_values
);
3472 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3473 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3474 current_tune
->prefetch
.l1_cache_line_size
,
3475 global_options
.x_param_values
,
3476 global_options_set
.x_param_values
);
3477 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3478 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3479 current_tune
->prefetch
.l1_cache_size
,
3480 global_options
.x_param_values
,
3481 global_options_set
.x_param_values
);
3483 /* Use Neon to perform 64-bits operations rather than core
3485 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3486 if (use_neon_for_64bits
== 1)
3487 prefer_neon_for_64bits
= true;
3489 /* Use the alternative scheduling-pressure algorithm by default. */
3490 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3491 global_options
.x_param_values
,
3492 global_options_set
.x_param_values
);
3494 /* Look through ready list and all of queue for instructions
3495 relevant for L2 auto-prefetcher. */
3496 int param_sched_autopref_queue_depth
;
3498 switch (current_tune
->sched_autopref
)
3500 case tune_params::SCHED_AUTOPREF_OFF
:
3501 param_sched_autopref_queue_depth
= -1;
3504 case tune_params::SCHED_AUTOPREF_RANK
:
3505 param_sched_autopref_queue_depth
= 0;
3508 case tune_params::SCHED_AUTOPREF_FULL
:
3509 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3516 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3517 param_sched_autopref_queue_depth
,
3518 global_options
.x_param_values
,
3519 global_options_set
.x_param_values
);
3521 /* Currently, for slow flash data, we just disable literal pools. We also
3522 disable it for pure-code. */
3523 if (target_slow_flash_data
|| target_pure_code
)
3524 arm_disable_literal_pool
= true;
3526 /* Disable scheduling fusion by default if it's not armv7 processor
3527 or doesn't prefer ldrd/strd. */
3528 if (flag_schedule_fusion
== 2
3529 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3530 flag_schedule_fusion
= 0;
  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Save the initial options in case the user does function specific
     options or #pragma target.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
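
/* Illustrative note, not part of the original source: the prefetch-related
   params above are only seeded from the tuning structure.  For example, if a
   core's tuning sets prefetch.num_slots to 2 and l1_cache_line_size to 64,
   then PARAM_SIMULTANEOUS_PREFETCHES becomes 2 and PARAM_L1_CACHE_LINE_SIZE
   becomes 64, but an explicit user setting such as
   "--param simultaneous-prefetches=4" still wins, because
   maybe_set_param_value () consults global_options_set first.  */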
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
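
/* Illustrative example, not part of the original source: with the table
   above, a declaration such as

     void handler (void) __attribute__ ((interrupt ("FIQ")));

   reaches arm_isr_value () with the string "FIQ" and yields ARM_FT_FIQ,
   while an unrecognized string such as "NMI" falls through to
   ARM_FT_UNKNOWN.  */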
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
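
/* Illustrative example, not part of the original source: because of the
   check above, a function such as

     void __attribute__ ((naked)) start (void) { __asm__ ("b   main"); }

   does not get a missing-return-sequence warning; its whole body, including
   the return, is the user's inline assembly.  */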
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
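
/* Illustrative layout sketch, not part of the original source: for the
   32-bit (ARM/Thumb-2) templates above the trampoline occupies four words,

       offset  0:  first  ldr  (loads the static chain register)
       offset  4:  second ldr  (loads pc with the target address)
       offset  8:  .word  <static chain value>
       offset 12:  .word  <target function address>

   which is why arm_trampoline_init () below patches offsets 8 and 12 when
   TARGET_32BIT, and offsets 12 and 16 for the longer 16-bit Thumb stub.  */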
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes () != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
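
/* Illustrative example, not part of the original source: when every check
   above passes, the epilogue collapses into a single instruction, e.g. a
   function whose prologue was only "push {r4, lr}" can return with

       ldmfd	sp!, {r4, pc}

   whereas any of the rejected cases (pending stack adjustment, LR not saved,
   live VFP/iWMMXt saves, ...) needs at least one extra instruction, so the
   function returns 0 for them.  */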
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */

bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
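
/* Illustrative sketch only, not part of the original source: a stand-alone
   re-statement of the basic ARM-mode rule tested above, namely that an
   immediate must be an 8-bit value rotated right by an even amount.  The
   helper name is made up for the example and the Thumb-2 replicated-pattern
   cases are deliberately ignored.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
arm_rotated_imm8_p (uint32_t x)
{
  for (unsigned int rot = 0; rot < 32; rot += 2)
    {
      /* Undo a "rotate right by ROT" encoding by rotating left by ROT.  */
      uint32_t undone = rot == 0 ? x : (x << rot) | (x >> (32 - rot));
      if ((undone & ~0xffu) == 0)
	return true;	/* Fits in 8 bits after the rotation.  */
    }
  return false;
}

/* For instance 0x000000ff, 0x0000ff00 and 0xff000000 are accepted, while
   0x00000101 and 0x0000ffff are not.  */
#endif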
3978 /* Return true if I is a valid constant for the operation CODE. */
3980 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3982 if (const_ok_for_arm (i
))
3988 /* See if we can use movw. */
3989 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
3992 /* Otherwise, try mvn. */
3993 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3996 /* See if we can use addw or subw. */
3998 && ((i
& 0xfffff000) == 0
3999 || ((-i
) & 0xfffff000) == 0))
4020 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4022 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4028 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4032 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
/* Return true if I is a valid di mode constant for the operation CODE.  */

int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	      && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
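
/* Illustrative example, not part of the original source: for a DImode
   bitwise operation each 32-bit half is tested on its own, so an AND with
   0xffffffff000000ff is fine (the high word is all ones, and 0xff is a valid
   immediate for the low word), while 0x1234567812345678 is rejected because
   neither half can be encoded.  */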
4066 /* Emit a sequence of insns to handle a large constant.
4067 CODE is the code of the operation required, it can be any of SET, PLUS,
4068 IOR, AND, XOR, MINUS;
4069 MODE is the mode in which the operation is being performed;
4070 VAL is the integer to operate on;
4071 SOURCE is the other operand (a register, or a null-pointer for SET);
4072 SUBTARGETS means it is safe to create scratch registers if that will
4073 either produce a simpler sequence, or we will want to cse the values.
4074 Return value is the number of insns emitted. */
4076 /* ??? Tweak this for thumb2. */
4078 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4079 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4083 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4084 cond
= COND_EXEC_TEST (PATTERN (insn
));
4088 if (subtargets
|| code
== SET
4089 || (REG_P (target
) && REG_P (source
)
4090 && REGNO (target
) != REGNO (source
)))
4092 /* After arm_reorg has been called, we can't fix up expensive
4093 constants by pushing them into memory so we must synthesize
4094 them in-line, regardless of the cost. This is only likely to
4095 be more costly on chips that have load delay slots and we are
4096 compiling without running the scheduler (so no splitting
4097 occurred before the final instruction emission).
4099 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4101 if (!cfun
->machine
->after_arm_reorg
4103 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4105 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4110 /* Currently SET is the only monadic value for CODE, all
4111 the rest are diadic. */
4112 if (TARGET_USE_MOVT
)
4113 arm_emit_movpair (target
, GEN_INT (val
));
4115 emit_set_insn (target
, GEN_INT (val
));
4121 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4123 if (TARGET_USE_MOVT
)
4124 arm_emit_movpair (temp
, GEN_INT (val
));
4126 emit_set_insn (temp
, GEN_INT (val
));
4128 /* For MINUS, the value is subtracted from, since we never
4129 have subtraction of a constant. */
4131 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4133 emit_set_insn (target
,
4134 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4140 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4144 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4145 ARM/THUMB2 immediates, and add up to VAL.
4146 Thr function return value gives the number of insns required. */
4148 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4149 struct four_ints
*return_sequence
)
4151 int best_consecutive_zeros
= 0;
4155 struct four_ints tmp_sequence
;
4157 /* If we aren't targeting ARM, the best place to start is always at
4158 the bottom, otherwise look more closely. */
4161 for (i
= 0; i
< 32; i
+= 2)
4163 int consecutive_zeros
= 0;
4165 if (!(val
& (3 << i
)))
4167 while ((i
< 32) && !(val
& (3 << i
)))
4169 consecutive_zeros
+= 2;
4172 if (consecutive_zeros
> best_consecutive_zeros
)
4174 best_consecutive_zeros
= consecutive_zeros
;
4175 best_start
= i
- consecutive_zeros
;
4182 /* So long as it won't require any more insns to do so, it's
4183 desirable to emit a small constant (in bits 0...9) in the last
4184 insn. This way there is more chance that it can be combined with
4185 a later addressing insn to form a pre-indexed load or store
4186 operation. Consider:
4188 *((volatile int *)0xe0000100) = 1;
4189 *((volatile int *)0xe0000110) = 2;
4191 We want this to wind up as:
4195 str rB, [rA, #0x100]
4197 str rB, [rA, #0x110]
4199 rather than having to synthesize both large constants from scratch.
4201 Therefore, we calculate how many insns would be required to emit
4202 the constant starting from `best_start', and also starting from
4203 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4204 yield a shorter sequence, we may as well use zero. */
4205 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4207 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4209 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4210 if (insns2
<= insns1
)
4212 *return_sequence
= tmp_sequence
;
4220 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4222 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4223 struct four_ints
*return_sequence
, int i
)
4225 int remainder
= val
& 0xffffffff;
4228 /* Try and find a way of doing the job in either two or three
4231 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4232 location. We start at position I. This may be the MSB, or
4233 optimial_immediate_sequence may have positioned it at the largest block
4234 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4235 wrapping around to the top of the word when we drop off the bottom.
4236 In the worst case this code should produce no more than four insns.
4238 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4239 constants, shifted to any arbitrary location. We should always start
4244 unsigned int b1
, b2
, b3
, b4
;
4245 unsigned HOST_WIDE_INT result
;
4248 gcc_assert (insns
< 4);
4253 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4254 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4257 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4258 /* We can use addw/subw for the last 12 bits. */
4262 /* Use an 8-bit shifted/rotated immediate. */
4266 result
= remainder
& ((0x0ff << end
)
4267 | ((i
< end
) ? (0xff >> (32 - end
))
4274 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4275 arbitrary shifts. */
4276 i
-= TARGET_ARM
? 2 : 1;
4280 /* Next, see if we can do a better job with a thumb2 replicated
4283 We do it this way around to catch the cases like 0x01F001E0 where
4284 two 8-bit immediates would work, but a replicated constant would
4287 TODO: 16-bit constants that don't clear all the bits, but still win.
4288 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4291 b1
= (remainder
& 0xff000000) >> 24;
4292 b2
= (remainder
& 0x00ff0000) >> 16;
4293 b3
= (remainder
& 0x0000ff00) >> 8;
4294 b4
= remainder
& 0xff;
4298 /* The 8-bit immediate already found clears b1 (and maybe b2),
4299 but must leave b3 and b4 alone. */
4301 /* First try to find a 32-bit replicated constant that clears
4302 almost everything. We can assume that we can't do it in one,
4303 or else we wouldn't be here. */
4304 unsigned int tmp
= b1
& b2
& b3
& b4
;
4305 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4307 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4308 + (tmp
== b3
) + (tmp
== b4
);
4310 && (matching_bytes
>= 3
4311 || (matching_bytes
== 2
4312 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4314 /* At least 3 of the bytes match, and the fourth has at
4315 least as many bits set, or two of the bytes match
4316 and it will only require one more insn to finish. */
4324 /* Second, try to find a 16-bit replicated constant that can
4325 leave three of the bytes clear. If b2 or b4 is already
4326 zero, then we can. If the 8-bit from above would not
4327 clear b2 anyway, then we still win. */
4328 else if (b1
== b3
&& (!b2
|| !b4
4329 || (remainder
& 0x00ff0000 & ~result
)))
4331 result
= remainder
& 0xff00ff00;
4337 /* The 8-bit immediate already found clears b2 (and maybe b3)
4338 and we don't get here unless b1 is alredy clear, but it will
4339 leave b4 unchanged. */
4341 /* If we can clear b2 and b4 at once, then we win, since the
4342 8-bits couldn't possibly reach that far. */
4345 result
= remainder
& 0x00ff00ff;
4351 return_sequence
->i
[insns
++] = result
;
4352 remainder
&= ~result
;
4354 if (code
== SET
|| code
== MINUS
)
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
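
/* Illustrative example, not part of the original source: wrapping the SET in
   a COND_EXEC is what later lets the output templates emit a conditionally
   executed instruction such as

       moveq	r0, #42

   instead of branching around an unconditional "mov r0, #42".  */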
4374 /* As above, but extra parameter GENERATE which, if clear, suppresses
4378 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4379 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4380 int subtargets
, int generate
)
4384 int final_invert
= 0;
4386 int set_sign_bit_copies
= 0;
4387 int clear_sign_bit_copies
= 0;
4388 int clear_zero_bit_copies
= 0;
4389 int set_zero_bit_copies
= 0;
4390 int insns
= 0, neg_insns
, inv_insns
;
4391 unsigned HOST_WIDE_INT temp1
, temp2
;
4392 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4393 struct four_ints
*immediates
;
4394 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4396 /* Find out which operations are safe for a given CODE. Also do a quick
4397 check for degenerate cases; these can occur when DImode operations
4410 if (remainder
== 0xffffffff)
4413 emit_constant_insn (cond
,
4414 gen_rtx_SET (target
,
4415 GEN_INT (ARM_SIGN_EXTEND (val
))));
4421 if (reload_completed
&& rtx_equal_p (target
, source
))
4425 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4434 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4437 if (remainder
== 0xffffffff)
4439 if (reload_completed
&& rtx_equal_p (target
, source
))
4442 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4451 if (reload_completed
&& rtx_equal_p (target
, source
))
4454 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4458 if (remainder
== 0xffffffff)
4461 emit_constant_insn (cond
,
4462 gen_rtx_SET (target
,
4463 gen_rtx_NOT (mode
, source
)));
4470 /* We treat MINUS as (val - source), since (source - val) is always
4471 passed as (source + (-val)). */
4475 emit_constant_insn (cond
,
4476 gen_rtx_SET (target
,
4477 gen_rtx_NEG (mode
, source
)));
4480 if (const_ok_for_arm (val
))
4483 emit_constant_insn (cond
,
4484 gen_rtx_SET (target
,
4485 gen_rtx_MINUS (mode
, GEN_INT (val
),
4496 /* If we can do it in one insn get out quickly. */
4497 if (const_ok_for_op (val
, code
))
4500 emit_constant_insn (cond
,
4501 gen_rtx_SET (target
,
4503 ? gen_rtx_fmt_ee (code
, mode
, source
,
4509 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4511 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4512 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4516 if (mode
== SImode
&& i
== 16)
4517 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4519 emit_constant_insn (cond
,
4520 gen_zero_extendhisi2
4521 (target
, gen_lowpart (HImode
, source
)));
4523 /* Extz only supports SImode, but we can coerce the operands
4525 emit_constant_insn (cond
,
4526 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4527 gen_lowpart (SImode
, source
),
4528 GEN_INT (i
), const0_rtx
));
4534 /* Calculate a few attributes that may be useful for specific
4536 /* Count number of leading zeros. */
4537 for (i
= 31; i
>= 0; i
--)
4539 if ((remainder
& (1 << i
)) == 0)
4540 clear_sign_bit_copies
++;
4545 /* Count number of leading 1's. */
4546 for (i
= 31; i
>= 0; i
--)
4548 if ((remainder
& (1 << i
)) != 0)
4549 set_sign_bit_copies
++;
4554 /* Count number of trailing zero's. */
4555 for (i
= 0; i
<= 31; i
++)
4557 if ((remainder
& (1 << i
)) == 0)
4558 clear_zero_bit_copies
++;
4563 /* Count number of trailing 1's. */
4564 for (i
= 0; i
<= 31; i
++)
4566 if ((remainder
& (1 << i
)) != 0)
4567 set_zero_bit_copies
++;
4575 /* See if we can do this by sign_extending a constant that is known
4576 to be negative. This is a good, way of doing it, since the shift
4577 may well merge into a subsequent insn. */
4578 if (set_sign_bit_copies
> 1)
4580 if (const_ok_for_arm
4581 (temp1
= ARM_SIGN_EXTEND (remainder
4582 << (set_sign_bit_copies
- 1))))
4586 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4587 emit_constant_insn (cond
,
4588 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4589 emit_constant_insn (cond
,
4590 gen_ashrsi3 (target
, new_src
,
4591 GEN_INT (set_sign_bit_copies
- 1)));
4595 /* For an inverted constant, we will need to set the low bits,
4596 these will be shifted out of harm's way. */
4597 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4598 if (const_ok_for_arm (~temp1
))
4602 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4603 emit_constant_insn (cond
,
4604 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4605 emit_constant_insn (cond
,
4606 gen_ashrsi3 (target
, new_src
,
4607 GEN_INT (set_sign_bit_copies
- 1)));
4613 /* See if we can calculate the value as the difference between two
4614 valid immediates. */
4615 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4617 int topshift
= clear_sign_bit_copies
& ~1;
4619 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4620 & (0xff000000 >> topshift
));
4622 /* If temp1 is zero, then that means the 9 most significant
4623 bits of remainder were 1 and we've caused it to overflow.
4624 When topshift is 0 we don't need to do anything since we
4625 can borrow from 'bit 32'. */
4626 if (temp1
== 0 && topshift
!= 0)
4627 temp1
= 0x80000000 >> (topshift
- 1);
4629 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4631 if (const_ok_for_arm (temp2
))
4635 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4636 emit_constant_insn (cond
,
4637 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4638 emit_constant_insn (cond
,
4639 gen_addsi3 (target
, new_src
,
4647 /* See if we can generate this by setting the bottom (or the top)
4648 16 bits, and then shifting these into the other half of the
4649 word. We only look for the simplest cases, to do more would cost
4650 too much. Be careful, however, not to generate this when the
4651 alternative would take fewer insns. */
4652 if (val
& 0xffff0000)
4654 temp1
= remainder
& 0xffff0000;
4655 temp2
= remainder
& 0x0000ffff;
4657 /* Overlaps outside this range are best done using other methods. */
4658 for (i
= 9; i
< 24; i
++)
4660 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4661 && !const_ok_for_arm (temp2
))
4663 rtx new_src
= (subtargets
4664 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4666 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4667 source
, subtargets
, generate
);
4675 gen_rtx_ASHIFT (mode
, source
,
4682 /* Don't duplicate cases already considered. */
4683 for (i
= 17; i
< 24; i
++)
4685 if (((temp1
| (temp1
>> i
)) == remainder
)
4686 && !const_ok_for_arm (temp1
))
4688 rtx new_src
= (subtargets
4689 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4691 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4692 source
, subtargets
, generate
);
4697 gen_rtx_SET (target
,
4700 gen_rtx_LSHIFTRT (mode
, source
,
4711 /* If we have IOR or XOR, and the constant can be loaded in a
4712 single instruction, and we can find a temporary to put it in,
4713 then this can be done in two instructions instead of 3-4. */
4715 /* TARGET can't be NULL if SUBTARGETS is 0 */
4716 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4718 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4722 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4724 emit_constant_insn (cond
,
4725 gen_rtx_SET (sub
, GEN_INT (val
)));
4726 emit_constant_insn (cond
,
4727 gen_rtx_SET (target
,
4728 gen_rtx_fmt_ee (code
, mode
,
4739 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4740 and the remainder 0s for e.g. 0xfff00000)
4741 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4743 This can be done in 2 instructions by using shifts with mov or mvn.
4748 mvn r0, r0, lsr #12 */
4749 if (set_sign_bit_copies
> 8
4750 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4754 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4755 rtx shift
= GEN_INT (set_sign_bit_copies
);
4761 gen_rtx_ASHIFT (mode
,
4766 gen_rtx_SET (target
,
4768 gen_rtx_LSHIFTRT (mode
, sub
,
4775 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4777 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4779 For eg. r0 = r0 | 0xfff
4784 if (set_zero_bit_copies
> 8
4785 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4789 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4790 rtx shift
= GEN_INT (set_zero_bit_copies
);
4796 gen_rtx_LSHIFTRT (mode
,
4801 gen_rtx_SET (target
,
4803 gen_rtx_ASHIFT (mode
, sub
,
4809 /* This will never be reached for Thumb2 because orn is a valid
4810 instruction. This is for Thumb1 and the ARM 32 bit cases.
4812 x = y | constant (such that ~constant is a valid constant)
4814 x = ~(~y & ~constant).
4816 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4820 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4821 emit_constant_insn (cond
,
4823 gen_rtx_NOT (mode
, source
)));
4826 sub
= gen_reg_rtx (mode
);
4827 emit_constant_insn (cond
,
4829 gen_rtx_AND (mode
, source
,
4831 emit_constant_insn (cond
,
4832 gen_rtx_SET (target
,
4833 gen_rtx_NOT (mode
, sub
)));
4840 /* See if two shifts will do 2 or more insn's worth of work. */
4841 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4843 HOST_WIDE_INT shift_mask
= ((0xffffffff
4844 << (32 - clear_sign_bit_copies
))
4847 if ((remainder
| shift_mask
) != 0xffffffff)
4849 HOST_WIDE_INT new_val
4850 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4854 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4855 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4856 new_src
, source
, subtargets
, 1);
4861 rtx targ
= subtargets
? NULL_RTX
: target
;
4862 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4863 targ
, source
, subtargets
, 0);
4869 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4870 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4872 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4873 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4879 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4881 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4883 if ((remainder
| shift_mask
) != 0xffffffff)
4885 HOST_WIDE_INT new_val
4886 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4889 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4891 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4892 new_src
, source
, subtargets
, 1);
4897 rtx targ
= subtargets
? NULL_RTX
: target
;
4899 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4900 targ
, source
, subtargets
, 0);
4906 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4907 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4909 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4910 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4922 /* Calculate what the instruction sequences would be if we generated it
4923 normally, negated, or inverted. */
4925 /* AND cannot be split into multiple insns, so invert and use BIC. */
4928 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4931 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4936 if (can_invert
|| final_invert
)
4937 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4942 immediates
= &pos_immediates
;
4944 /* Is the negated immediate sequence more efficient? */
4945 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4948 immediates
= &neg_immediates
;
4953 /* Is the inverted immediate sequence more efficient?
4954 We must allow for an extra NOT instruction for XOR operations, although
4955 there is some chance that the final 'mvn' will get optimized later. */
4956 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4959 immediates
= &inv_immediates
;
4967 /* Now output the chosen sequence as instructions. */
4970 for (i
= 0; i
< insns
; i
++)
4972 rtx new_src
, temp1_rtx
;
4974 temp1
= immediates
->i
[i
];
4976 if (code
== SET
|| code
== MINUS
)
4977 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4978 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4979 new_src
= gen_reg_rtx (mode
);
4985 else if (can_negate
)
4988 temp1
= trunc_int_for_mode (temp1
, mode
);
4989 temp1_rtx
= GEN_INT (temp1
);
4993 else if (code
== MINUS
)
4994 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4996 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4998 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5003 can_negate
= can_invert
;
5007 else if (code
== MINUS
)
5015 emit_constant_insn (cond
, gen_rtx_SET (target
,
5016 gen_rtx_NOT (mode
, source
)));
5023 /* Canonicalize a comparison so that we are more likely to recognize it.
5024 This can be done for a few constant compares, where we can make the
5025 immediate value easier to load. */
5028 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5029 bool op0_preserve_value
)
5032 unsigned HOST_WIDE_INT i
, maxval
;
5034 mode
= GET_MODE (*op0
);
5035 if (mode
== VOIDmode
)
5036 mode
= GET_MODE (*op1
);
5038 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5040 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5041 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5042 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5043 for GTU/LEU in Thumb mode. */
5047 if (*code
== GT
|| *code
== LE
5048 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5050 /* Missing comparison. First try to use an available
5052 if (CONST_INT_P (*op1
))
5060 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5062 *op1
= GEN_INT (i
+ 1);
5063 *code
= *code
== GT
? GE
: LT
;
5069 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5070 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5072 *op1
= GEN_INT (i
+ 1);
5073 *code
= *code
== GTU
? GEU
: LTU
;
5082 /* If that did not work, reverse the condition. */
5083 if (!op0_preserve_value
)
5085 std::swap (*op0
, *op1
);
5086 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5092 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5093 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5094 to facilitate possible combining with a cmp into 'ands'. */
5096 && GET_CODE (*op0
) == ZERO_EXTEND
5097 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5098 && GET_MODE (XEXP (*op0
, 0)) == QImode
5099 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5100 && subreg_lowpart_p (XEXP (*op0
, 0))
5101 && *op1
== const0_rtx
)
5102 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5105 /* Comparisons smaller than DImode. Only adjust comparisons against
5106 an out-of-range constant. */
5107 if (!CONST_INT_P (*op1
)
5108 || const_ok_for_arm (INTVAL (*op1
))
5109 || const_ok_for_arm (- INTVAL (*op1
)))
5123 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5125 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5126 *code
= *code
== GT
? GE
: LT
;
5134 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5136 *op1
= GEN_INT (i
- 1);
5137 *code
= *code
== GE
? GT
: LE
;
5144 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5145 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5147 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5148 *code
= *code
== GTU
? GEU
: LTU
;
5156 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5158 *op1
= GEN_INT (i
- 1);
5159 *code
= *code
== GEU
? GTU
: LEU
;
5170 /* Define how to find the value returned by a function. */
5173 arm_function_value(const_tree type
, const_tree func
,
5174 bool outgoing ATTRIBUTE_UNUSED
)
5177 int unsignedp ATTRIBUTE_UNUSED
;
5178 rtx r ATTRIBUTE_UNUSED
;
5180 mode
= TYPE_MODE (type
);
5182 if (TARGET_AAPCS_BASED
)
5183 return aapcs_allocate_return_reg (mode
, type
, func
);
5185 /* Promote integer types. */
5186 if (INTEGRAL_TYPE_P (type
))
5187 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5189 /* Promotes small structs returned in a register to full-word size
5190 for big-endian AAPCS. */
5191 if (arm_return_in_msb (type
))
5193 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5194 if (size
% UNITS_PER_WORD
!= 0)
5196 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5197 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5201 return arm_libcall_value_1 (mode
);
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table <libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
5234 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5236 static bool init_done
= false;
5237 static libcall_table_type
*libcall_htab
= NULL
;
5243 libcall_htab
= new libcall_table_type (31);
5244 add_libcall (libcall_htab
,
5245 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5246 add_libcall (libcall_htab
,
5247 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5248 add_libcall (libcall_htab
,
5249 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5250 add_libcall (libcall_htab
,
5251 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5253 add_libcall (libcall_htab
,
5254 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5255 add_libcall (libcall_htab
,
5256 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5257 add_libcall (libcall_htab
,
5258 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5259 add_libcall (libcall_htab
,
5260 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5262 add_libcall (libcall_htab
,
5263 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5264 add_libcall (libcall_htab
,
5265 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5266 add_libcall (libcall_htab
,
5267 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5268 add_libcall (libcall_htab
,
5269 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5270 add_libcall (libcall_htab
,
5271 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5272 add_libcall (libcall_htab
,
5273 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5274 add_libcall (libcall_htab
,
5275 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5276 add_libcall (libcall_htab
,
5277 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5279 /* Values from double-precision helper functions are returned in core
5280 registers if the selected core only supports single-precision
5281 arithmetic, even if we are using the hard-float ABI. The same is
5282 true for single-precision helpers, but we will never be using the
5283 hard-float ABI on a CPU which doesn't support single-precision
5284 operations in hardware. */
5285 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5286 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5287 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5288 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5289 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5290 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5291 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5292 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5293 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5294 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5295 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5296 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5298 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5302 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}
5336 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5339 arm_function_value_regno_p (const unsigned int regno
)
5341 if (regno
== ARG_REGISTER (1)
5343 && TARGET_AAPCS_BASED
5344 && TARGET_HARD_FLOAT
5345 && regno
== FIRST_VFP_REGNUM
)
5346 || (TARGET_IWMMXT_ABI
5347 && regno
== FIRST_IWMMXT_REGNUM
))
5353 /* Determine the amount of memory needed to store the possible return
5354 registers of an untyped call. */
5356 arm_apply_result_size (void)
5362 if (TARGET_HARD_FLOAT_ABI
)
5364 if (TARGET_IWMMXT_ABI
)
5371 /* Decide whether TYPE should be returned in memory (true)
5372 or in a register (false). FNTYPE is the type of the function making
5375 arm_return_in_memory (const_tree type
, const_tree fntype
)
5379 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5381 if (TARGET_AAPCS_BASED
)
5383 /* Simple, non-aggregate types (ie not including vectors and
5384 complex) are always returned in a register (or registers).
5385 We don't care about which register here, so we can short-cut
5386 some of the detail. */
5387 if (!AGGREGATE_TYPE_P (type
)
5388 && TREE_CODE (type
) != VECTOR_TYPE
5389 && TREE_CODE (type
) != COMPLEX_TYPE
)
5392 /* Any return value that is no larger than one word can be
5394 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5397 /* Check any available co-processors to see if they accept the
5398 type as a register candidate (VFP, for example, can return
5399 some aggregates in consecutive registers). These aren't
5400 available if the call is variadic. */
5401 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5404 /* Vector values should be returned using ARM registers, not
5405 memory (unless they're over 16 bytes, which will break since
5406 we only have four call-clobbered registers to play with). */
5407 if (TREE_CODE (type
) == VECTOR_TYPE
)
5408 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5410 /* The rest go in memory. */
5414 if (TREE_CODE (type
) == VECTOR_TYPE
)
5415 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5417 if (!AGGREGATE_TYPE_P (type
) &&
5418 (TREE_CODE (type
) != VECTOR_TYPE
))
5419 /* All simple types are returned in registers. */
5422 if (arm_abi
!= ARM_ABI_APCS
)
5424 /* ATPCS and later return aggregate types in memory only if they are
5425 larger than a word (or are variable size). */
5426 return (size
< 0 || size
> UNITS_PER_WORD
);
5429 /* For the arm-wince targets we choose to be compatible with Microsoft's
5430 ARM and Thumb compilers, which always return aggregates in memory. */
5432 /* All structures/unions bigger than one word are returned in memory.
5433 Also catch the case where int_size_in_bytes returns -1. In this case
5434 the aggregate is either huge or of variable size, and in either case
5435 we will want to return it via memory and not in a register. */
5436 if (size
< 0 || size
> UNITS_PER_WORD
)
5439 if (TREE_CODE (type
) == RECORD_TYPE
)
5443 /* For a struct the APCS says that we only return in a register
5444 if the type is 'integer like' and every addressable element
5445 has an offset of zero. For practical purposes this means
5446 that the structure can have at most one non bit-field element
5447 and that this element must be the first one in the structure. */
5449 /* Find the first field, ignoring non FIELD_DECL things which will
5450 have been created by C++. */
5451 for (field
= TYPE_FIELDS (type
);
5452 field
&& TREE_CODE (field
) != FIELD_DECL
;
5453 field
= DECL_CHAIN (field
))
5457 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5459 /* Check that the first field is valid for returning in a register. */
5461 /* ... Floats are not allowed */
5462 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5465 /* ... Aggregates that are not themselves valid for returning in
5466 a register are not allowed. */
5467 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5470 /* Now check the remaining fields, if any. Only bitfields are allowed,
5471 since they are not addressable. */
5472 for (field
= DECL_CHAIN (field
);
5474 field
= DECL_CHAIN (field
))
5476 if (TREE_CODE (field
) != FIELD_DECL
)
5479 if (!DECL_BIT_FIELD_TYPE (field
))
5486 if (TREE_CODE (type
) == UNION_TYPE
)
5490 /* Unions can be returned in registers if every element is
5491 integral, or can be returned in an integer register. */
5492 for (field
= TYPE_FIELDS (type
);
5494 field
= DECL_CHAIN (field
))
5496 if (TREE_CODE (field
) != FIELD_DECL
)
5499 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5502 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5508 #endif /* not ARM_WINCE */
5510 /* Return all other types in memory. */
5514 const struct pcs_attribute_arg
5518 } pcs_attribute_args
[] =
5520 {"aapcs", ARM_PCS_AAPCS
},
5521 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5523 /* We could recognize these, but changes would be needed elsewhere
5524 * to implement them. */
5525 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5526 {"atpcs", ARM_PCS_ATPCS
},
5527 {"apcs", ARM_PCS_APCS
},
5529 {NULL
, ARM_PCS_UNKNOWN
}
5533 arm_pcs_from_attribute (tree attr
)
5535 const struct pcs_attribute_arg
*ptr
;
5538 /* Get the value of the argument. */
5539 if (TREE_VALUE (attr
) == NULL_TREE
5540 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5541 return ARM_PCS_UNKNOWN
;
5543 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5545 /* Check it against the list of known arguments. */
5546 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5547 if (streq (arg
, ptr
->arg
))
5550 /* An unrecognized interrupt type. */
5551 return ARM_PCS_UNKNOWN
;
5554 /* Get the PCS variant to use for this call. TYPE is the function's type
5555 specification, DECL is the specific declartion. DECL may be null if
5556 the call could be indirect or if this is a library call. */
5558 arm_get_pcs_model (const_tree type
, const_tree decl
)
5560 bool user_convention
= false;
5561 enum arm_pcs user_pcs
= arm_pcs_default
;
5566 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5569 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5570 user_convention
= true;
5573 if (TARGET_AAPCS_BASED
)
5575 /* Detect varargs functions. These always use the base rules
5576 (no argument is ever a candidate for a co-processor
5578 bool base_rules
= stdarg_p (type
);
5580 if (user_convention
)
5582 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5583 sorry ("non-AAPCS derived PCS variant");
5584 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5585 error ("variadic functions must use the base AAPCS variant");
5589 return ARM_PCS_AAPCS
;
5590 else if (user_convention
)
5592 else if (decl
&& flag_unit_at_a_time
)
5594 /* Local functions never leak outside this compilation unit,
5595 so we are free to use whatever conventions are
5597 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5598 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5600 return ARM_PCS_AAPCS_LOCAL
;
5603 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5604 sorry ("PCS variant");
5606 /* For everything else we use the target's default. */
5607 return arm_pcs_default
;
5612 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5613 const_tree fntype ATTRIBUTE_UNUSED
,
5614 rtx libcall ATTRIBUTE_UNUSED
,
5615 const_tree fndecl ATTRIBUTE_UNUSED
)
5617 /* Record the unallocated VFP registers. */
5618 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5619 pcum
->aapcs_vfp_reg_alloc
= 0;
5622 /* Walk down the type tree of TYPE counting consecutive base elements.
5623 If *MODEP is VOIDmode, then set it to the first valid floating point
5624 type. If a non-floating point type is found, or if a floating point
5625 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5626 otherwise return the count in the sub-tree. */
5628 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5633 switch (TREE_CODE (type
))
5636 mode
= TYPE_MODE (type
);
5637 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5640 if (*modep
== VOIDmode
)
5649 mode
= TYPE_MODE (TREE_TYPE (type
));
5650 if (mode
!= DFmode
&& mode
!= SFmode
)
5653 if (*modep
== VOIDmode
)
5662 /* Use V2SImode and V4SImode as representatives of all 64-bit
5663 and 128-bit vector types, whether or not those modes are
5664 supported with the present options. */
5665 size
= int_size_in_bytes (type
);
5678 if (*modep
== VOIDmode
)
5681 /* Vector modes are considered to be opaque: two vectors are
5682 equivalent for the purposes of being homogeneous aggregates
5683 if they are the same size. */
5692 tree index
= TYPE_DOMAIN (type
);
5694 /* Can't handle incomplete types nor sizes that are not
5696 if (!COMPLETE_TYPE_P (type
)
5697 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5700 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5703 || !TYPE_MAX_VALUE (index
)
5704 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5705 || !TYPE_MIN_VALUE (index
)
5706 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5710 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5711 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5713 /* There must be no padding. */
5714 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5726 /* Can't handle incomplete types nor sizes that are not
5728 if (!COMPLETE_TYPE_P (type
)
5729 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5732 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5734 if (TREE_CODE (field
) != FIELD_DECL
)
5737 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5743 /* There must be no padding. */
5744 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5751 case QUAL_UNION_TYPE
:
5753 /* These aren't very interesting except in a degenerate case. */
5758 /* Can't handle incomplete types nor sizes that are not
5760 if (!COMPLETE_TYPE_P (type
)
5761 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5764 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5766 if (TREE_CODE (field
) != FIELD_DECL
)
5769 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5772 count
= count
> sub_count
? count
: sub_count
;
5775 /* There must be no padding. */
5776 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5789 /* Return true if PCS_VARIANT should use VFP registers. */
5791 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5793 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5795 static bool seen_thumb1_vfp
= false;
5797 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5799 sorry ("Thumb-1 hard-float VFP ABI");
5800 /* sorry() is not immediately fatal, so only display this once. */
5801 seen_thumb1_vfp
= true;
5807 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5810 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
5811 (TARGET_VFP_DOUBLE
|| !is_double
));
5814 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5815 suitable for passing or returning in VFP registers for the PCS
5816 variant selected. If it is, then *BASE_MODE is updated to contain
5817 a machine mode describing each element of the argument's type and
5818 *COUNT to hold the number of such elements. */
5820 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5821 machine_mode mode
, const_tree type
,
5822 machine_mode
*base_mode
, int *count
)
5824 machine_mode new_mode
= VOIDmode
;
5826 /* If we have the type information, prefer that to working things
5827 out from the mode. */
5830 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5832 if (ag_count
> 0 && ag_count
<= 4)
5837 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5838 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5839 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5844 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5847 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5853 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5856 *base_mode
= new_mode
;
5861 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5862 machine_mode mode
, const_tree type
)
5864 int count ATTRIBUTE_UNUSED
;
5865 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5867 if (!use_vfp_abi (pcs_variant
, false))
5869 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5874 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5877 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5880 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5881 &pcum
->aapcs_vfp_rmode
,
5882 &pcum
->aapcs_vfp_rcount
);
5885 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5886 for the behaviour of this function. */
5889 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5890 const_tree type ATTRIBUTE_UNUSED
)
5893 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
5894 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
5895 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5898 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5899 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5901 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5903 || (mode
== TImode
&& ! TARGET_NEON
)
5904 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5907 int rcount
= pcum
->aapcs_vfp_rcount
;
5909 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5913 /* Avoid using unsupported vector modes. */
5914 if (rmode
== V2SImode
)
5916 else if (rmode
== V4SImode
)
5923 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5924 for (i
= 0; i
< rcount
; i
++)
5926 rtx tmp
= gen_rtx_REG (rmode
,
5927 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5928 tmp
= gen_rtx_EXPR_LIST
5930 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5931 XVECEXP (par
, 0, i
) = tmp
;
5934 pcum
->aapcs_reg
= par
;
5937 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
                               machine_mode mode,
                               const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
          && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                             &ag_mode, &count);

      if (!TARGET_NEON)
        {
          if (ag_mode == V2SImode)
            ag_mode = DImode;
          else if (ag_mode == V4SImode)
            {
              ag_mode = DImode;
              count *= 2;
            }
        }
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
        {
          rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
          tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
          XVECEXP (par, 0, i) = tmp;
        }

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}

static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)			\
  {					\
    aapcs_ ## X ## _cum_init,		\
    aapcs_ ## X ## _is_call_candidate,	\
    aapcs_ ## X ## _allocate,		\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance		\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
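
/* For reference, AAPCS_CP(vfp) in the initializer above expands, by token
   pasting, to the entry

     { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate, aapcs_vfp_allocate,
       aapcs_vfp_is_return_candidate, aapcs_vfp_allocate_return_reg,
       aapcs_vfp_advance },

   which is how the VFP hooks defined above are wired into the slot table.  */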
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
                          const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
                                                        TYPE_MODE (type),
                                                        type))
          return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
                           const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
                                                        type))
          return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
                                                             mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}

static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
                  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
         preparation phase, these are handled elsewhere in the
         compiler.  */

      if (slot >= 0)
        {
          /* A Co-processor register candidate goes either in its own
             class of registers or on the stack.  */
          if (!pcum->aapcs_cprc_failed[slot])
            {
              /* C1.cp - Try to allocate the argument to co-processor
                 registers.  */
              if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
                return;

              /* C2.cp - Put the argument on the stack and note that we
                 can't assign any more candidates in this slot.  We also
                 need to note that we have allocated stack space, so that
                 we won't later try to split a non-cprc candidate between
                 core registers and the stack.  */
              pcum->aapcs_cprc_failed[slot] = true;
              pcum->can_split = false;
            }

          /* We didn't get a register, so this argument goes on the
             stack.  */
          gcc_assert (pcum->can_split == false);
          return;
        }
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
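
/* Worked examples (illustrative, not from the original sources): for
   f (int a, double b), A goes in r0; B needs doubleword alignment, so rule
   C3 rounds the NCRN from 1 up to 2 and B occupies the even pair r2-r3.
   For f (int a, int b, struct { int x, y, z; } c), the struct needs three
   words but only r2-r3 remain, so rule C5 splits it: r2, r3 plus four bytes
   of stack, provided no earlier argument has already gone to the stack.  */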
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname,
                          tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
        pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
        {
          int i;

          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
            {
              pcum->aapcs_cprc_failed[i] = false;
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
            }
        }
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg;
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
    }
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      return true;

  return false;
}
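
/* Examples (illustrative): double, long long and 64-bit vector types have an
   alignment of 64 > PARM_BOUNDARY (32) and therefore need an even starting
   register, as does a struct containing a double member.  A struct made up
   only of ints or floats does not.  */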
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
                  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
        {
          pcum->can_split = false;
          return NULL_RTX;
        }
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}

static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
          ? DOUBLEWORD_ALIGNMENT
          : PARM_BOUNDARY);
}

static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
                       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
                          const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
                                                              type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}

/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
                       machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
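
/* Usage example (illustrative): the pragmas registered for these handlers
   are written in source code as

     #pragma long_calls          // following declarations default to long_call
     extern void far_away (void);
     #pragma no_long_calls       // following declarations default to short_call
     #pragma long_calls_off      // back to the command-line default

   and simply flip arm_pragma_long_calls between LONG, SHORT and OFF.  */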
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
                          bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
        {
          warning (OPT_Wattributes, "%qE attribute only applies to functions",
                   name);
          *no_add_attrs = true;
        }
      /* FIXME: the argument if any is checked for type attributes;
         should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
          || TREE_CODE (*node) == METHOD_TYPE)
        {
          if (arm_isr_value (args) == ARM_FT_UNKNOWN)
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
              *no_add_attrs = true;
            }
        }
      else if (TREE_CODE (*node) == POINTER_TYPE
               && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
                   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
               && arm_isr_value (args) != ARM_FT_UNKNOWN)
        {
          *node = build_variant_type_copy (*node);
          TREE_TYPE (*node) = build_type_attribute_variant
            (TREE_TYPE (*node),
             tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
          *no_add_attrs = true;
        }
      else
        {
          /* Possibly pass this attribute on from the type to a decl.  */
          if (flags & ((int) ATTR_FLAG_DECL_NEXT
                       | (int) ATTR_FLAG_FUNCTION_NEXT
                       | (int) ATTR_FLAG_ARRAY_NEXT))
            {
              *no_add_attrs = true;
              return tree_cons (name, args, NULL_TREE);
            }
          else
            warning (OPT_Wattributes, "%qE attribute ignored",
                     name);
        }
    }

  return NULL_TREE;
}

/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
                          int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
        return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
        return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
        attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
        attr_name = get_identifier ("short_call");
      else
        return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
        return false;
    }

  return function_section (decl) == section;
}

/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
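
/* Usage example (illustrative): the per-function attributes checked above
   are applied in source code as

     void far_func (void)  __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));

   short_call (or #pragma no_long_calls) wins over -mlong-calls, and a callee
   known to live in the current function's section is always reached with a
   plain BL.  */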
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
         example that we aren't returning a value from the sibling in
         a VFP register but then need to transfer it to a core
         register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
        decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                              cfun->decl, false);
      if (!rtx_equal_p (a, b))
        return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}

/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
          && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx_insn *seq, *insn;

          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
                  && arm_pic_register > LAST_LO_REGNUM)
                emit_move_insn (cfun->machine->pic_reg,
                                gen_rtx_REG (Pmode, arm_pic_register));
              else
                arm_load_pic_register (0UL);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATION (insn) = prologue_location;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              insert_insn_on_edge (seq,
                                   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
            }
        }
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF &&
               SYMBOL_REF_LOCAL_P (orig)))
          && NEED_GOT_RELOC
          && arm_pic_data_is_text_relative)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          rtx pat;
          rtx mem;

          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register ();

          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

          /* Make the MEM as close to a constant as possible.  */
          mem = SET_SRC (pat);
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
          MEM_READONLY_P (mem) = 1;
          MEM_NOTRAP_P (mem) = 1;

          insn = emit_insn (pat);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (CONST_INT_P (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
          ? crtl->args.info.aapcs_ncrn < 4
          : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}

static GTY(()) int pic_labelno;
/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else if (arm_pic_register != INVALID_REGNUM
                   && arm_pic_register > LAST_LO_REGNUM
                   && REGNO (pic_reg) <= LAST_LO_REGNUM)
            {
              emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
              emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
              emit_use (gen_rtx_REG (Pmode, arm_pic_register));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
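
/* Roughly, the non-VxWorks path above emits the classic PIC prologue
   (illustrative sketch only; exact labels and register choice vary):

	ldr	rPIC, .Lgot
   .Lpic:
	add	rPIC, pc, rPIC
	...
   .Lgot:
	.word	_GLOBAL_OFFSET_TABLE_ - (.Lpic + 8)

   the '+ 8' (or '+ 4' in Thumb state) being the PC bias folded into the
   UNSPEC_GOTSYM_OFF constant.  */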
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx, insn;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                               UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
  return insn;
}
7219 arm_address_register_rtx_p (rtx x
, int strict_p
)
7229 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7231 return (regno
<= LAST_ARM_REGNUM
7232 || regno
>= FIRST_PSEUDO_REGISTER
7233 || regno
== FRAME_POINTER_REGNUM
7234 || regno
== ARG_POINTER_REGNUM
);
7237 /* Return TRUE if this rtx is the difference of a symbol and a label,
7238 and will reduce to a PC-relative relocation in the object file.
7239 Expressions like this can be left alone when generating PIC, rather
7240 than forced through the GOT. */
7242 pcrel_constant_p (rtx x
)
7244 if (GET_CODE (x
) == MINUS
)
7245 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7250 /* Return true if X will surely end up in an index register after next
7253 will_be_in_index_register (const_rtx x
)
7255 /* arm.md: calculate_pic_address will split this into a register. */
7256 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && REG_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
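
/* Examples of addresses accepted above (illustrative): [r0], [r1, #4095],
   [r4, r5, lsl #2] and post-increment forms such as [r0], #4 for word-sized
   accesses; [r0, #252] or [r0, r1] for an LDRD/STRD pair when TARGET_LDRD.
   TImode and NEON structure modes only accept a plain base register so that
   the move can later be split into several loads or stores.  */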
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
        return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend a 8bit value to 32bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && CONST_INT_P (op)
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
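
/* Offset ranges checked above, in instruction terms (illustrative): plain
   LDR/STR and LDRB/STRB take a 12-bit offset (+/-4095); LDRH/LDRSH/LDRSB on
   ARMv4 and later, and LDRD/STRD, take +/-255; VLDR/VSTR take a word-aligned
   offset of up to +/-1020 (an 8-bit immediate scaled by 4), which is why
   SFmode/DFmode indexes must be multiples of 4 in the -1023..1023 range.  */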
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL(op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}

/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and -1024~1024
               (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* ??? Can we assume ldrd for thumb2?  */
          /* Thumb-2 ldrd only has reg+const addressing modes.  */
          /* ldrd supports offsets of +-1020.
             However the ldr fallback does not.  */
          return val > -256 && val < 256 && (val & 3) == 0;
        }
      else
        return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && CONST_INT_P (op)
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}

/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (GET_CODE (x) == LABEL_REF
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && CONST_INT_P (XEXP (x, 1))
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      else if (REG_P (XEXP (x, 0))
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
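
/* In instruction terms (illustrative): these are the unsigned 5-bit scaled
   immediates of the 16-bit loads and stores - LDRB/STRB reach 0..31,
   LDRH/STRH reach 0..62 in steps of 2, and LDR/STR reach 0..124 in steps
   of 4; the larger 0..1020 SP-relative form is handled separately in
   thumb1_legitimate_address_p above.  */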
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}

/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
        return LO_REGS;
      else
        return rclass;
    }
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */
      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}

static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}

static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme */
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
                                                     GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
        {
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
          emit_move_insn (reg, gen_const_mem (SImode, reg));
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
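
/* Added summary (commentary only, not part of the original file):
   global- and local-dynamic accesses go through a library call (or a
   GNU2 TLS descriptor sequence) and yield thread-pointer + offset;
   initial-exec loads the per-symbol offset through a PC-relative GOT
   entry; local-exec folds a link-time constant offset directly onto
   the thread pointer returned by arm_load_tp.  */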
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
        {
          addend = XEXP (XEXP (x, 0), 1);
          x = XEXP (XEXP (x, 0), 0);
        }

      if (GET_CODE (x) != SYMBOL_REF)
        return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
        {
          x = gen_rtx_PLUS (SImode, x, addend);
          orig_x = x;
        }
      else
        return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
        return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
          && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && CONST_INT_P (xop1))
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode || mode == DFmode)
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (Pmode, base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allows for the mini pool to be avoided entirely. */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use a 8-bit index. So let's use a 12-bit index for SImode only and
         hope that arm_gen_constant will enable ldrb to use more bits. */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
        {
          /* It'll most probably be more efficient to generate the base
             with more bits set and use a negative index instead. */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
        return true;

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
         TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
        iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    return true;

  return (CONST_INT_P (x)
          || CONST_DOUBLE_P (x)
          || CONSTANT_ADDRESS_P (x)
          || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}
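
/* Added note (commentary, not in the original file): the practical effect
   is that TLS symbol references are never spilled to the constant pool;
   they must instead be rewritten by legitimize_tls_address above.  */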
#define REG_OR_SUBREG_REG(X) \
  (REG_P (X) \
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X) \
   (REG_P (X) ? (X) : SUBREG_REG (X))
8351 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8353 machine_mode mode
= GET_MODE (x
);
8362 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8369 return COSTS_N_INSNS (1);
8372 if (CONST_INT_P (XEXP (x
, 1)))
8375 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8382 return COSTS_N_INSNS (2) + cycles
;
8384 return COSTS_N_INSNS (1) + 16;
8387 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8389 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8390 return (COSTS_N_INSNS (words
)
8391 + 4 * ((MEM_P (SET_SRC (x
)))
8392 + MEM_P (SET_DEST (x
))));
8397 if (UINTVAL (x
) < 256
8398 /* 16-bit constant. */
8399 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8401 if (thumb_shiftable_const (INTVAL (x
)))
8402 return COSTS_N_INSNS (2);
8403 return COSTS_N_INSNS (3);
8405 else if ((outer
== PLUS
|| outer
== COMPARE
)
8406 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8408 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8409 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8410 return COSTS_N_INSNS (1);
8411 else if (outer
== AND
)
8414 /* This duplicates the tests in the andsi3 expander. */
8415 for (i
= 9; i
<= 31; i
++)
8416 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8417 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8418 return COSTS_N_INSNS (2);
8420 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8421 || outer
== LSHIFTRT
)
8423 return COSTS_N_INSNS (2);
8429 return COSTS_N_INSNS (3);
8447 /* XXX another guess. */
8448 /* Memory costs quite a lot for the first word, but subsequent words
8449 load at the equivalent of a single insn each. */
8450 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8451 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8456 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8462 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8463 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8469 return total
+ COSTS_N_INSNS (1);
8471 /* Assume a two-shift sequence. Increase the cost slightly so
8472 we prefer actual shifts over an extend operation. */
8473 return total
+ 1 + COSTS_N_INSNS (2);
8481 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8483 machine_mode mode
= GET_MODE (x
);
8484 enum rtx_code subcode
;
8486 enum rtx_code code
= GET_CODE (x
);
8492 /* Memory costs quite a lot for the first word, but subsequent words
8493 load at the equivalent of a single insn each. */
8494 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8501 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8502 *total
= COSTS_N_INSNS (2);
8503 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8504 *total
= COSTS_N_INSNS (4);
8506 *total
= COSTS_N_INSNS (20);
8510 if (REG_P (XEXP (x
, 1)))
8511 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8512 else if (!CONST_INT_P (XEXP (x
, 1)))
8513 *total
= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8519 *total
+= COSTS_N_INSNS (4);
8524 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8525 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8528 *total
+= COSTS_N_INSNS (3);
8532 *total
+= COSTS_N_INSNS (1);
8533 /* Increase the cost of complex shifts because they aren't any faster,
8534 and reduce dual issue opportunities. */
8535 if (arm_tune_cortex_a9
8536 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8544 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8545 if (CONST_INT_P (XEXP (x
, 0))
8546 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8548 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8552 if (CONST_INT_P (XEXP (x
, 1))
8553 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8555 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8562 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8564 if (TARGET_HARD_FLOAT
8566 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8568 *total
= COSTS_N_INSNS (1);
8569 if (CONST_DOUBLE_P (XEXP (x
, 0))
8570 && arm_const_double_rtx (XEXP (x
, 0)))
8572 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8576 if (CONST_DOUBLE_P (XEXP (x
, 1))
8577 && arm_const_double_rtx (XEXP (x
, 1)))
8579 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8585 *total
= COSTS_N_INSNS (20);
8589 *total
= COSTS_N_INSNS (1);
8590 if (CONST_INT_P (XEXP (x
, 0))
8591 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8593 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8597 subcode
= GET_CODE (XEXP (x
, 1));
8598 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8599 || subcode
== LSHIFTRT
8600 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8602 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8603 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, subcode
, 0, speed
);
8607 /* A shift as a part of RSB costs no more than RSB itself. */
8608 if (GET_CODE (XEXP (x
, 0)) == MULT
8609 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8611 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
, 0, speed
);
8612 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8617 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8619 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8620 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, subcode
, 0, speed
);
8624 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8625 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8627 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
8629 if (REG_P (XEXP (XEXP (x
, 1), 0))
8630 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8631 *total
+= COSTS_N_INSNS (1);
8639 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8640 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8641 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8643 *total
= COSTS_N_INSNS (1);
8644 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
8645 GET_CODE (XEXP (x
, 0)), 0, speed
);
8646 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8650 /* MLA: All arguments must be registers. We filter out
8651 multiplication by a power of two, so that we fall down into
8653 if (GET_CODE (XEXP (x
, 0)) == MULT
8654 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8656 /* The cost comes from the cost of the multiply. */
8660 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8662 if (TARGET_HARD_FLOAT
8664 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8666 *total
= COSTS_N_INSNS (1);
8667 if (CONST_DOUBLE_P (XEXP (x
, 1))
8668 && arm_const_double_rtx (XEXP (x
, 1)))
8670 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8677 *total
= COSTS_N_INSNS (20);
8681 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8682 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8684 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), mode
, code
,
8686 if (REG_P (XEXP (XEXP (x
, 0), 0))
8687 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8688 *total
+= COSTS_N_INSNS (1);
8694 case AND
: case XOR
: case IOR
:
8696 /* Normally the frame registers will be spilt into reg+const during
8697 reload, so it is a bad idea to combine them with other instructions,
8698 since then they might not be moved outside of loops. As a compromise
8699 we allow integration with ops that have a constant as their second
8701 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8702 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8703 && !CONST_INT_P (XEXP (x
, 1)))
8704 *total
= COSTS_N_INSNS (1);
8708 *total
+= COSTS_N_INSNS (2);
8709 if (CONST_INT_P (XEXP (x
, 1))
8710 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8712 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8719 *total
+= COSTS_N_INSNS (1);
8720 if (CONST_INT_P (XEXP (x
, 1))
8721 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8723 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8726 subcode
= GET_CODE (XEXP (x
, 0));
8727 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8728 || subcode
== LSHIFTRT
8729 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8731 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8732 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8737 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8739 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8740 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8744 if (subcode
== UMIN
|| subcode
== UMAX
8745 || subcode
== SMIN
|| subcode
== SMAX
)
8747 *total
= COSTS_N_INSNS (3);
8754 /* This should have been handled by the CPU specific routines. */
8758 if (arm_arch3m
&& mode
== SImode
8759 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8760 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8761 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8762 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8763 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8764 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8766 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, LSHIFTRT
,
8770 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8774 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8776 if (TARGET_HARD_FLOAT
8778 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8780 *total
= COSTS_N_INSNS (1);
8783 *total
= COSTS_N_INSNS (2);
8789 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8790 if (mode
== SImode
&& code
== NOT
)
8792 subcode
= GET_CODE (XEXP (x
, 0));
8793 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8794 || subcode
== LSHIFTRT
8795 || subcode
== ROTATE
|| subcode
== ROTATERT
8797 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8799 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
,
8801 /* Register shifts cost an extra cycle. */
8802 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8803 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8813 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8815 *total
= COSTS_N_INSNS (4);
8819 operand
= XEXP (x
, 0);
8821 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8822 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8823 && REG_P (XEXP (operand
, 0))
8824 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8825 *total
+= COSTS_N_INSNS (1);
8826 *total
+= rtx_cost (XEXP (x
, 1), VOIDmode
, code
, 1, speed
);
8827 *total
+= rtx_cost (XEXP (x
, 2), VOIDmode
, code
, 2, speed
);
8831 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8833 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
8840 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8841 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8843 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
8850 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8851 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8853 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
8874 /* SCC insns. In the case where the comparison has already been
8875 performed, then they cost 2 instructions. Otherwise they need
8876 an additional comparison before them. */
8877 *total
= COSTS_N_INSNS (2);
8878 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8885 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8891 *total
+= COSTS_N_INSNS (1);
8892 if (CONST_INT_P (XEXP (x
, 1))
8893 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8895 *total
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed
);
8899 subcode
= GET_CODE (XEXP (x
, 0));
8900 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8901 || subcode
== LSHIFTRT
8902 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8904 mode
= GET_MODE (XEXP (x
, 0));
8905 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8906 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8911 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8913 mode
= GET_MODE (XEXP (x
, 0));
8914 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8915 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8925 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8926 if (!CONST_INT_P (XEXP (x
, 1))
8927 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8928 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8932 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8934 if (TARGET_HARD_FLOAT
8936 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8938 *total
= COSTS_N_INSNS (1);
8941 *total
= COSTS_N_INSNS (20);
8944 *total
= COSTS_N_INSNS (1);
8946 *total
+= COSTS_N_INSNS (3);
8952 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8954 rtx op
= XEXP (x
, 0);
8955 machine_mode opmode
= GET_MODE (op
);
8958 *total
+= COSTS_N_INSNS (1);
8960 if (opmode
!= SImode
)
8964 /* If !arm_arch4, we use one of the extendhisi2_mem
8965 or movhi_bytes patterns for HImode. For a QImode
8966 sign extension, we first zero-extend from memory
8967 and then perform a shift sequence. */
8968 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8969 *total
+= COSTS_N_INSNS (2);
8972 *total
+= COSTS_N_INSNS (1);
8974 /* We don't have the necessary insn, so we need to perform some
8976 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8977 /* An and with constant 255. */
8978 *total
+= COSTS_N_INSNS (1);
8980 /* A shift sequence. Increase costs slightly to avoid
8981 combining two shifts into an extend operation. */
8982 *total
+= COSTS_N_INSNS (2) + 1;
8988 switch (GET_MODE (XEXP (x
, 0)))
8995 *total
= COSTS_N_INSNS (1);
9005 mode
= GET_MODE (XEXP (x
, 0));
9006 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
9010 if (const_ok_for_arm (INTVAL (x
))
9011 || const_ok_for_arm (~INTVAL (x
)))
9012 *total
= COSTS_N_INSNS (1);
9014 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
9015 INTVAL (x
), NULL_RTX
,
9022 *total
= COSTS_N_INSNS (3);
9026 *total
= COSTS_N_INSNS (1);
9030 *total
= COSTS_N_INSNS (1);
9031 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
9035 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
9036 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9037 *total
= COSTS_N_INSNS (1);
9039 *total
= COSTS_N_INSNS (4);
9043 /* The vec_extract patterns accept memory operands that require an
9044 address reload. Account for the cost of that reload to give the
9045 auto-inc-dec pass an incentive to try to replace them. */
9046 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
9047 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
9049 mode
= GET_MODE (SET_DEST (x
));
9050 *total
= rtx_cost (SET_DEST (x
), mode
, code
, 0, speed
);
9051 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
9052 *total
+= COSTS_N_INSNS (1);
9055 /* Likewise for the vec_set patterns. */
9056 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
9057 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
9058 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
9060 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
9061 mode
= GET_MODE (SET_DEST (x
));
9062 *total
= rtx_cost (mem
, mode
, code
, 0, speed
);
9063 if (!neon_vector_mem_operand (mem
, 2, true))
9064 *total
+= COSTS_N_INSNS (1);
9070 /* We cost this as high as our memory costs to allow this to
9071 be hoisted from loops. */
9072 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
9074 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
9080 && TARGET_HARD_FLOAT
9082 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9083 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9084 *total
= COSTS_N_INSNS (1);
9086 *total
= COSTS_N_INSNS (4);
9090 *total
= COSTS_N_INSNS (4);
9095 /* Estimates the size cost of thumb1 instructions.
9096 For now most of the code is copied from thumb1_rtx_costs. We need more
9097 fine grain tuning when we have more related test cases. */
9099 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9101 machine_mode mode
= GET_MODE (x
);
9110 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9114 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9115 defined by RTL expansion, especially for the expansion of
9117 if ((GET_CODE (XEXP (x
, 0)) == MULT
9118 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
9119 || (GET_CODE (XEXP (x
, 1)) == MULT
9120 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
9121 return COSTS_N_INSNS (2);
9126 return COSTS_N_INSNS (1);
9129 if (CONST_INT_P (XEXP (x
, 1)))
9131 /* Thumb1 mul instruction can't operate on const. We must Load it
9132 into a register first. */
9133 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
9134 /* For the targets which have a very small and high-latency multiply
9135 unit, we prefer to synthesize the mult with up to 5 instructions,
9136 giving a good balance between size and performance. */
9137 if (arm_arch6m
&& arm_m_profile_small_mul
)
9138 return COSTS_N_INSNS (5);
9140 return COSTS_N_INSNS (1) + const_size
;
9142 return COSTS_N_INSNS (1);
9145 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9147 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9148 cost
= COSTS_N_INSNS (words
);
9149 if (satisfies_constraint_J (SET_SRC (x
))
9150 || satisfies_constraint_K (SET_SRC (x
))
9151 /* Too big an immediate for a 2-byte mov, using MOVT. */
9152 || (CONST_INT_P (SET_SRC (x
))
9153 && UINTVAL (SET_SRC (x
)) >= 256
9155 && satisfies_constraint_j (SET_SRC (x
)))
9156 /* thumb1_movdi_insn. */
9157 || ((words
> 1) && MEM_P (SET_SRC (x
))))
9158 cost
+= COSTS_N_INSNS (1);
9164 if (UINTVAL (x
) < 256)
9165 return COSTS_N_INSNS (1);
9166 /* movw is 4byte long. */
9167 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
9168 return COSTS_N_INSNS (2);
9169 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9170 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9171 return COSTS_N_INSNS (2);
9172 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9173 if (thumb_shiftable_const (INTVAL (x
)))
9174 return COSTS_N_INSNS (2);
9175 return COSTS_N_INSNS (3);
9177 else if ((outer
== PLUS
|| outer
== COMPARE
)
9178 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9180 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9181 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9182 return COSTS_N_INSNS (1);
9183 else if (outer
== AND
)
9186 /* This duplicates the tests in the andsi3 expander. */
9187 for (i
= 9; i
<= 31; i
++)
9188 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9189 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9190 return COSTS_N_INSNS (2);
9192 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9193 || outer
== LSHIFTRT
)
9195 return COSTS_N_INSNS (2);
9201 return COSTS_N_INSNS (3);
9215 return COSTS_N_INSNS (1);
9218 return (COSTS_N_INSNS (1)
9220 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9221 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9222 ? COSTS_N_INSNS (1) : 0));
9226 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9231 /* XXX still guessing. */
9232 switch (GET_MODE (XEXP (x
, 0)))
9235 return (1 + (mode
== DImode
? 4 : 0)
9236 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9239 return (4 + (mode
== DImode
? 4 : 0)
9240 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9243 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9254 /* RTX costs when optimizing for size. */
9256 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9259 machine_mode mode
= GET_MODE (x
);
9262 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9266 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9270 /* A memory access costs 1 insn if the mode is small, or the address is
9271 a single register, otherwise it costs one insn per word. */
9272 if (REG_P (XEXP (x
, 0)))
9273 *total
= COSTS_N_INSNS (1);
9275 && GET_CODE (XEXP (x
, 0)) == PLUS
9276 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9277 /* This will be split into two instructions.
9278 See arm.md:calculate_pic_address. */
9279 *total
= COSTS_N_INSNS (2);
9281 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9288 /* Needs a libcall, so it costs about this. */
9289 *total
= COSTS_N_INSNS (2);
9293 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9295 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
9304 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9306 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), mode
, code
,
9310 else if (mode
== SImode
)
9312 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
9314 /* Slightly disparage register shifts, but not by much. */
9315 if (!CONST_INT_P (XEXP (x
, 1)))
9316 *total
+= 1 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, false);
9320 /* Needs a libcall. */
9321 *total
= COSTS_N_INSNS (2);
9325 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9326 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9328 *total
= COSTS_N_INSNS (1);
9334 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
9335 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
9337 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
9338 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
9339 || subcode1
== ROTATE
|| subcode1
== ROTATERT
9340 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
9341 || subcode1
== ASHIFTRT
)
9343 /* It's just the cost of the two operands. */
9348 *total
= COSTS_N_INSNS (1);
9352 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9356 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9357 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9359 *total
= COSTS_N_INSNS (1);
9363 /* A shift as a part of ADD costs nothing. */
9364 if (GET_CODE (XEXP (x
, 0)) == MULT
9365 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
9367 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
9368 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
, 0, false);
9369 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, false);
9374 case AND
: case XOR
: case IOR
:
9377 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9379 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9380 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9381 || (code
== AND
&& subcode
== NOT
))
9383 /* It's just the cost of the two operands. */
9389 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9393 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9397 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9398 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9400 *total
= COSTS_N_INSNS (1);
9406 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9415 if (cc_register (XEXP (x
, 0), VOIDmode
))
9418 *total
= COSTS_N_INSNS (1);
9422 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9423 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9424 *total
= COSTS_N_INSNS (1);
9426 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9431 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9434 if (const_ok_for_arm (INTVAL (x
)))
9435 /* A multiplication by a constant requires another instruction
9436 to load the constant to a register. */
9437 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9439 else if (const_ok_for_arm (~INTVAL (x
)))
9440 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9441 else if (const_ok_for_arm (-INTVAL (x
)))
9443 if (outer_code
== COMPARE
|| outer_code
== PLUS
9444 || outer_code
== MINUS
)
9447 *total
= COSTS_N_INSNS (1);
9450 *total
= COSTS_N_INSNS (2);
9456 *total
= COSTS_N_INSNS (2);
9460 *total
= COSTS_N_INSNS (4);
9465 && TARGET_HARD_FLOAT
9466 && outer_code
== SET
9467 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9468 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9469 *total
= COSTS_N_INSNS (1);
9471 *total
= COSTS_N_INSNS (4);
9476 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9477 cost of these slightly. */
9478 *total
= COSTS_N_INSNS (1) + 1;
9485 if (mode
!= VOIDmode
)
9486 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9488 *total
= COSTS_N_INSNS (4); /* How knows? */
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
           || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
        *shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
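
/* Added example (illustrative only): for (plus (mult (reg r1) (const_int 4))
   (reg r2)) the MULT operand is really r1 shifted left by 2, so
   shifter_op_p returns r1 and leaves *SHIFT_REG untouched; the costing
   code can then treat the whole expression as one add-with-shift.  */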
9519 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9521 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9522 rtx_code code
= GET_CODE (x
);
9523 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9525 switch (XINT (x
, 1))
9527 case UNSPEC_UNALIGNED_LOAD
:
9528 /* We can only do unaligned loads into the integer unit, and we can't
9530 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9532 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9533 + extra_cost
->ldst
.load_unaligned
);
9536 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9537 ADDR_SPACE_GENERIC
, speed_p
);
9541 case UNSPEC_UNALIGNED_STORE
:
9542 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9544 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9545 + extra_cost
->ldst
.store_unaligned
);
9547 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9549 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9550 ADDR_SPACE_GENERIC
, speed_p
);
9561 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9565 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
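
/* Worked numbers (added for illustration): with two register arguments,
   LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing for speed and
   COSTS_N_INSNS (4) when optimizing for size.  */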
9575 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9578 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9579 if (shift_op != NULL \
9580 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9585 *cost += extra_cost->alu.arith_shift_reg; \
9586 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9587 ASHIFT, 1, speed_p); \
9590 *cost += extra_cost->alu.arith_shift; \
9592 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9593 ASHIFT, 0, speed_p) \
9594 + rtx_cost (XEXP (x, 1 - IDX), \
9595 GET_MODE (shift_op), \
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
9616 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9617 const struct cpu_cost_table
*extra_cost
,
9618 int *cost
, bool speed_p
)
9620 machine_mode mode
= GET_MODE (x
);
9622 *cost
= COSTS_N_INSNS (1);
9627 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9629 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9637 /* SET RTXs don't have a mode so we get it from the destination. */
9638 mode
= GET_MODE (SET_DEST (x
));
9640 if (REG_P (SET_SRC (x
))
9641 && REG_P (SET_DEST (x
)))
9643 /* Assume that most copies can be done with a single insn,
9644 unless we don't have HW FP, in which case everything
9645 larger than word mode will require two insns. */
9646 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9647 && GET_MODE_SIZE (mode
) > 4)
9650 /* Conditional register moves can be encoded
9651 in 16 bits in Thumb mode. */
9652 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9658 if (CONST_INT_P (SET_SRC (x
)))
9660 /* Handle CONST_INT here, since the value doesn't have a mode
9661 and we would otherwise be unable to work out the true cost. */
9662 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9665 /* Slightly lower the cost of setting a core reg to a constant.
9666 This helps break up chains and allows for better scheduling. */
9667 if (REG_P (SET_DEST (x
))
9668 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9671 /* Immediate moves with an immediate in the range [0, 255] can be
9672 encoded in 16 bits in Thumb mode. */
9673 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9674 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9676 goto const_int_cost
;
9682 /* A memory access costs 1 insn if the mode is small, or the address is
9683 a single register, otherwise it costs one insn per word. */
9684 if (REG_P (XEXP (x
, 0)))
9685 *cost
= COSTS_N_INSNS (1);
9687 && GET_CODE (XEXP (x
, 0)) == PLUS
9688 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9689 /* This will be split into two instructions.
9690 See arm.md:calculate_pic_address. */
9691 *cost
= COSTS_N_INSNS (2);
9693 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9695 /* For speed optimizations, add the costs of the address and
9696 accessing memory. */
9699 *cost
+= (extra_cost
->ldst
.load
9700 + arm_address_cost (XEXP (x
, 0), mode
,
9701 ADDR_SPACE_GENERIC
, speed_p
));
9703 *cost
+= extra_cost
->ldst
.load
;
      /* Calculations of LDM costs are complex.  We assume an initial cost
         (ldm_1st) which will load the number of registers mentioned in
         ldm_regs_per_insn_1st registers; then each additional
         ldm_regs_per_insn_subsequent registers cost one more insn.  The
         formula for N regs is thus:

         ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
                                   + ldm_regs_per_insn_subsequent - 1)
                                  / ldm_regs_per_insn_subsequent).

         Additional costs may also be added for addressing.  A similar
         formula is used for STM.  */
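
      /* Worked example (added for illustration, using made-up tuning values):
         if ldm_regs_per_insn_1st is 3 and ldm_regs_per_insn_subsequent is 2,
         an LDM of N = 6 registers costs
         ldm_1st + COSTS_N_INSNS ((max (6 - 3, 0) + 2 - 1) / 2)
                 = ldm_1st + COSTS_N_INSNS (2).  */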
      {
        bool is_ldm = load_multiple_operation (x, SImode);
        bool is_stm = store_multiple_operation (x, SImode);

        if (is_ldm || is_stm)
          {
            if (speed_p)
              {
                HOST_WIDE_INT nregs = XVECLEN (x, 0);
                HOST_WIDE_INT regs_per_insn_1st = is_ldm
                  ? extra_cost->ldst.ldm_regs_per_insn_1st
                  : extra_cost->ldst.stm_regs_per_insn_1st;
                HOST_WIDE_INT regs_per_insn_sub = is_ldm
                  ? extra_cost->ldst.ldm_regs_per_insn_subsequent
                  : extra_cost->ldst.stm_regs_per_insn_subsequent;

                *cost += regs_per_insn_1st
                         + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
                                            + regs_per_insn_sub - 1)
                                           / regs_per_insn_sub);
              }

            return true;
          }
      }
9749 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9750 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9751 *cost
+= COSTS_N_INSNS (speed_p
9752 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9753 else if (mode
== SImode
&& TARGET_IDIV
)
9754 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9756 *cost
= LIBCALL_COST (2);
9757 return false; /* All arguments must be in registers. */
      /* MOD by a power of 2 can be expanded as:
         rsbs    r1, r0, #0
         and     r0, r0, #(n - 1)
         and     r1, r1, #(n - 1)
         rsbpl   r0, r1, #0.  */
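      /* Added illustration: for r0 % 8 the masks are #7; the leading rsbs
         computes -r0 and sets the flags, so for a negative dividend the
         final rsbpl negates the masked remainder, matching C's truncated
         modulo (e.g. -5 % 8 == -5).  */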
      if (CONST_INT_P (XEXP (x, 1))
          && exact_log2 (INTVAL (XEXP (x, 1))) > 0
          && mode == SImode)
        {
          *cost += COSTS_N_INSNS (3);

          if (speed_p)
            *cost += 2 * extra_cost->alu.logical
                     + extra_cost->alu.arith;
          return true;
        }

      *cost = LIBCALL_COST (2);
      return false;  /* All arguments must be in registers.  */
9783 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9785 *cost
+= (COSTS_N_INSNS (1)
9786 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9788 *cost
+= extra_cost
->alu
.shift_reg
;
9796 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9798 *cost
+= (COSTS_N_INSNS (2)
9799 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9801 *cost
+= 2 * extra_cost
->alu
.shift
;
9804 else if (mode
== SImode
)
9806 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9807 /* Slightly disparage register shifts at -Os, but not by much. */
9808 if (!CONST_INT_P (XEXP (x
, 1)))
9809 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9810 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9813 else if (GET_MODE_CLASS (mode
) == MODE_INT
9814 && GET_MODE_SIZE (mode
) < 4)
9818 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9819 /* Slightly disparage register shifts at -Os, but not by
9821 if (!CONST_INT_P (XEXP (x
, 1)))
9822 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9823 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9825 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9827 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9829 /* Can use SBFX/UBFX. */
9831 *cost
+= extra_cost
->alu
.bfx
;
9832 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9836 *cost
+= COSTS_N_INSNS (1);
9837 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9840 if (CONST_INT_P (XEXP (x
, 1)))
9841 *cost
+= 2 * extra_cost
->alu
.shift
;
9843 *cost
+= (extra_cost
->alu
.shift
9844 + extra_cost
->alu
.shift_reg
);
9847 /* Slightly disparage register shifts. */
9848 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9853 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9854 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9857 if (CONST_INT_P (XEXP (x
, 1)))
9858 *cost
+= (2 * extra_cost
->alu
.shift
9859 + extra_cost
->alu
.log_shift
);
9861 *cost
+= (extra_cost
->alu
.shift
9862 + extra_cost
->alu
.shift_reg
9863 + extra_cost
->alu
.log_shift_reg
);
9869 *cost
= LIBCALL_COST (2);
9878 *cost
+= extra_cost
->alu
.rev
;
9885 /* No rev instruction available. Look at arm_legacy_rev
9886 and thumb_legacy_rev for the form of RTL used then. */
9889 *cost
+= COSTS_N_INSNS (9);
9893 *cost
+= 6 * extra_cost
->alu
.shift
;
9894 *cost
+= 3 * extra_cost
->alu
.logical
;
9899 *cost
+= COSTS_N_INSNS (4);
9903 *cost
+= 2 * extra_cost
->alu
.shift
;
9904 *cost
+= extra_cost
->alu
.arith_shift
;
9905 *cost
+= 2 * extra_cost
->alu
.logical
;
9913 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9914 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9916 if (GET_CODE (XEXP (x
, 0)) == MULT
9917 || GET_CODE (XEXP (x
, 1)) == MULT
)
9919 rtx mul_op0
, mul_op1
, sub_op
;
9922 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9924 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9926 mul_op0
= XEXP (XEXP (x
, 0), 0);
9927 mul_op1
= XEXP (XEXP (x
, 0), 1);
9928 sub_op
= XEXP (x
, 1);
9932 mul_op0
= XEXP (XEXP (x
, 1), 0);
9933 mul_op1
= XEXP (XEXP (x
, 1), 1);
9934 sub_op
= XEXP (x
, 0);
9937 /* The first operand of the multiply may be optionally
9939 if (GET_CODE (mul_op0
) == NEG
)
9940 mul_op0
= XEXP (mul_op0
, 0);
9942 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9943 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9944 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9950 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9956 rtx shift_by_reg
= NULL
;
9960 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9961 if (shift_op
== NULL
)
9963 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9964 non_shift_op
= XEXP (x
, 0);
9967 non_shift_op
= XEXP (x
, 1);
9969 if (shift_op
!= NULL
)
9971 if (shift_by_reg
!= NULL
)
9974 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9975 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9978 *cost
+= extra_cost
->alu
.arith_shift
;
9980 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9981 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9986 && GET_CODE (XEXP (x
, 1)) == MULT
)
9990 *cost
+= extra_cost
->mult
[0].add
;
9991 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9992 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9993 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9997 if (CONST_INT_P (XEXP (x
, 0)))
9999 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
10000 INTVAL (XEXP (x
, 0)), NULL_RTX
,
10002 *cost
= COSTS_N_INSNS (insns
);
10004 *cost
+= insns
* extra_cost
->alu
.arith
;
10005 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10009 *cost
+= extra_cost
->alu
.arith
;
10014 if (GET_MODE_CLASS (mode
) == MODE_INT
10015 && GET_MODE_SIZE (mode
) < 4)
10017 rtx shift_op
, shift_reg
;
10020 /* We check both sides of the MINUS for shifter operands since,
10021 unlike PLUS, it's not commutative. */
10023 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
10024 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
10026 /* Slightly disparage, as we might need to widen the result. */
10029 *cost
+= extra_cost
->alu
.arith
;
10031 if (CONST_INT_P (XEXP (x
, 0)))
10033 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10040 if (mode
== DImode
)
10042 *cost
+= COSTS_N_INSNS (1);
10044 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
10046 rtx op1
= XEXP (x
, 1);
10049 *cost
+= 2 * extra_cost
->alu
.arith
;
10051 if (GET_CODE (op1
) == ZERO_EXTEND
)
10052 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
10055 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10056 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10060 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10063 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
10064 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
10066 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
10069 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10070 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
10073 *cost
+= (extra_cost
->alu
.arith
10074 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10075 ? extra_cost
->alu
.arith
10076 : extra_cost
->alu
.arith_shift
));
10077 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
10078 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10079 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
10084 *cost
+= 2 * extra_cost
->alu
.arith
;
10090 *cost
= LIBCALL_COST (2);
10094 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10095 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10097 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10099 rtx mul_op0
, mul_op1
, add_op
;
10102 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10104 mul_op0
= XEXP (XEXP (x
, 0), 0);
10105 mul_op1
= XEXP (XEXP (x
, 0), 1);
10106 add_op
= XEXP (x
, 1);
10108 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10109 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10110 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
10116 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10119 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10121 *cost
= LIBCALL_COST (2);
10125 /* Narrow modes can be synthesized in SImode, but the range
10126 of useful sub-operations is limited. Check for shift operations
10127 on one of the operands. Only left shifts can be used in the
10129 if (GET_MODE_CLASS (mode
) == MODE_INT
10130 && GET_MODE_SIZE (mode
) < 4)
10132 rtx shift_op
, shift_reg
;
10135 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
10137 if (CONST_INT_P (XEXP (x
, 1)))
10139 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10140 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10142 *cost
= COSTS_N_INSNS (insns
);
10144 *cost
+= insns
* extra_cost
->alu
.arith
;
10145 /* Slightly penalize a narrow operation as the result may
10147 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10151 /* Slightly penalize a narrow operation as the result may
10155 *cost
+= extra_cost
->alu
.arith
;
10160 if (mode
== SImode
)
10162 rtx shift_op
, shift_reg
;
10164 if (TARGET_INT_SIMD
10165 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10166 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10168 /* UXTA[BH] or SXTA[BH]. */
10170 *cost
+= extra_cost
->alu
.extend_arith
;
10171 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10173 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
10178 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10179 if (shift_op
!= NULL
)
10184 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10185 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10188 *cost
+= extra_cost
->alu
.arith_shift
;
10190 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10191 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10194 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10196 rtx mul_op
= XEXP (x
, 0);
10198 if (TARGET_DSP_MULTIPLY
10199 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
10200 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10201 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10202 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10203 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
10204 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
10205 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
10206 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
10207 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10208 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10209 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10210 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
10213 /* SMLA[BT][BT]. */
10215 *cost
+= extra_cost
->mult
[0].extend_add
;
10216 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
10217 SIGN_EXTEND
, 0, speed_p
)
10218 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
10219 SIGN_EXTEND
, 0, speed_p
)
10220 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10225 *cost
+= extra_cost
->mult
[0].add
;
10226 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
10227 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
10228 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10231 if (CONST_INT_P (XEXP (x
, 1)))
10233 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10234 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10236 *cost
= COSTS_N_INSNS (insns
);
10238 *cost
+= insns
* extra_cost
->alu
.arith
;
10239 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10243 *cost
+= extra_cost
->alu
.arith
;
10248 if (mode
== DImode
)
10251 && GET_CODE (XEXP (x
, 0)) == MULT
10252 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10253 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10254 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10255 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10258 *cost
+= extra_cost
->mult
[1].extend_add
;
10259 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10260 ZERO_EXTEND
, 0, speed_p
)
10261 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
10262 ZERO_EXTEND
, 0, speed_p
)
10263 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10267 *cost
+= COSTS_N_INSNS (1);
10269 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10270 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10273 *cost
+= (extra_cost
->alu
.arith
10274 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10275 ? extra_cost
->alu
.arith
10276 : extra_cost
->alu
.arith_shift
));
10278 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10280 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10285 *cost
+= 2 * extra_cost
->alu
.arith
;
10290 *cost
= LIBCALL_COST (2);
10293 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10296 *cost
+= extra_cost
->alu
.rev
;
10300 /* Fall through. */
10301 case AND
: case XOR
:
10302 if (mode
== SImode
)
10304 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10305 rtx op0
= XEXP (x
, 0);
10306 rtx shift_op
, shift_reg
;
10310 || (code
== IOR
&& TARGET_THUMB2
)))
10311 op0
= XEXP (op0
, 0);
10314 shift_op
= shifter_op_p (op0
, &shift_reg
);
10315 if (shift_op
!= NULL
)
10320 *cost
+= extra_cost
->alu
.log_shift_reg
;
10321 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10324 *cost
+= extra_cost
->alu
.log_shift
;
10326 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10327 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10331 if (CONST_INT_P (XEXP (x
, 1)))
10333 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10334 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10337 *cost
= COSTS_N_INSNS (insns
);
10339 *cost
+= insns
* extra_cost
->alu
.logical
;
10340 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
10345 *cost
+= extra_cost
->alu
.logical
;
10346 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
10347 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10351 if (mode
== DImode
)
10353 rtx op0
= XEXP (x
, 0);
10354 enum rtx_code subcode
= GET_CODE (op0
);
10356 *cost
+= COSTS_N_INSNS (1);
10360 || (code
== IOR
&& TARGET_THUMB2
)))
10361 op0
= XEXP (op0
, 0);
10363 if (GET_CODE (op0
) == ZERO_EXTEND
)
10366 *cost
+= 2 * extra_cost
->alu
.logical
;
10368 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
10370 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10373 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10376 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10378 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
10380 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10385 *cost
+= 2 * extra_cost
->alu
.logical
;
10391 *cost
= LIBCALL_COST (2);
10395 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10396 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10398 rtx op0
= XEXP (x
, 0);
10400 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10401 op0
= XEXP (op0
, 0);
10404 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10406 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10407 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10410 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10412 *cost
= LIBCALL_COST (2);
10416 if (mode
== SImode
)
10418 if (TARGET_DSP_MULTIPLY
10419 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10420 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10421 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10422 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10423 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10424 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10425 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10426 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10427 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10428 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10429 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10430 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10433 /* SMUL[TB][TB]. */
10435 *cost
+= extra_cost
->mult
[0].extend
;
10436 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10437 SIGN_EXTEND
, 0, speed_p
);
10438 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10439 SIGN_EXTEND
, 1, speed_p
);
10443 *cost
+= extra_cost
->mult
[0].simple
;
10447 if (mode
== DImode
)
10450 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10451 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10452 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10453 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10456 *cost
+= extra_cost
->mult
[1].extend
;
10457 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10458 ZERO_EXTEND
, 0, speed_p
)
10459 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10460 ZERO_EXTEND
, 0, speed_p
));
10464 *cost
= LIBCALL_COST (2);
10469 *cost
= LIBCALL_COST (2);
10473 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10474 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10476 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10479 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10484 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10488 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10490 *cost
= LIBCALL_COST (1);
10494 if (mode
== SImode
)
10496 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10498 *cost
+= COSTS_N_INSNS (1);
10499 /* Assume the non-flag-changing variant. */
10501 *cost
+= (extra_cost
->alu
.log_shift
10502 + extra_cost
->alu
.arith_shift
);
10503 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10507 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10508 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10510 *cost
+= COSTS_N_INSNS (1);
10511 /* No extra cost for MOV imm and MVN imm. */
10512 /* If the comparison op is using the flags, there's no further
10513 cost, otherwise we need to add the cost of the comparison. */
10514 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10515 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10516 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10518 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10519 *cost
+= (COSTS_N_INSNS (1)
10520 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10522 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10525 *cost
+= extra_cost
->alu
.arith
;
10531 *cost
+= extra_cost
->alu
.arith
;
10535 if (GET_MODE_CLASS (mode
) == MODE_INT
10536 && GET_MODE_SIZE (mode
) < 4)
10538 /* Slightly disparage, as we might need an extend operation. */
10541 *cost
+= extra_cost
->alu
.arith
;
10545 if (mode
== DImode
)
10547 *cost
+= COSTS_N_INSNS (1);
10549 *cost
+= 2 * extra_cost
->alu
.arith
;
10554 *cost
= LIBCALL_COST (1);
      if (mode == SImode)
	  rtx shift_reg = NULL;
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_reg != NULL)
	      *cost += extra_cost->alu.log_shift_reg;
	      *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
	      *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	  *cost += extra_cost->alu.logical;

      if (mode == DImode)
	  *cost += COSTS_N_INSNS (1);
      *cost += LIBCALL_COST (1);
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	*cost += COSTS_N_INSNS (3);

      int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
      int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
      *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
      /* Assume that if one arm of the if_then_else is a register,
	 that it will be tied with the result and eliminate the
	 conditional insn.  */
      if (REG_P (XEXP (x, 1)))
      else if (REG_P (XEXP (x, 2)))
      if (extra_cost->alu.non_exec_costs_exec)
	*cost += op1cost + op2cost + extra_cost->alu.non_exec;
	*cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	*cost += op1cost + op2cost;
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)

      machine_mode op0mode;
      /* We'll mostly assume that the cost of a compare is the cost of the
	 LHS.  However, there are some notable exceptions.  */

      /* Floating point compares are never done as side-effects.  */
      op0mode = GET_MODE (XEXP (x, 0));
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	  && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	  *cost += extra_cost->fp[op0mode != SFmode].compare;
	  if (XEXP (x, 1) == CONST0_RTX (op0mode))
	      *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
      else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	  *cost = LIBCALL_COST (2);

      /* DImode compares normally take two insns.  */
      if (op0mode == DImode)
	  *cost += COSTS_N_INSNS (1);
	  *cost += 2 * extra_cost->alu.arith;

      if (op0mode == SImode)
	  if (XEXP (x, 1) == const0_rtx
	      && !(REG_P (XEXP (x, 0))
		   || (GET_CODE (XEXP (x, 0)) == SUBREG
		       && REG_P (SUBREG_REG (XEXP (x, 0))))))
	      *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

	      /* Multiply operations that set the flags are often
		 significantly more expensive.  */
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		*cost += extra_cost->mult[0].flag_setting;

	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
						     0), 1), mode))
		*cost += extra_cost->mult[0].flag_setting;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_op != NULL)
	      if (shift_reg != NULL)
		  *cost += rtx_cost (shift_reg, op0mode, ASHIFT, 1, speed_p);
		  *cost += extra_cost->alu.arith_shift_reg;
	      *cost += extra_cost->alu.arith_shift;
	      *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
	      *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);

	  *cost += extra_cost->alu.arith;
	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
	      *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
      *cost = LIBCALL_COST (2);
      if (outer_code == SET)
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);

	  if (XEXP (x, 1) == const0_rtx)
	      /* LSR Rd, Rn, #31.  */
	      *cost += extra_cost->alu.shift;

	      *cost += COSTS_N_INSNS (1);

	      /* RSBS T1, Rn, Rn, LSR #31.  */
	      *cost += COSTS_N_INSNS (1);
	      *cost += extra_cost->alu.arith_shift;

	      /* RSB  Rd, Rn, Rn, ASR #1
		 LSR  Rd, Rd, #31.  */
	      *cost += COSTS_N_INSNS (1);
	      *cost += (extra_cost->alu.arith_shift
			+ extra_cost->alu.shift);

	      *cost += COSTS_N_INSNS (1);
	      *cost += extra_cost->alu.shift;

	      /* Remaining cases are either meaningless or would take
		 three insns anyway.  */
	      *cost = COSTS_N_INSNS (3);

	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

	  *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	  *cost += extra_cost->fp[mode != SFmode].neg;
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  *cost = LIBCALL_COST (1);

      if (mode == SImode)
	  *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
      *cost = LIBCALL_COST (1);
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  *cost += extra_cost->alu.extend;
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  *cost += 2 * extra_cost->alu.shift;

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	  *cost += COSTS_N_INSNS (1);
	  *cost += extra_cost->alu.shift;
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  *cost += extra_cost->alu.logical;
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  *cost += extra_cost->alu.extend;
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  *cost += 2 * extra_cost->alu.shift;

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))

      if (mode == SImode)
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,

	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
      if (arm_arch_thumb2 && !flag_pic)
	*cost += COSTS_N_INSNS (1);
	*cost += extra_cost->ldst.load;
      *cost += COSTS_N_INSNS (1);

      *cost += COSTS_N_INSNS (1);
      *cost += extra_cost->alu.arith;

      *cost = COSTS_N_INSNS (4);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	  if (vfp3_const_double_rtx (x))
	      *cost += extra_cost->fp[mode == DFmode].fpconst;

	  if (mode == DFmode)
	    *cost += extra_cost->ldst.loadd;
	    *cost += extra_cost->ldst.loadf;
	  *cost += COSTS_N_INSNS (1 + (mode == DFmode));
      *cost = COSTS_N_INSNS (4);

	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      *cost = COSTS_N_INSNS (4);

      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */

      *cost += extra_cost->alu.clz;
      if (XEXP (x, 1) == const0_rtx)
	  *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
      /* Fall through.  */

      *cost += COSTS_N_INSNS (1);

      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	  *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
      *cost = LIBCALL_COST (1);
    case UNSPEC_VOLATILE:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */

      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	  *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      *cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      if (TARGET_HARD_FLOAT)
	  *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_FPU_ARMV8
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      *cost += extra_cost->fp[0].widen;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

      *cost = LIBCALL_COST (1);

    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	  *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  /* Vector modes?  */
      *cost = LIBCALL_COST (1);
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
	  *cost += extra_cost->fp[mode == DFmode].fma;

      *cost = LIBCALL_COST (3);
      if (TARGET_HARD_FLOAT)
	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
		 > 0)
	      *cost += extra_cost->fp[0].toint;
	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);

	  if (GET_MODE_CLASS (mode) == MODE_INT)
	      mode = GET_MODE (XEXP (x, 0));
	      *cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_FPU_ARMV8)
	      *cost += extra_cost->fp[mode == DFmode].roundint;
	  /* Vector costs?  */
      *cost = LIBCALL_COST (1);

    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  *cost += extra_cost->fp[mode == DFmode].fromint;
      *cost = LIBCALL_COST (1);
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
	*cost = COSTS_N_INSNS (asm_length + num_operands);

      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
	*cost = COSTS_N_INSNS (4); /* Who knows?  */

#undef HANDLE_NARROW_SHIFT_ARITH
/* RTX costs when optimizing for size.  */

arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
  int code = GET_CODE (x);

  if (TARGET_OLD_RTX_COSTS
      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
      /* Old way.  (Deprecated.)  */
      result = arm_size_rtx_costs (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code, total);
      result = current_tune->rtx_costs (x, (enum rtx_code) code,
					(enum rtx_code) outer_code, total,
					speed);
      if (current_tune->insn_extra_cost)
	result = arm_new_rtx_costs (x, (enum rtx_code) code,
				    (enum rtx_code) outer_code,
				    current_tune->insn_extra_cost,
				    total, speed);
      /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
	 && current_tune->insn_extra_cost != NULL  */
	result = arm_new_rtx_costs (x, (enum rtx_code) code,
				    (enum rtx_code) outer_code,
				    &generic_extra_costs, total, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
  machine_mode mode = GET_MODE (x);

    *total = thumb1_rtx_costs (x, code, outer_code);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT
	  *total = COSTS_N_INSNS (20);

      if (CONST_INT_P (XEXP (x, 1)))
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 2;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	      i >>= booth_unit_size;
	      cost++;

	  *total = COSTS_N_INSNS (cost);
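	  /* Worked example (relying on the cost++ in the Booth loop above):
	     for XEXP (x, 1) == 0xff, const_ok_for_arm holds, so COST starts
	     at 4; the loop then shifts 0xff right by two bits four times
	     before it reaches zero, giving *total = COSTS_N_INSNS (8).  */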
	  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);

      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);
/* RTX cost for cores with a fast multiply unit (M variants).  */

arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
  machine_mode mode = GET_MODE (x);

    *total = thumb1_rtx_costs (x, code, outer_code);
  /* ??? should thumb2 use different costs?  */

      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (2);

      if (mode == DImode)
	  *total = COSTS_N_INSNS (5);

      if (CONST_INT_P (XEXP (x, 1)))
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 8;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	      i >>= booth_unit_size;
	      cost++;

	  *total = COSTS_N_INSNS (cost);

      if (mode == SImode)
	  *total = COSTS_N_INSNS (4);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  if (TARGET_HARD_FLOAT
	      || (mode == DFmode && !TARGET_VFP_SINGLE)))
	      *total = COSTS_N_INSNS (1);

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		      int *total, bool speed)
  machine_mode mode = GET_MODE (x);

    *total = thumb1_rtx_costs (x, code, outer_code);

      if (GET_CODE (XEXP (x, 0)) != MULT)
	return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
	 will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);

      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (2);

      if (mode == DImode)
	  *total = COSTS_N_INSNS (5);

      if (CONST_INT_P (XEXP (x, 1)))
	  /* If operand 1 is a constant we can more accurately
	     calculate the cost of the multiply.  The multiplier can
	     retire 15 bits on the first cycle and a further 12 on the
	     second.  We do, of course, have to load the constant into
	     a register first.  */
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
	  /* There's a general overhead of one cycle.  */
	  int cost = 1;
	  unsigned HOST_WIDE_INT masked_const;

	  if (i & 0x80000000)
	    i = ~i;

	  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

	  masked_const = i & 0xffff8000;
	  if (masked_const != 0)
	      cost++;
	      masked_const = i & 0xf8000000;
	      if (masked_const != 0)
		cost++;

	  *total = COSTS_N_INSNS (cost);
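	  /* Worked example: for XEXP (x, 1) == 0x12345678 the sign bit is
	     clear, so the value is not inverted; i & 0xffff8000 and
	     i & 0xf8000000 are both non-zero, so COST grows from the
	     one-cycle overhead to 3 and *total = COSTS_N_INSNS (3).  */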
      if (mode == SImode)
	  *total = COSTS_N_INSNS (3);

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);
/* RTX costs for 9e (and later) cores.  */

arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		  int *total, bool speed)
  machine_mode mode = GET_MODE (x);

      /* Small multiply: 32 cycles for an integer multiply inst.  */
      if (arm_arch6m && arm_m_profile_small_mul)
	*total = COSTS_N_INSNS (32);
	*total = COSTS_N_INSNS (3);

    *total = thumb1_rtx_costs (x, code, outer_code);

      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (2);

      if (mode == DImode)
	  *total = COSTS_N_INSNS (5);

      if (mode == SImode)
	  *total = COSTS_N_INSNS (2);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  if (TARGET_HARD_FLOAT
	      || (mode == DFmode && !TARGET_VFP_SINGLE)))
	      *total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);

/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */

arm_arm_address_cost (rtx x)
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)

      if (CONST_INT_P (XEXP (x, 1)))

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))

arm_thumb_address_cost (rtx x)
  enum rtx_code c = GET_CODE (x);

      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))

arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
/* Adjust cost hook for XScale.  */

xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
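      /* A hypothetical example (the insns below are illustrative, not taken
	 from the sources): if DEP is "mov r2, r3, lsl #2" and INSN is
	 "add r0, r1, r2, lsl #1", then r2 is INSN's shifted input operand
	 and is written by DEP, which is exactly the situation the scan
	 below detects so that the dependency can be made more expensive.  */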
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	  rtx shifted_operand;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
/* Adjust cost hook for Cortex A9.  */

cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
    case REG_DEP_OUTPUT:
	if (recog_memoized (insn) >= 0
	    && recog_memoized (dep) >= 0)
	    if (GET_CODE (PATTERN (insn)) == SET)
		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		    enum attr_type attr_type_insn = get_attr_type (insn);
		    enum attr_type attr_type_dep = get_attr_type (dep);

		    /* By default all dependencies of the form
		       have an extra latency of 1 cycle because
		       of the input and output dependency in this
		       case.  However this gets modeled as a true
		       dependency and hence all these checks.  */
		    if (REG_P (SET_DEST (PATTERN (insn)))
			&& reg_set_p (SET_DEST (PATTERN (insn)), dep))
			/* FMACS is a special case where the dependent
			   instruction can be issued 3 cycles before
			   the normal latency in case of an output
			   dependency.  */
			if ((attr_type_insn == TYPE_FMACS
			     || attr_type_insn == TYPE_FMACD)
			    && (attr_type_dep == TYPE_FMACS
				|| attr_type_dep == TYPE_FMACD))
			    if (dep_type == REG_DEP_OUTPUT)
			      *cost = insn_default_latency (dep) - 3;
			      *cost = insn_default_latency (dep);
			    if (dep_type == REG_DEP_OUTPUT)
			      *cost = insn_default_latency (dep) + 1;
			      *cost = insn_default_latency (dep);
      gcc_unreachable ();
/* Adjust cost hook for FA726TE.  */

fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
  /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
     have penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)

      if (from == HI_REGS || to == HI_REGS)

/* Implement TARGET_MEMORY_MOVE_COST.  */

arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
      if (GET_MODE_SIZE (mode) < 4)

      return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
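      /* For example, with the return expression above an SImode value
	 (4 bytes) would cost 8 when RCLASS is LO_REGS and 16 for other
	 register classes.  */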
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */

arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
  switch (type_of_cost)
      return current_tune->vec_costs->scalar_stmt_cost;
      return current_tune->vec_costs->scalar_load_cost;
      return current_tune->vec_costs->scalar_store_cost;
      return current_tune->vec_costs->vec_stmt_cost;
      return current_tune->vec_costs->vec_align_load_cost;
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;
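      /* E.g. constructing a four-element vector (TYPE_VECTOR_SUBPARTS == 4)
	 is costed as 4 / 2 + 1 = 3 statements.  */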
      gcc_unreachable ();

/* Implement targetm.vectorize.add_stmt_cost.  */

arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
/* Return true if and only if this insn can dual-issue only as older.  */

cortexa7_older_only (rtx_insn *insn)
  if (recog_memoized (insn) < 0)

  switch (get_attr_type (insn))
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:

/* Return true if and only if this insn can dual-issue as younger.  */

cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
  if (recog_memoized (insn) < 0)
      fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));

  switch (get_attr_type (insn))
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:

/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
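/* As a hypothetical illustration: if the insn at the head of the ready list
   is a move-immediate (which can dual-issue as younger) and an ALU
   register-register insn (older-only) sits behind it, the ALU insn is moved
   in front of the move, leaving the move available to pair as the younger
   insn of a later dual-issue cycle.  */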
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
  int first_older_only = -1, first_younger = -1;

    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	  first_older_only = i;
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");

  /* Move first_older_only insn before first_younger.  */
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID(ready[first_older_only]),
	     INSN_UID(ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
      ready[i] = ready[i+1];

  ready[i] = first_older_only_insn;

/* Implement TARGET_SCHED_REORDER.  */

arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      /* Do nothing for other cores.  */

  return arm_issue_rate ();
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */

arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)

  if (current_tune->sched_adjust_cost != NULL)
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)

  /* Call insns don't incur a stall, even if they follow a load.  */

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */
      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))

arm_max_conditional_execute (void)
  return max_insns_skipped;
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;

  return (optimize > 0) ? 2 : 0;

arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);

arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

init_fp_table (void)
  r = REAL_VALUE_ATOF ("0", DFmode);
  fp_consts_inited = true;

/* Return TRUE if rtx X is a valid immediate FP constant.  */

arm_const_double_rtx (rtx x)
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))

  if (real_equal (r, &value_fp0))
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */
12322 fconst[sd] instruction, or -1 if X isn't suitable. */
12324 vfp3_const_double_index (rtx x
)
12326 REAL_VALUE_TYPE r
, m
;
12327 int sign
, exponent
;
12328 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12329 unsigned HOST_WIDE_INT mask
;
12330 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12333 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12336 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12338 /* We can't represent these things, so detect them first. */
12339 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12342 /* Extract sign, exponent and mantissa. */
12343 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12344 r
= real_value_abs (&r
);
12345 exponent
= REAL_EXP (&r
);
12346 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12347 highest (sign) bit, with a fixed binary point at bit point_pos.
12348 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12349 bits for the mantissa, this may fail (low bits would be lost). */
12350 real_ldexp (&m
, &r
, point_pos
- exponent
);
12351 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12352 mantissa
= w
.elt (0);
12353 mant_hi
= w
.elt (1);
12355 /* If there are bits set in the low part of the mantissa, we can't
12356 represent this value. */
12360 /* Now make it so that mantissa contains the most-significant bits, and move
12361 the point_pos to indicate that the least-significant bits have been
12363 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12364 mantissa
= mant_hi
;
12366 /* We can permit four significant bits of mantissa only, plus a high bit
12367 which is always 1. */
12368 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
12369 if ((mantissa
& mask
) != 0)
12372 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12373 mantissa
>>= point_pos
- 5;
12375 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12376 floating-point immediate zero with Neon using an integer-zero load, but
12377 that case is handled elsewhere.) */
12381 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12383 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12384 normalized significands are in the range [1, 2). (Our mantissa is shifted
12385 left 4 places at this point relative to normalized IEEE754 values). GCC
12386 internally uses [0.5, 1) (see real.c), so the exponent returned from
12387 REAL_EXP must be altered. */
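  /* For example, REAL_EXP on 1.0 returns 1 (1.0 == 0.5 * 2^1), so the
     adjustment below yields exponent = 5 - 1 = 4, i.e. r = 4 in the
     encoding formula described before this function.  */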
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);

/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */

vfp3_const_double_rtx (rtx x)
  return vfp3_const_double_index (x) != -1;
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table,
   or -1 if the given value doesn't match any of the listed patterns.  */
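/* For example, a V4SImode constant whose elements are all 0x0000ab00 matches
   variant 1 above and is output as a vmov.i32 with the single significant
   byte 0xab; the bitwise inverse of that value (elements of 0xffff54ff) is
   instead recognized as the corresponding VMVN form, variant 7.  */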
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  for (i = 0; i < idx; i += (STRIDE))		\
      immtype = (CLASS);			\
      elsize = (ELSIZE);

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

    n_elts = CONST_VECTOR_NUNITS (op);

      if (mode == VOIDmode)

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))

	*modconst = CONST_VECTOR_ELT (op, 0);

      if (el0 == CONST0_RTX (GET_MODE (el0)))

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));
  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		   && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		   && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		   && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		   && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		   && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		   && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		   && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		   && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		    && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		    && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
		    && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
		    && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

  CHECK (1, 8, 16, bytes[i] == bytes[0]);

  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
		    && bytes[i] == bytes[(i + 8) % idx]);
    *elementwidth = elsize;

      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);

	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

    *modconst = tmpconst;
    *elementwidth = tmpwidth;

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)

    *modconst = tmpconst;
    *elementwidth = tmpwidth;
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between a left and a right shift,
   because they have different limitations.  */

neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)

  /* Shift less than element size.  */
  maxshift = innersize * 8;

      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)

      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)

    *elementwidth = innersize * 8;

    *modconst = CONST_VECTOR_ELT (op, 0);
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width,
					     isleftshift);
  gcc_assert (is_valid != 0);

    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */
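/* For instance, reducing a V4SF vector this way takes two pairwise steps:
   PARTS is 4, so the loop in neon_pairwise_reduce runs once with I == 2
   (into a scratch register) and once with I == 1 (into OP0).  */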
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);

  for (i = parts / 2; i >= 1; i /= 2)
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));

/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

neon_vdup_constant (rtx vals)
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_rtx_VEC_DUPLICATE (mode, x);
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

neon_make_constant (rtx vals)
  machine_mode mode = GET_MODE (vals);
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);

  if (GET_CODE (vals) == CONST_VECTOR)
  else if (GET_CODE (vals) == PARALLEL)
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
/* Initialize vector TARGET to VALS.  */

neon_expand_vector_init (rtx target, rtx vals)
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;

  for (i = 0; i < n_elts; ++i)
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))

      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	  emit_move_insn (target, constant);

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  gcc_unreachable ();

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  EXP indicates the source location, which includes the
   inlining history for intrinsics.  */

bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)

  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
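
/* For example, a lane check with LOW 0 and HIGH 4 (a four-element
   vector) accepts lanes 0..3 and reports something like
   "lane 4 out of range 0 - 3" for anything else.  */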
/* Bounds-check lanes.  */

neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

arm_coproc_mem_operand (rtx op, bool wb)

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))

  /* Constants are converted into offsets from labels.  */

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))

  /* Match: (mem (reg)).  */
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
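
/* For instance, [rN] and [rN, #-1020] .. [rN, #1020] with the offset a
   multiple of four are accepted here; PRE_INC and POST_DEC forms
   additionally require WB to be true.  */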
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from. TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */

neon_vector_mem_operand (rtx op, int type, bool strict)

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))

  /* Constants are converted into offsets from labels.  */

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))

  /* Match: (mem (reg)).  */
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))

      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
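
/* E.g. with TYPE 0 (vldr/vstr) a double-register access allows offsets
   [rN, #-1020] .. [rN, #1020] in steps of four, while a quad-register
   access is capped at #1016; TYPE 2 (vld1/vst1) additionally accepts
   post-increment by a register.  */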
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct.  */

neon_struct_mem_operand (rtx op)

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))

  /* Constants are converted into offsets from labels.  */

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))

  /* Match: (mem (reg)).  */
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
/* Return true if X is a register that will be eliminated later on.  */

arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */

symbol_mentioned_p (rtx x)

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)

      for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	if (symbol_mentioned_p (XVECEXP (x, i, j)))
	  return 1;

      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
/* Return TRUE if X references a LABEL_REF.  */

label_mentioned_p (rtx x)

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)

      for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	if (label_mentioned_p (XVECEXP (x, i, j)))
	  return 1;

      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;
      /* Fall through.  */

    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }

  return false;
}
minmax_code (rtx x)

  enum rtx_code code = GET_CODE (x);

      gcc_unreachable ();
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)

  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)

      *signed_sat = false;

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)

      *signed_sat = true;
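
/* For example, an 8-bit signed saturation tests against the pair
   (-128, 127): 127 + 1 is a power of two (log = 7) and -128 equals
   -127 - 1, so the bounds match the signed ssat form; the pair
   (0, 255) would match the unsigned usat form instead.  */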
/* Return 1 if memory locations are adjacent.  */

adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	/* If the target has load delay slots, then there's no benefit
	   to using an ldm instruction unless the offset is zero and
	   we are optimizing for size.  */
	return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		&& (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		&& (val_diff == 4 || val_diff == -4));

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
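
/* For instance, [r3] and [r3, #4], or [r3, #8] and [r3, #4], are
   adjacent; [r3, #4] and [r4, #8] are not, since the base registers
   differ.  */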
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
   1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
   2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
   3.  If consecutive is TRUE, then for kth register being loaded,
       REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
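
/* Schematically, a two-register SImode load with zero offset looks like
     (parallel [(set (R_d0) (mem (addr)))
		(set (R_d1) (mem (plus (addr) (const_int 4))))])
   which an ldm of two registers from the address in ADDR will match.  */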
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  bool addr_reg_in_reglist = false;
  bool update = false;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below. If only one reg is loaded,
     success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  /* Don't allow SP to be loaded unless it is also the base register. It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted. Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register. It
	     guarantees that SP is reset correctly when an LDM instruction
	     is interrupted. Otherwise, we might end up with a corrupt stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by write-back
	 or by explicit load. If the pattern does not describe an update,
	 then the address register must be in the list of loaded registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	 ldr	rd1, [rbase + offset]
	 ldr	rd2, [rbase + offset + 4]

     to

	 add	rd1, rbase, offset
	 ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */
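
/* For example, UNSORTED_OFFSETS {8, 0, 4, 12} with ORDER[0] = 1 (the
   index of the lowest offset, 0) yields ORDER = {1, 2, 0, 3}, i.e.
   offsets 0, 4, 8, 12; a gap, such as offsets {0, 4, 12}, makes the
   function fail.  */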
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  for (i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
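
/* For instance, loads of r5 from [r2, #4] and r6 from [r2, #8] give
   *BASE = 2, *LOAD_OFFSET = 4 and REGS = {5, 6}; on ARM the nonzero
   return value then selects the ldmib form, since the lowest offset
   is 4.  */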
13834 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13835 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13837 int unsorted_regs
[MAX_LDM_STM_OPS
];
13838 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13839 int order
[MAX_LDM_STM_OPS
];
13840 rtx base_reg_rtx
= NULL
;
13844 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13845 easily extended if required. */
13846 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13848 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13850 /* Loop over the operands and check that the memory references are
13851 suitable (i.e. immediate offsets from the same base register). At
13852 the same time, extract the target register, and the memory
13854 for (i
= 0; i
< nops
; i
++)
13859 /* Convert a subreg of a mem into the mem itself. */
13860 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13861 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13863 gcc_assert (MEM_P (operands
[nops
+ i
]));
13865 /* Don't reorder volatile memory references; it doesn't seem worth
13866 looking for the case where the order is ok anyway. */
13867 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13870 offset
= const0_rtx
;
13872 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13873 || (GET_CODE (reg
) == SUBREG
13874 && REG_P (reg
= SUBREG_REG (reg
))))
13875 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13876 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13877 || (GET_CODE (reg
) == SUBREG
13878 && REG_P (reg
= SUBREG_REG (reg
))))
13879 && (CONST_INT_P (offset
13880 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13884 base_reg
= REGNO (reg
);
13885 base_reg_rtx
= reg
;
13886 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13889 else if (base_reg
!= (int) REGNO (reg
))
13890 /* Not addressed from the same base register. */
13893 unsorted_regs
[i
] = (REG_P (operands
[i
])
13894 ? REGNO (operands
[i
])
13895 : REGNO (SUBREG_REG (operands
[i
])));
13897 /* If it isn't an integer register, or if it overwrites the
13898 base register but isn't the last insn in the list, then
13899 we can't do this. */
13900 if (unsorted_regs
[i
] < 0
13901 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13902 || unsorted_regs
[i
] > 14
13903 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13906 /* Don't allow SP to be loaded unless it is also the base
13907 register. It guarantees that SP is reset correctly when
13908 an LDM instruction is interrupted. Otherwise, we might
13909 end up with a corrupt stack. */
13910 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13913 unsorted_offsets
[i
] = INTVAL (offset
);
13914 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13918 /* Not a suitable memory address. */
13922 /* All the useful information has now been extracted from the
13923 operands into unsorted_regs and unsorted_offsets; additionally,
13924 order[0] has been set to the lowest offset in the list. Sort
13925 the offsets into order, verifying that they are adjacent, and
13926 check that the register numbers are ascending. */
13927 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13928 check_regs
? unsorted_regs
: NULL
))
13932 memcpy (saved_order
, order
, sizeof order
);
13938 for (i
= 0; i
< nops
; i
++)
13939 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13941 *load_offset
= unsorted_offsets
[order
[0]];
13945 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13948 if (unsorted_offsets
[order
[0]] == 0)
13949 ldm_case
= 1; /* ldmia */
13950 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13951 ldm_case
= 2; /* ldmib */
13952 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13953 ldm_case
= 3; /* ldmda */
13954 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13955 ldm_case
= 4; /* ldmdb */
13956 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13957 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13962 if (!multiple_operation_profitable_p (false, nops
,
13964 ? unsorted_offsets
[order
[0]] : 0))
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
13988 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13989 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13990 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13992 int unsorted_regs
[MAX_LDM_STM_OPS
];
13993 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13994 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13995 int order
[MAX_LDM_STM_OPS
];
13997 rtx base_reg_rtx
= NULL
;
14000 /* Write back of base register is currently only supported for Thumb 1. */
14001 int base_writeback
= TARGET_THUMB1
;
14003 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14004 easily extended if required. */
14005 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
14007 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
14009 /* Loop over the operands and check that the memory references are
14010 suitable (i.e. immediate offsets from the same base register). At
14011 the same time, extract the target register, and the memory
14013 for (i
= 0; i
< nops
; i
++)
14018 /* Convert a subreg of a mem into the mem itself. */
14019 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
14020 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
14022 gcc_assert (MEM_P (operands
[nops
+ i
]));
14024 /* Don't reorder volatile memory references; it doesn't seem worth
14025 looking for the case where the order is ok anyway. */
14026 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
14029 offset
= const0_rtx
;
14031 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
14032 || (GET_CODE (reg
) == SUBREG
14033 && REG_P (reg
= SUBREG_REG (reg
))))
14034 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
14035 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
14036 || (GET_CODE (reg
) == SUBREG
14037 && REG_P (reg
= SUBREG_REG (reg
))))
14038 && (CONST_INT_P (offset
14039 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
14041 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
14042 ? operands
[i
] : SUBREG_REG (operands
[i
]));
14043 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
14047 base_reg
= REGNO (reg
);
14048 base_reg_rtx
= reg
;
14049 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
14052 else if (base_reg
!= (int) REGNO (reg
))
14053 /* Not addressed from the same base register. */
14056 /* If it isn't an integer register, then we can't do this. */
14057 if (unsorted_regs
[i
] < 0
14058 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
14059 /* The effects are unpredictable if the base register is
14060 both updated and stored. */
14061 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
14062 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
14063 || unsorted_regs
[i
] > 14)
14066 unsorted_offsets
[i
] = INTVAL (offset
);
14067 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
14071 /* Not a suitable memory address. */
14075 /* All the useful information has now been extracted from the
14076 operands into unsorted_regs and unsorted_offsets; additionally,
14077 order[0] has been set to the lowest offset in the list. Sort
14078 the offsets into order, verifying that they are adjacent, and
14079 check that the register numbers are ascending. */
14080 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
14081 check_regs
? unsorted_regs
: NULL
))
14085 memcpy (saved_order
, order
, sizeof order
);
14091 for (i
= 0; i
< nops
; i
++)
14093 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
14095 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
14098 *load_offset
= unsorted_offsets
[order
[0]];
14102 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
14105 if (unsorted_offsets
[order
[0]] == 0)
14106 stm_case
= 1; /* stmia */
14107 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
14108 stm_case
= 2; /* stmib */
14109 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
14110 stm_case
= 3; /* stmda */
14111 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
14112 stm_case
= 4; /* stmdb */
14116 if (!multiple_operation_profitable_p (false, nops
, 0))
14122 /* Routines for use in generating RTL. */
14124 /* Generate a load-multiple instruction. COUNT is the number of loads in
14125 the instruction; REGS and MEMS are arrays containing the operands.
14126 BASEREG is the base register to be used in addressing the memory operands.
14127 WBACK_OFFSET is nonzero if the instruction should update the base
14131 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14132 HOST_WIDE_INT wback_offset
)
14137 if (!multiple_operation_profitable_p (false, count
, 0))
14143 for (i
= 0; i
< count
; i
++)
14144 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
14146 if (wback_offset
!= 0)
14147 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14149 seq
= get_insns ();
14155 result
= gen_rtx_PARALLEL (VOIDmode
,
14156 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14157 if (wback_offset
!= 0)
14159 XVECEXP (result
, 0, 0)
14160 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14165 for (j
= 0; i
< count
; i
++, j
++)
14166 XVECEXP (result
, 0, i
)
14167 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
14172 /* Generate a store-multiple instruction. COUNT is the number of stores in
14173 the instruction; REGS and MEMS are arrays containing the operands.
14174 BASEREG is the base register to be used in addressing the memory operands.
14175 WBACK_OFFSET is nonzero if the instruction should update the base
14179 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14180 HOST_WIDE_INT wback_offset
)
14185 if (GET_CODE (basereg
) == PLUS
)
14186 basereg
= XEXP (basereg
, 0);
14188 if (!multiple_operation_profitable_p (false, count
, 0))
14194 for (i
= 0; i
< count
; i
++)
14195 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
14197 if (wback_offset
!= 0)
14198 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14200 seq
= get_insns ();
14206 result
= gen_rtx_PARALLEL (VOIDmode
,
14207 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14208 if (wback_offset
!= 0)
14210 XVECEXP (result
, 0, 0)
14211 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14216 for (j
= 0; i
< count
; i
++, j
++)
14217 XVECEXP (result
, 0, i
)
14218 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register numbers.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
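
/* E.g. arm_gen_multiple_op (TRUE, regs, 4, basereg, true, basemem, &off)
   builds a four-register load-multiple pattern; because WRITE_BACK is
   true the pattern also adds 4 * 4 bytes to the base register, and
   *OFFSETP is advanced correspondingly so the caller can keep
   addressing the following block.  */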
14239 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
14240 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
14242 rtx mems
[MAX_LDM_STM_OPS
];
14243 HOST_WIDE_INT offset
= *offsetp
;
14246 gcc_assert (count
<= MAX_LDM_STM_OPS
);
14248 if (GET_CODE (basereg
) == PLUS
)
14249 basereg
= XEXP (basereg
, 0);
14251 for (i
= 0; i
< count
; i
++)
14253 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
14254 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
14262 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
14263 write_back
? 4 * count
: 0);
14265 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
14266 write_back
? 4 * count
: 0);
14270 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14271 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14273 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
14278 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14279 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14281 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
14285 /* Called from a peephole2 expander to turn a sequence of loads into an
14286 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14287 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14288 is true if we can reorder the registers because they are used commutatively
14290 Returns true iff we could generate a new instruction. */
14293 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
14295 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14296 rtx mems
[MAX_LDM_STM_OPS
];
14297 int i
, j
, base_reg
;
14299 HOST_WIDE_INT offset
;
14300 int write_back
= FALSE
;
14304 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
14305 &base_reg
, &offset
, !sort_regs
);
14311 for (i
= 0; i
< nops
- 1; i
++)
14312 for (j
= i
+ 1; j
< nops
; j
++)
14313 if (regs
[i
] > regs
[j
])
14319 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14323 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
14324 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
14330 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
14331 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
14333 if (!TARGET_THUMB1
)
14335 base_reg
= regs
[0];
14336 base_reg_rtx
= newbase
;
14340 for (i
= 0; i
< nops
; i
++)
14342 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14343 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14346 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14347 write_back
? offset
+ i
* 4 : 0));
14351 /* Called from a peephole2 expander to turn a sequence of stores into an
14352 STM instruction. OPERANDS are the operands found by the peephole matcher;
14353 NOPS indicates how many separate stores we are trying to combine.
14354 Returns true iff we could generate a new instruction. */
14357 gen_stm_seq (rtx
*operands
, int nops
)
14360 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14361 rtx mems
[MAX_LDM_STM_OPS
];
14364 HOST_WIDE_INT offset
;
14365 int write_back
= FALSE
;
14368 bool base_reg_dies
;
14370 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
14371 mem_order
, &base_reg
, &offset
, true);
14376 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14378 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
14381 gcc_assert (base_reg_dies
);
14387 gcc_assert (base_reg_dies
);
14388 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14392 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14394 for (i
= 0; i
< nops
; i
++)
14396 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14397 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14400 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14401 write_back
? offset
+ i
* 4 : 0));
14405 /* Called from a peephole2 expander to turn a sequence of stores that are
14406 preceded by constant loads into an STM instruction. OPERANDS are the
14407 operands found by the peephole matcher; NOPS indicates how many
14408 separate stores we are trying to combine; there are 2 * NOPS
14409 instructions in the peephole.
14410 Returns true iff we could generate a new instruction. */
14413 gen_const_stm_seq (rtx
*operands
, int nops
)
14415 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14416 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14417 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14418 rtx mems
[MAX_LDM_STM_OPS
];
14421 HOST_WIDE_INT offset
;
14422 int write_back
= FALSE
;
14425 bool base_reg_dies
;
14427 HARD_REG_SET allocated
;
14429 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14430 mem_order
, &base_reg
, &offset
, false);
14435 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14437 /* If the same register is used more than once, try to find a free
14439 CLEAR_HARD_REG_SET (allocated
);
14440 for (i
= 0; i
< nops
; i
++)
14442 for (j
= i
+ 1; j
< nops
; j
++)
14443 if (regs
[i
] == regs
[j
])
14445 rtx t
= peep2_find_free_register (0, nops
* 2,
14446 TARGET_THUMB1
? "l" : "r",
14447 SImode
, &allocated
);
14451 regs
[i
] = REGNO (t
);
14455 /* Compute an ordering that maps the register numbers to an ascending
14458 for (i
= 0; i
< nops
; i
++)
14459 if (regs
[i
] < regs
[reg_order
[0]])
14462 for (i
= 1; i
< nops
; i
++)
14464 int this_order
= reg_order
[i
- 1];
14465 for (j
= 0; j
< nops
; j
++)
14466 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14467 && (this_order
== reg_order
[i
- 1]
14468 || regs
[j
] < regs
[this_order
]))
14470 reg_order
[i
] = this_order
;
14473 /* Ensure that registers that must be live after the instruction end
14474 up with the correct value. */
14475 for (i
= 0; i
< nops
; i
++)
14477 int this_order
= reg_order
[i
];
14478 if ((this_order
!= mem_order
[i
]
14479 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14480 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14484 /* Load the constants. */
14485 for (i
= 0; i
< nops
; i
++)
14487 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14488 sorted_regs
[i
] = regs
[reg_order
[i
]];
14489 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14492 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14494 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14497 gcc_assert (base_reg_dies
);
14503 gcc_assert (base_reg_dies
);
14504 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14508 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14510 for (i
= 0; i
< nops
; i
++)
14512 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14513 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14516 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14517 write_back
? offset
+ i
* 4 : 0));
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */
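
/* With INTERLEAVE_FACTOR 2, each iteration moves 8 bytes as two loads
   followed by two stores, so the second load can issue before the first
   store needs its data; a factor of 1 degenerates to one load/store per
   word.  */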
14529 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14530 HOST_WIDE_INT length
,
14531 unsigned int interleave_factor
)
14533 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14534 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14535 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14536 HOST_WIDE_INT i
, j
;
14537 HOST_WIDE_INT remaining
= length
, words
;
14538 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14540 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14541 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14542 HOST_WIDE_INT srcoffset
, dstoffset
;
14543 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14546 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14548 /* Use hard registers if we have aligned source or destination so we can use
14549 load/store multiple with contiguous registers. */
14550 if (dst_aligned
|| src_aligned
)
14551 for (i
= 0; i
< interleave_factor
; i
++)
14552 regs
[i
] = gen_rtx_REG (SImode
, i
);
14554 for (i
= 0; i
< interleave_factor
; i
++)
14555 regs
[i
] = gen_reg_rtx (SImode
);
14557 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14558 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14560 srcoffset
= dstoffset
= 0;
14562 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14563 For copying the last bytes we want to subtract this offset again. */
14564 src_autoinc
= dst_autoinc
= 0;
14566 for (i
= 0; i
< interleave_factor
; i
++)
14569 /* Copy BLOCK_SIZE_BYTES chunks. */
14571 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14574 if (src_aligned
&& interleave_factor
> 1)
14576 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14577 TRUE
, srcbase
, &srcoffset
));
14578 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14582 for (j
= 0; j
< interleave_factor
; j
++)
14584 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14586 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14587 srcoffset
+ j
* UNITS_PER_WORD
);
14588 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14590 srcoffset
+= block_size_bytes
;
14594 if (dst_aligned
&& interleave_factor
> 1)
14596 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14597 TRUE
, dstbase
, &dstoffset
));
14598 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14602 for (j
= 0; j
< interleave_factor
; j
++)
14604 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14606 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14607 dstoffset
+ j
* UNITS_PER_WORD
);
14608 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14610 dstoffset
+= block_size_bytes
;
14613 remaining
-= block_size_bytes
;
14616 /* Copy any whole words left (note these aren't interleaved with any
14617 subsequent halfword/byte load/stores in the interests of simplicity). */
14619 words
= remaining
/ UNITS_PER_WORD
;
14621 gcc_assert (words
< interleave_factor
);
14623 if (src_aligned
&& words
> 1)
14625 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14627 src_autoinc
+= UNITS_PER_WORD
* words
;
14631 for (j
= 0; j
< words
; j
++)
14633 addr
= plus_constant (Pmode
, src
,
14634 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14635 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14636 srcoffset
+ j
* UNITS_PER_WORD
);
14638 emit_move_insn (regs
[j
], mem
);
14640 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14642 srcoffset
+= words
* UNITS_PER_WORD
;
14645 if (dst_aligned
&& words
> 1)
14647 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14649 dst_autoinc
+= words
* UNITS_PER_WORD
;
14653 for (j
= 0; j
< words
; j
++)
14655 addr
= plus_constant (Pmode
, dst
,
14656 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14657 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14658 dstoffset
+ j
* UNITS_PER_WORD
);
14660 emit_move_insn (mem
, regs
[j
]);
14662 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14664 dstoffset
+= words
* UNITS_PER_WORD
;
14667 remaining
-= words
* UNITS_PER_WORD
;
14669 gcc_assert (remaining
< 4);
14671 /* Copy a halfword if necessary. */
14673 if (remaining
>= 2)
14675 halfword_tmp
= gen_reg_rtx (SImode
);
14677 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14678 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14679 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14681 /* Either write out immediately, or delay until we've loaded the last
14682 byte, depending on interleave factor. */
14683 if (interleave_factor
== 1)
14685 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14686 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14687 emit_insn (gen_unaligned_storehi (mem
,
14688 gen_lowpart (HImode
, halfword_tmp
)));
14689 halfword_tmp
= NULL
;
14697 gcc_assert (remaining
< 2);
14699 /* Copy last byte. */
14701 if ((remaining
& 1) != 0)
14703 byte_tmp
= gen_reg_rtx (SImode
);
14705 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14706 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14707 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14709 if (interleave_factor
== 1)
14711 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14712 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14713 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14722 /* Store last halfword if we haven't done so already. */
14726 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14727 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14728 emit_insn (gen_unaligned_storehi (mem
,
14729 gen_lowpart (HImode
, halfword_tmp
)));
14733 /* Likewise for last byte. */
14737 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14738 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14739 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14743 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14746 /* From mips_adjust_block_mem:
14748 Helper function for doing a loop-based block operation on memory
14749 reference MEM. Each iteration of the loop will operate on LENGTH
14752 Create a new base register for use within the loop and point it to
14753 the start of MEM. Create a new memory reference that uses this
14754 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14757 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14760 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14762 /* Although the new mem does not refer to a known location,
14763 it does keep up to LENGTH bytes of alignment. */
14764 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14765 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */
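
/* E.g. LENGTH 100 with BYTES_PER_ITER 16 runs the loop over 96 bytes
   (six iterations) and then copies the 4 left-over bytes with a final
   straight-line sequence.  */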
14775 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14776 unsigned int interleave_factor
,
14777 HOST_WIDE_INT bytes_per_iter
)
14779 rtx src_reg
, dest_reg
, final_src
, test
;
14780 HOST_WIDE_INT leftover
;
14782 leftover
= length
% bytes_per_iter
;
14783 length
-= leftover
;
14785 /* Create registers and memory references for use within the loop. */
14786 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14787 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14789 /* Calculate the value that SRC_REG should have after the last iteration of
14791 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14792 0, 0, OPTAB_WIDEN
);
14794 /* Emit the start of the loop. */
14795 rtx_code_label
*label
= gen_label_rtx ();
14796 emit_label (label
);
14798 /* Emit the loop body. */
14799 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14800 interleave_factor
);
14802 /* Move on to the next block. */
14803 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14804 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14806 /* Emit the loop condition. */
14807 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14808 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14810 /* Mop up any left-over bytes. */
14812 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */
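
/* For example, when optimizing for size with one aligned operand the
   code below picks an interleave factor of 2 with 8 bytes per loop
   iteration; with neither operand aligned it falls back to 1 and 4, and
   when not optimizing for size it uses a factor of 4 with 16-byte
   iterations.  */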
14820 arm_movmemqi_unaligned (rtx
*operands
)
14822 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14826 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14827 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14828 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14829 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14830 or dst_aligned though: allow more interleaving in those cases since the
14831 resulting code can be smaller. */
14832 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14833 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14836 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14837 interleave_factor
, bytes_per_iter
);
14839 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14840 interleave_factor
);
14844 /* Note that the loop created by arm_block_move_unaligned_loop may be
14845 subject to loop unrolling, which makes tuning this condition a little
14848 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14850 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14857 arm_gen_movmemqi (rtx
*operands
)
14859 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14860 HOST_WIDE_INT srcoffset
, dstoffset
;
14862 rtx src
, dst
, srcbase
, dstbase
;
14863 rtx part_bytes_reg
= NULL
;
14866 if (!CONST_INT_P (operands
[2])
14867 || !CONST_INT_P (operands
[3])
14868 || INTVAL (operands
[2]) > 64)
14871 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14872 return arm_movmemqi_unaligned (operands
);
14874 if (INTVAL (operands
[3]) & 3)
14877 dstbase
= operands
[0];
14878 srcbase
= operands
[1];
14880 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14881 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14883 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14884 out_words_to_go
= INTVAL (operands
[2]) / 4;
14885 last_bytes
= INTVAL (operands
[2]) & 3;
14886 dstoffset
= srcoffset
= 0;
14888 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14889 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14891 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14893 if (in_words_to_go
> 4)
14894 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14895 TRUE
, srcbase
, &srcoffset
));
14897 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14898 src
, FALSE
, srcbase
,
14901 if (out_words_to_go
)
14903 if (out_words_to_go
> 4)
14904 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14905 TRUE
, dstbase
, &dstoffset
));
14906 else if (out_words_to_go
!= 1)
14907 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14908 out_words_to_go
, dst
,
14911 dstbase
, &dstoffset
));
14914 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14915 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14916 if (last_bytes
!= 0)
14918 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14924 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14925 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14928 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14929 if (out_words_to_go
)
14933 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14934 sreg
= copy_to_reg (mem
);
14936 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14937 emit_move_insn (mem
, sreg
);
14940 gcc_assert (!in_words_to_go
); /* Sanity check */
14943 if (in_words_to_go
)
14945 gcc_assert (in_words_to_go
> 0);
14947 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14948 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14951 gcc_assert (!last_bytes
|| part_bytes_reg
);
14953 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14955 rtx tmp
= gen_reg_rtx (SImode
);
14957 /* The bytes we want are in the top end of the word. */
14958 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14959 GEN_INT (8 * (4 - last_bytes
))));
14960 part_bytes_reg
= tmp
;
14964 mem
= adjust_automodify_address (dstbase
, QImode
,
14965 plus_constant (Pmode
, dst
,
14967 dstoffset
+ last_bytes
- 1);
14968 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14972 tmp
= gen_reg_rtx (SImode
);
14973 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14974 part_bytes_reg
= tmp
;
14981 if (last_bytes
> 1)
14983 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14984 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14988 rtx tmp
= gen_reg_rtx (SImode
);
14989 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14990 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14991 part_bytes_reg
= tmp
;
14998 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14999 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
15006 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
15009 next_consecutive_mem (rtx mem
)
15011 machine_mode mode
= GET_MODE (mem
);
15012 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
15013 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
15015 return adjust_automodify_address (mem
, mode
, addr
, offset
);
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
static bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  /* If either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx low_reg = NULL_RTX;
      rtx hi_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  low_reg = gen_lowpart (SImode, reg0);
	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
	}

      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	{
	  emit_insn (gen_unaligned_loadsi (low_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
	}

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, low_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
      if (len == 0)
	return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
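/* The function above consumes the length in double-words while at
   least eight bytes remain, then copies at most one word, one halfword
   and one trailing byte.  The standalone sketch below (hypothetical
   names, plain C) just computes that schedule; the real code also
   distinguishes aligned DImode ldrd/strd from pairs of unaligned
   SImode accesses.  */
static void
example_ldrd_strd_schedule (unsigned HOST_WIDE_INT len,
			    int *ndoubles, int *nwords,
			    int *nhalves, int *nbytes)
{
  *ndoubles = 0;
  while (len >= 8)
    {
      (*ndoubles)++;		/* One LDRD/STRD (or two word accesses).  */
      len -= 8;
    }

  *nwords = len >= 4 ? 1 : 0;	/* At most one word remains...  */
  len -= 4 * *nwords;
  *nhalves = len >= 2 ? 1 : 0;	/* ...then at most one halfword...  */
  len -= 2 * *nhalves;
  *nbytes = (int) len;		/* ...and at most one trailing byte.  */
}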
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;
15177 /* Currently we will probably get the wrong result if the individual
15178 comparisons are not simple. This also ensures that it is safe to
15179 reverse a comparison if necessary. */
15180 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
15182 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
15186 /* The if_then_else variant of this tests the second condition if the
15187 first passes, but is true if the first fails. Reverse the first
15188 condition to get a true "inclusive-or" expression. */
15189 if (cond_or
== DOM_CC_NX_OR_Y
)
15190 cond1
= reverse_condition (cond1
);
15192 /* If the comparisons are not equal, and one doesn't dominate the other,
15193 then we can't do this. */
15195 && !comparison_dominates_p (cond1
, cond2
)
15196 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
15200 std::swap (cond1
, cond2
);
15205 if (cond_or
== DOM_CC_X_AND_Y
)
15210 case EQ
: return CC_DEQmode
;
15211 case LE
: return CC_DLEmode
;
15212 case LEU
: return CC_DLEUmode
;
15213 case GE
: return CC_DGEmode
;
15214 case GEU
: return CC_DGEUmode
;
15215 default: gcc_unreachable ();
15219 if (cond_or
== DOM_CC_X_AND_Y
)
15231 gcc_unreachable ();
15235 if (cond_or
== DOM_CC_X_AND_Y
)
15247 gcc_unreachable ();
15251 if (cond_or
== DOM_CC_X_AND_Y
)
15252 return CC_DLTUmode
;
15257 return CC_DLTUmode
;
15259 return CC_DLEUmode
;
15263 gcc_unreachable ();
15267 if (cond_or
== DOM_CC_X_AND_Y
)
15268 return CC_DGTUmode
;
15273 return CC_DGTUmode
;
15275 return CC_DGEUmode
;
15279 gcc_unreachable ();
15282 /* The remaining cases only occur when both comparisons are the
15285 gcc_assert (cond1
== cond2
);
15289 gcc_assert (cond1
== cond2
);
15293 gcc_assert (cond1
== cond2
);
15297 gcc_assert (cond1
== cond2
);
15298 return CC_DLEUmode
;
15301 gcc_assert (cond1
== cond2
);
15302 return CC_DGEUmode
;
15305 gcc_unreachable ();
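/* A condition "dominates" another when, for the same pair of operands,
   the first being true implies the second is true; that is what lets a
   single flag-setting comparison serve both tests in the dominance
   modes selected above.  The standalone sketch below illustrates the
   idea for a few signed conditions only (hypothetical enum and helper
   names; it is not GCC's comparison_dominates_p).  */
enum example_cond { EXAMPLE_EQ, EXAMPLE_NE, EXAMPLE_LT,
		    EXAMPLE_LE, EXAMPLE_GT, EXAMPLE_GE };

static int
example_dominates_p (enum example_cond c1, enum example_cond c2)
{
  if (c1 == c2)
    return 1;
  switch (c1)
    {
    case EXAMPLE_EQ:
      /* a == b implies a <= b and a >= b.  */
      return c2 == EXAMPLE_LE || c2 == EXAMPLE_GE;
    case EXAMPLE_LT:
      /* a < b implies a <= b and a != b.  */
      return c2 == EXAMPLE_LE || c2 == EXAMPLE_NE;
    case EXAMPLE_GT:
      /* a > b implies a >= b and a != b.  */
      return c2 == EXAMPLE_GE || c2 == EXAMPLE_NE;
    default:
      return 0;
    }
}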
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
15314 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
15337 gcc_unreachable ();
15341 /* A compare with a shifted operand. Because of canonicalization, the
15342 comparison will have to be swapped when we emit the assembler. */
15343 if (GET_MODE (y
) == SImode
15344 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15345 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15346 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15347 || GET_CODE (x
) == ROTATERT
))
15350 /* This operation is performed swapped, but since we only rely on the Z
15351 flag we don't need an additional mode. */
15352 if (GET_MODE (y
) == SImode
15353 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15354 && GET_CODE (x
) == NEG
15355 && (op
== EQ
|| op
== NE
))
15358 /* This is a special case that is used by combine to allow a
15359 comparison of a shifted byte load to be split into a zero-extend
15360 followed by a comparison of the shifted integer (only valid for
15361 equalities and unsigned inequalities). */
15362 if (GET_MODE (x
) == SImode
15363 && GET_CODE (x
) == ASHIFT
15364 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15365 && GET_CODE (XEXP (x
, 0)) == SUBREG
15366 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15367 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15368 && (op
== EQ
|| op
== NE
15369 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15370 && CONST_INT_P (y
))
15373 /* A construct for a conditional compare, if the false arm contains
15374 0, then both conditions must be true, otherwise either condition
15375 must be true. Not all conditions are possible, so CCmode is
15376 returned if it can't be done. */
15377 if (GET_CODE (x
) == IF_THEN_ELSE
15378 && (XEXP (x
, 2) == const0_rtx
15379 || XEXP (x
, 2) == const1_rtx
)
15380 && COMPARISON_P (XEXP (x
, 0))
15381 && COMPARISON_P (XEXP (x
, 1)))
15382 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15383 INTVAL (XEXP (x
, 2)));
15385 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15386 if (GET_CODE (x
) == AND
15387 && (op
== EQ
|| op
== NE
)
15388 && COMPARISON_P (XEXP (x
, 0))
15389 && COMPARISON_P (XEXP (x
, 1)))
15390 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15393 if (GET_CODE (x
) == IOR
15394 && (op
== EQ
|| op
== NE
)
15395 && COMPARISON_P (XEXP (x
, 0))
15396 && COMPARISON_P (XEXP (x
, 1)))
15397 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15400 /* An operation (on Thumb) where we want to test for a single bit.
15401 This is done by shifting that bit up into the top bit of a
15402 scratch register; we can then branch on the sign bit. */
15404 && GET_MODE (x
) == SImode
15405 && (op
== EQ
|| op
== NE
)
15406 && GET_CODE (x
) == ZERO_EXTRACT
15407 && XEXP (x
, 1) == const1_rtx
)
15410 /* An operation that sets the condition codes as a side-effect, the
15411 V flag is not set correctly, so we can only use comparisons where
15412 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15414 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15415 if (GET_MODE (x
) == SImode
15417 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15418 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15419 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15420 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15421 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15422 || GET_CODE (x
) == LSHIFTRT
15423 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15424 || GET_CODE (x
) == ROTATERT
15425 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15426 return CC_NOOVmode
;
15428 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15431 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15432 && GET_CODE (x
) == PLUS
15433 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15436 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15442 /* A DImode comparison against zero can be implemented by
15443 or'ing the two halves together. */
15444 if (y
== const0_rtx
)
15447 /* We can do an equality test in three Thumb instructions. */
15457 /* DImode unsigned comparisons can be implemented by cmp +
15458 cmpeq without a scratch register. Not worth doing in
15469 /* DImode signed and unsigned comparisons can be implemented
15470 by cmp + sbcs with a scratch register, but that does not
15471 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15472 gcc_assert (op
!= EQ
&& op
!= NE
);
15476 gcc_unreachable ();
15480 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15481 return GET_MODE (x
);
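/* The single-bit test mentioned above works by moving the tested bit
   into the sign position and branching on the sign flag.  Illustrative
   standalone sketch (hypothetical helper name): shifting bit N of X up
   to bit 31 makes an ordinary signed "less than zero" test read it.  */
static int
example_bit_set_p (unsigned int x, unsigned int n)
{
  /* After the shift the requested bit is the sign bit, so the "mi"
     condition (negative) is true exactly when the bit was set.  */
  return (int) (x << (31 - n)) < 0;
}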
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
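/* The DImode equality path above relies on the identity that two
   values are equal iff their XOR is zero, and that a 64-bit zero test
   on a 32-bit target is just the OR of the two halves.  Illustrative
   standalone sketch with hypothetical names:  */
static int
example_di_equal_p (unsigned int x_lo, unsigned int x_hi,
		    unsigned int y_lo, unsigned int y_hi)
{
  unsigned int diff_lo = x_lo ^ y_lo;
  unsigned int diff_hi = x_hi ^ y_hi;

  /* No carry or borrow chain is needed; every differing bit shows up
     in the OR of the two XORed halves.  */
  return (diff_lo | diff_hi) == 0;
}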
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;
15554 if (GET_CODE (ref
) == SUBREG
)
15556 offset
= SUBREG_BYTE (ref
);
15557 ref
= SUBREG_REG (ref
);
15562 /* We have a pseudo which has been spilt onto the stack; there
15563 are two cases here: the first where there is a simple
15564 stack-slot replacement and a second where the stack-slot is
15565 out of range, or is used as a subreg. */
15566 if (reg_equiv_mem (REGNO (ref
)))
15568 ref
= reg_equiv_mem (REGNO (ref
));
15569 base
= find_replacement (&XEXP (ref
, 0));
15572 /* The slot is out of range, or was dressed up in a SUBREG. */
15573 base
= reg_equiv_address (REGNO (ref
));
15575 /* PR 62554: If there is no equivalent memory location then just move
15576 the value as an SImode register move. This happens when the target
15577 architecture variant does not have an HImode register move. */
15580 gcc_assert (REG_P (operands
[0]));
15581 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15582 gen_rtx_SUBREG (SImode
, ref
, 0)));
15587 base
= find_replacement (&XEXP (ref
, 0));
15589 /* Handle the case where the address is too complex to be offset by 1. */
15590 if (GET_CODE (base
) == MINUS
15591 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15593 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15595 emit_set_insn (base_plus
, base
);
15598 else if (GET_CODE (base
) == PLUS
)
15600 /* The addend must be CONST_INT, or we would have dealt with it above. */
15601 HOST_WIDE_INT hi
, lo
;
15603 offset
+= INTVAL (XEXP (base
, 1));
15604 base
= XEXP (base
, 0);
15606 /* Rework the address into a legal sequence of insns. */
15607 /* Valid range for lo is -4095 -> 4095 */
15610 : -((-offset
) & 0xfff));
15612 /* Corner case, if lo is the max offset then we would be out of range
15613 once we have added the additional 1 below, so bump the msb into the
15614 pre-loading insn(s). */
15618 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15619 ^ (HOST_WIDE_INT
) 0x80000000)
15620 - (HOST_WIDE_INT
) 0x80000000);
15622 gcc_assert (hi
+ lo
== offset
);
15626 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15628 /* Get the base address; addsi3 knows how to handle constants
15629 that require more than one insn. */
15630 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15636 /* Operands[2] may overlap operands[0] (though it won't overlap
15637 operands[1]), that's why we asked for a DImode reg -- so we can
15638 use the bit that does not overlap. */
15639 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15640 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15642 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15644 emit_insn (gen_zero_extendqisi2 (scratch
,
15645 gen_rtx_MEM (QImode
,
15646 plus_constant (Pmode
, base
,
15648 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15649 gen_rtx_MEM (QImode
,
15650 plus_constant (Pmode
, base
,
15652 if (!BYTES_BIG_ENDIAN
)
15653 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15654 gen_rtx_IOR (SImode
,
15657 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15661 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15662 gen_rtx_IOR (SImode
,
15663 gen_rtx_ASHIFT (SImode
, scratch
,
15665 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
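/* Both arm_reload_in_hi above and arm_reload_out_hi below split an
   out-of-range constant offset into HI + LO parts, where LO fits the
   single-instruction offset range (-4095..4095 here) and HI is folded
   into the base with a separate add.  The sketch below shows just that
   arithmetic (hypothetical helper, plain C); the real code also nudges
   LO down in the corner case where adding 1 would overflow the range.  */
static void
example_split_reload_offset (long long offset, long long *hi, long long *lo)
{
  /* Keep the low 12 bits, preserving the sign of the offset.  */
  *lo = (offset >= 0
	 ? (offset & 0xfff)
	 : -((-offset) & 0xfff));

  /* Sign-extend the remainder through the 0x80000000 trick used in the
     reload helpers, so that *hi + *lo == offset also holds for
     negative offsets.  */
  *hi = ((((offset - *lo) & 0xffffffffLL) ^ 0x80000000LL) - 0x80000000LL);
}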
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
15677 rtx ref
= operands
[0];
15678 rtx outval
= operands
[1];
15680 HOST_WIDE_INT offset
= 0;
15682 if (GET_CODE (ref
) == SUBREG
)
15684 offset
= SUBREG_BYTE (ref
);
15685 ref
= SUBREG_REG (ref
);
15690 /* We have a pseudo which has been spilt onto the stack; there
15691 are two cases here: the first where there is a simple
15692 stack-slot replacement and a second where the stack-slot is
15693 out of range, or is used as a subreg. */
15694 if (reg_equiv_mem (REGNO (ref
)))
15696 ref
= reg_equiv_mem (REGNO (ref
));
15697 base
= find_replacement (&XEXP (ref
, 0));
15700 /* The slot is out of range, or was dressed up in a SUBREG. */
15701 base
= reg_equiv_address (REGNO (ref
));
15703 /* PR 62254: If there is no equivalent memory location then just move
15704 the value as an SImode register move. This happens when the target
15705 architecture variant does not have an HImode register move. */
15708 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
15710 if (REG_P (outval
))
15712 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15713 gen_rtx_SUBREG (SImode
, outval
, 0)));
15715 else /* SUBREG_P (outval) */
15717 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
15718 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15719 SUBREG_REG (outval
)));
15721 /* FIXME: Handle other cases ? */
15722 gcc_unreachable ();
15728 base
= find_replacement (&XEXP (ref
, 0));
15730 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15732 /* Handle the case where the address is too complex to be offset by 1. */
15733 if (GET_CODE (base
) == MINUS
15734 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15736 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15738 /* Be careful not to destroy OUTVAL. */
15739 if (reg_overlap_mentioned_p (base_plus
, outval
))
15741 /* Updating base_plus might destroy outval, see if we can
15742 swap the scratch and base_plus. */
15743 if (!reg_overlap_mentioned_p (scratch
, outval
))
15744 std::swap (scratch
, base_plus
);
15747 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15749 /* Be conservative and copy OUTVAL into the scratch now,
15750 this should only be necessary if outval is a subreg
15751 of something larger than a word. */
15752 /* XXX Might this clobber base? I can't see how it can,
15753 since scratch is known to overlap with OUTVAL, and
15754 must be wider than a word. */
15755 emit_insn (gen_movhi (scratch_hi
, outval
));
15756 outval
= scratch_hi
;
15760 emit_set_insn (base_plus
, base
);
15763 else if (GET_CODE (base
) == PLUS
)
15765 /* The addend must be CONST_INT, or we would have dealt with it above. */
15766 HOST_WIDE_INT hi
, lo
;
15768 offset
+= INTVAL (XEXP (base
, 1));
15769 base
= XEXP (base
, 0);
15771 /* Rework the address into a legal sequence of insns. */
15772 /* Valid range for lo is -4095 -> 4095 */
15775 : -((-offset
) & 0xfff));
15777 /* Corner case, if lo is the max offset then we would be out of range
15778 once we have added the additional 1 below, so bump the msb into the
15779 pre-loading insn(s). */
15783 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15784 ^ (HOST_WIDE_INT
) 0x80000000)
15785 - (HOST_WIDE_INT
) 0x80000000);
15787 gcc_assert (hi
+ lo
== offset
);
15791 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15793 /* Be careful not to destroy OUTVAL. */
15794 if (reg_overlap_mentioned_p (base_plus
, outval
))
15796 /* Updating base_plus might destroy outval, see if we
15797 can swap the scratch and base_plus. */
15798 if (!reg_overlap_mentioned_p (scratch
, outval
))
15799 std::swap (scratch
, base_plus
);
15802 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15804 /* Be conservative and copy outval into scratch now,
15805 this should only be necessary if outval is a
15806 subreg of something larger than a word. */
15807 /* XXX Might this clobber base? I can't see how it
15808 can, since scratch is known to overlap with
15810 emit_insn (gen_movhi (scratch_hi
, outval
));
15811 outval
= scratch_hi
;
15815 /* Get the base address; addsi3 knows how to handle constants
15816 that require more than one insn. */
15817 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15823 if (BYTES_BIG_ENDIAN
)
15825 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15826 plus_constant (Pmode
, base
,
15828 gen_lowpart (QImode
, outval
)));
15829 emit_insn (gen_lshrsi3 (scratch
,
15830 gen_rtx_SUBREG (SImode
, outval
, 0),
15832 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15834 gen_lowpart (QImode
, scratch
)));
15838 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15840 gen_lowpart (QImode
, outval
)));
15841 emit_insn (gen_lshrsi3 (scratch
,
15842 gen_rtx_SUBREG (SImode
, outval
, 0),
15844 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15845 plus_constant (Pmode
, base
,
15847 gen_lowpart (QImode
, scratch
)));
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
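/* Summarising the two padding hooks above for a big-endian AAPCS
   target: small aggregates, complex and fixed-point values keep their
   data in the low-addressed bytes (padded upwards), while anything
   else narrower than a register is pushed to the most significant end.
   Minimal standalone sketch with hypothetical parameter names:  */
static int
example_pad_upward_p (int aggregate_complex_or_fixed_p, int size_in_bytes,
		      int bytes_big_endian)
{
  if (!bytes_big_endian)
    return 1;	/* Little-endian padding is always upwards.  */

  if (aggregate_complex_or_fixed_p && size_in_bytes <= 4)
    return 1;	/* Small aggregates etc. are padded upwards too.  */

  return 0;	/* Otherwise pad downwards on big-endian.  */
}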
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  The
   patterns guarantee that both memory accesses use the same base register,
   that the offsets are constants within range, and that the gap between the
   offsets is 4.  If reload is complete, also check that the registers are
   legal.  WBACK indicates whether the address is updated.  LOAD indicates
   whether the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
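/* gen_operands_ldrd_strd below pairs two word accesses when they share
   a base register and their constant offsets differ by exactly four,
   with the lower address accessed first.  The standalone sketch below
   captures just that pairing test (hypothetical helper, plain C
   offsets rather than rtl memory operands):  */
static int
example_can_pair_word_accesses (int base_regno1, long offset1,
				int base_regno2, long offset2)
{
  if (base_regno1 != base_regno2)
    return 0;			/* Must use the same base register.  */

  /* The two words must be adjacent; the peephole later swaps the
     operands so that the lower offset is accessed first.  */
  return offset1 - offset2 == 4 || offset2 - offset1 == 4;
}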
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is four insns long, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
16049 HOST_WIDE_INT offsets
[2], offset
;
16050 rtx base
= NULL_RTX
;
16051 rtx cur_base
, cur_offset
, tmp
;
16053 HARD_REG_SET regset
;
16055 gcc_assert (!const_store
|| !load
);
16056 /* Check that the memory references are immediate offsets from the
16057 same base register. Extract the base register, the destination
16058 registers, and the corresponding memory offsets. */
16059 for (i
= 0; i
< nops
; i
++)
16061 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
16066 else if (REGNO (base
) != REGNO (cur_base
))
16069 offsets
[i
] = INTVAL (cur_offset
);
16070 if (GET_CODE (operands
[i
]) == SUBREG
)
16072 tmp
= SUBREG_REG (operands
[i
]);
16073 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
16078 /* Make sure there is no dependency between the individual loads. */
16079 if (load
&& REGNO (operands
[0]) == REGNO (base
))
16080 return false; /* RAW */
16082 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
16083 return false; /* WAW */
16085 /* If the same input register is used in both stores
16086 when storing different constants, try to find a free register.
16087 For example, the code
16092 can be transformed into
16096 in Thumb mode assuming that r1 is free.
16097 For ARM mode do the same but only if the starting register
16098 can be made to be even. */
16100 && REGNO (operands
[0]) == REGNO (operands
[1])
16101 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
16105 CLEAR_HARD_REG_SET (regset
);
16106 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16107 if (tmp
== NULL_RTX
)
16110 /* Use the new register in the first load to ensure that
16111 if the original input register is not dead after peephole,
16112 then it will have the correct constant value. */
16115 else if (TARGET_ARM
)
16117 int regno
= REGNO (operands
[0]);
16118 if (!peep2_reg_dead_p (4, operands
[0]))
16120 /* When the input register is even and is not dead after the
16121 pattern, it has to hold the second constant but we cannot
16122 form a legal STRD in ARM mode with this register as the second
16124 if (regno
% 2 == 0)
16127 /* Is regno-1 free? */
16128 SET_HARD_REG_SET (regset
);
16129 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
16130 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16131 if (tmp
== NULL_RTX
)
16138 /* Find a DImode register. */
16139 CLEAR_HARD_REG_SET (regset
);
16140 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
16141 if (tmp
!= NULL_RTX
)
16143 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
16144 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
16148 /* Can we use the input register to form a DI register? */
16149 SET_HARD_REG_SET (regset
);
16150 CLEAR_HARD_REG_BIT(regset
,
16151 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
16152 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16153 if (tmp
== NULL_RTX
)
16155 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
16159 gcc_assert (operands
[0] != NULL_RTX
);
16160 gcc_assert (operands
[1] != NULL_RTX
);
16161 gcc_assert (REGNO (operands
[0]) % 2 == 0);
16162 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
16166 /* Make sure the instructions are ordered with lower memory access first. */
16167 if (offsets
[0] > offsets
[1])
16169 gap
= offsets
[0] - offsets
[1];
16170 offset
= offsets
[1];
16172 /* Swap the instructions such that lower memory is accessed first. */
16173 std::swap (operands
[0], operands
[1]);
16174 std::swap (operands
[2], operands
[3]);
16176 std::swap (operands
[4], operands
[5]);
16180 gap
= offsets
[1] - offsets
[0];
16181 offset
= offsets
[0];
16184 /* Make sure accesses are to consecutive memory locations. */
16188 /* Make sure we generate legal instructions. */
16189 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
16193 /* In Thumb state, where registers are almost unconstrained, there
16194 is little hope to fix it. */
16198 if (load
&& commute
)
16200 /* Try reordering registers. */
16201 std::swap (operands
[0], operands
[1]);
16202 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
16209 /* If input registers are dead after this pattern, they can be
16210 reordered or replaced by other registers that are free in the
16211 current pattern. */
16212 if (!peep2_reg_dead_p (4, operands
[0])
16213 || !peep2_reg_dead_p (4, operands
[1]))
16216 /* Try to reorder the input registers. */
16217 /* For example, the code
16222 can be transformed into
16227 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
16230 std::swap (operands
[0], operands
[1]);
16234 /* Try to find a free DI register. */
16235 CLEAR_HARD_REG_SET (regset
);
16236 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
16237 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
16240 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
16241 if (tmp
== NULL_RTX
)
16244 /* DREG must be an even-numbered register in DImode.
16245 Split it into SI registers. */
16246 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
16247 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
16248 gcc_assert (operands
[0] != NULL_RTX
);
16249 gcc_assert (operands
[1] != NULL_RTX
);
16250 gcc_assert (REGNO (operands
[0]) % 2 == 0);
16251 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
16253 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
16265 /* Print a symbolic form of X to the debug file, F. */
16267 arm_print_value (FILE *f
, rtx x
)
16269 switch (GET_CODE (x
))
16272 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
16276 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
16284 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
16286 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
16287 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
16295 fprintf (f
, "\"%s\"", XSTR (x
, 0));
16299 fprintf (f
, "`%s'", XSTR (x
, 0));
16303 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
16307 arm_print_value (f
, XEXP (x
, 0));
16311 arm_print_value (f
, XEXP (x
, 0));
16313 arm_print_value (f
, XEXP (x
, 1));
16321 fprintf (f
, "????");
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
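/* The grouping step described above can be pictured with plain arrays
   instead of the Mnode/Mfix lists defined below: each fix records the
   address of its instruction and how far forward its pc-relative load
   can reach, and the pool is dumped after the last barrier that every
   fix in the group can still reach.  Hypothetical standalone sketch,
   not the real data structures:  */
struct example_pool_fix
{
  long address;		/* Offset of the referencing insn.  */
  long range;		/* Maximum forward reach of its load.  */
};

/* Return the index into BARRIERS (addresses, sorted ascending) after
   which a pool serving FIXES[0..NFIXES-1] should be emitted, or -1 if
   no barrier is in range and one must be created by inserting a jump.  */
static int
example_place_pool (const struct example_pool_fix *fixes, int nfixes,
		    const long *barriers, int nbarriers)
{
  long lower = 0, upper = -1;
  int i, best = -1;

  for (i = 0; i < nfixes; i++)
    {
      long reach = fixes[i].address + fixes[i].range;

      if (fixes[i].address > lower)
	lower = fixes[i].address;	/* Pool must follow every fix...  */
      if (upper < 0 || reach < upper)
	upper = reach;			/* ...and stay within every range.  */
    }

  for (i = 0; i < nbarriers; i++)
    if (barriers[i] > lower && barriers[i] < upper)
      best = i;				/* Keep the last barrier in range.  */

  return best;
}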
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *	    next;
  rtx_insn *	    insn;
  HOST_WIDE_INT	    address;
  rtx *		    loc;
  machine_mode	    mode;
  int		    fix_size;
  rtx		    value;
  Mnode *	    minipool;
  HOST_WIDE_INT	    forwards;
  HOST_WIDE_INT	    backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)

static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
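/* The size accounting above in a nutshell: a TBB table uses one byte
   per entry rounded up to a halfword, a TBH table two bytes per entry,
   and a word table four bytes per entry plus possible alignment
   padding.  Illustrative standalone sketch (hypothetical name; the
   real function works from the ADDR_VEC/ADDR_DIFF_VEC rtl):  */
static long
example_jump_table_bytes (int entry_size, int nentries)
{
  long size = (long) entry_size * nentries;

  if (entry_size == 1)
    size = (size + 1) & ~1L;	/* Round a TBB table up to a halfword.  */
  else if (entry_size == 4)
    size += 2;			/* Allow for alignment of a word table.  */

  return size;
}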
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
16509 /* The code below assumes these are different. */
16510 gcc_assert (mp
!= max_mp
);
16512 if (max_mp
== NULL
)
16514 if (max_address
< mp
->max_address
)
16515 mp
->max_address
= max_address
;
16519 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16520 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16522 mp
->max_address
= max_address
;
16524 /* Unlink MP from its current position. Since max_mp is non-null,
16525 mp->prev must be non-null. */
16526 mp
->prev
->next
= mp
->next
;
16527 if (mp
->next
!= NULL
)
16528 mp
->next
->prev
= mp
->prev
;
16530 minipool_vector_tail
= mp
->prev
;
16532 /* Re-insert it before MAX_MP. */
16534 mp
->prev
= max_mp
->prev
;
16537 if (mp
->prev
!= NULL
)
16538 mp
->prev
->next
= mp
;
16540 minipool_vector_head
= mp
;
16543 /* Save the new entry. */
16546 /* Scan over the preceding entries and adjust their addresses as
16548 while (mp
->prev
!= NULL
16549 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16551 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
16563 /* If set, max_mp is the first pool_entry that has a lower
16564 constraint than the one we are trying to add. */
16565 Mnode
* max_mp
= NULL
;
16566 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16569 /* If the minipool starts before the end of FIX->INSN then this FIX
16570 can not be placed into the current pool. Furthermore, adding the
16571 new constant pool entry may cause the pool to start FIX_SIZE bytes
16573 if (minipool_vector_head
&&
16574 (fix
->address
+ get_attr_length (fix
->insn
)
16575 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16578 /* Scan the pool to see if a constant with the same value has
16579 already been added. While we are doing this, also note the
16580 location where we must insert the constant if it doesn't already
16582 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16584 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16585 && fix
->mode
== mp
->mode
16586 && (!LABEL_P (fix
->value
)
16587 || (CODE_LABEL_NUMBER (fix
->value
)
16588 == CODE_LABEL_NUMBER (mp
->value
)))
16589 && rtx_equal_p (fix
->value
, mp
->value
))
16591 /* More than one fix references this entry. */
16593 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16596 /* Note the insertion point if necessary. */
16598 && mp
->max_address
> max_address
)
16601 /* If we are inserting an 8-bytes aligned quantity and
16602 we have not already found an insertion point, then
16603 make sure that all such 8-byte aligned quantities are
16604 placed at the start of the pool. */
16605 if (ARM_DOUBLEWORD_ALIGN
16607 && fix
->fix_size
>= 8
16608 && mp
->fix_size
< 8)
16611 max_address
= mp
->max_address
;
16615 /* The value is not currently in the minipool, so we need to create
16616 a new entry for it. If MAX_MP is NULL, the entry will be put on
16617 the end of the list since the placement is less constrained than
16618 any existing entry. Otherwise, we insert the new fix before
16619 MAX_MP and, if necessary, adjust the constraints on the other
16622 mp
->fix_size
= fix
->fix_size
;
16623 mp
->mode
= fix
->mode
;
16624 mp
->value
= fix
->value
;
16626 /* Not yet required for a backwards ref. */
16627 mp
->min_address
= -65536;
16629 if (max_mp
== NULL
)
16631 mp
->max_address
= max_address
;
16633 mp
->prev
= minipool_vector_tail
;
16635 if (mp
->prev
== NULL
)
16637 minipool_vector_head
= mp
;
16638 minipool_vector_label
= gen_label_rtx ();
16641 mp
->prev
->next
= mp
;
16643 minipool_vector_tail
= mp
;
16647 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16648 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16650 mp
->max_address
= max_address
;
16653 mp
->prev
= max_mp
->prev
;
16655 if (mp
->prev
!= NULL
)
16656 mp
->prev
->next
= mp
;
16658 minipool_vector_head
= mp
;
16661 /* Save the new entry. */
16664 /* Scan over the preceding entries and adjust their addresses as
16666 while (mp
->prev
!= NULL
16667 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16669 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16677 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16678 HOST_WIDE_INT min_address
)
16680 HOST_WIDE_INT offset
;
16682 /* The code below assumes these are different. */
16683 gcc_assert (mp
!= min_mp
);
16685 if (min_mp
== NULL
)
16687 if (min_address
> mp
->min_address
)
16688 mp
->min_address
= min_address
;
16692 /* We will adjust this below if it is too loose. */
16693 mp
->min_address
= min_address
;
16695 /* Unlink MP from its current position. Since min_mp is non-null,
16696 mp->next must be non-null. */
16697 mp
->next
->prev
= mp
->prev
;
16698 if (mp
->prev
!= NULL
)
16699 mp
->prev
->next
= mp
->next
;
16701 minipool_vector_head
= mp
->next
;
16703 /* Reinsert it after MIN_MP. */
16705 mp
->next
= min_mp
->next
;
16707 if (mp
->next
!= NULL
)
16708 mp
->next
->prev
= mp
;
16710 minipool_vector_tail
= mp
;
16716 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16718 mp
->offset
= offset
;
16719 if (mp
->refcount
> 0)
16720 offset
+= mp
->fix_size
;
16722 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16723 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
16739 /* If set, min_mp is the last pool_entry that has a lower constraint
16740 than the one we are trying to add. */
16741 Mnode
*min_mp
= NULL
;
16742 /* This can be negative, since it is only a constraint. */
16743 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16746 /* If we can't reach the current pool from this insn, or if we can't
16747 insert this entry at the end of the pool without pushing other
16748 fixes out of range, then we don't try. This ensures that we
16749 can't fail later on. */
16750 if (min_address
>= minipool_barrier
->address
16751 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16752 >= minipool_barrier
->address
))
16755 /* Scan the pool to see if a constant with the same value has
16756 already been added. While we are doing this, also note the
16757 location where we must insert the constant if it doesn't already
16759 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16761 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16762 && fix
->mode
== mp
->mode
16763 && (!LABEL_P (fix
->value
)
16764 || (CODE_LABEL_NUMBER (fix
->value
)
16765 == CODE_LABEL_NUMBER (mp
->value
)))
16766 && rtx_equal_p (fix
->value
, mp
->value
)
16767 /* Check that there is enough slack to move this entry to the
16768 end of the table (this is conservative). */
16769 && (mp
->max_address
16770 > (minipool_barrier
->address
16771 + minipool_vector_tail
->offset
16772 + minipool_vector_tail
->fix_size
)))
16775 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16778 if (min_mp
!= NULL
)
16779 mp
->min_address
+= fix
->fix_size
;
16782 /* Note the insertion point if necessary. */
16783 if (mp
->min_address
< min_address
)
16785 /* For now, we do not allow the insertion of 8-byte alignment
16786 requiring nodes anywhere but at the start of the pool. */
16787 if (ARM_DOUBLEWORD_ALIGN
16788 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16793 else if (mp
->max_address
16794 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16796 /* Inserting before this entry would push the fix beyond
16797 its maximum address (which can happen if we have
16798 re-located a forwards fix); force the new fix to come
16800 if (ARM_DOUBLEWORD_ALIGN
16801 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16806 min_address
= mp
->min_address
+ fix
->fix_size
;
16809 /* Do not insert a non-8-byte aligned quantity before 8-byte
16810 aligned quantities. */
16811 else if (ARM_DOUBLEWORD_ALIGN
16812 && fix
->fix_size
< 8
16813 && mp
->fix_size
>= 8)
16816 min_address
= mp
->min_address
+ fix
->fix_size
;
16821 /* We need to create a new entry. */
16823 mp
->fix_size
= fix
->fix_size
;
16824 mp
->mode
= fix
->mode
;
16825 mp
->value
= fix
->value
;
16827 mp
->max_address
= minipool_barrier
->address
+ 65536;
16829 mp
->min_address
= min_address
;
16831 if (min_mp
== NULL
)
16834 mp
->next
= minipool_vector_head
;
16836 if (mp
->next
== NULL
)
16838 minipool_vector_tail
= mp
;
16839 minipool_vector_label
= gen_label_rtx ();
16842 mp
->next
->prev
= mp
;
16844 minipool_vector_head
= mp
;
16848 mp
->next
= min_mp
->next
;
16852 if (mp
->next
!= NULL
)
16853 mp
->next
->prev
= mp
;
16855 minipool_vector_tail
= mp
;
16858 /* Save the new entry. */
16866 /* Scan over the following entries and adjust their offsets. */
16867 while (mp
->next
!= NULL
)
16869 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16870 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16873 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16875 mp
->next
->offset
= mp
->offset
;
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;
16908 if (ARM_DOUBLEWORD_ALIGN
)
16909 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16910 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16917 fprintf (dump_file
,
16918 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16919 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16921 scan
= emit_label_after (gen_label_rtx (), scan
);
16922 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16923 scan
= emit_label_after (minipool_vector_label
, scan
);
16925 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16927 if (mp
->refcount
> 0)
16931 fprintf (dump_file
,
16932 ";; Offset %u, min %ld, max %ld ",
16933 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16934 (unsigned long) mp
->max_address
);
16935 arm_print_value (dump_file
, mp
->value
);
16936 fputc ('\n', dump_file
);
16939 switch (GET_MODE_SIZE (mp
->mode
))
16941 #ifdef HAVE_consttable_1
16943 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16947 #ifdef HAVE_consttable_2
16949 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16953 #ifdef HAVE_consttable_4
16955 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16959 #ifdef HAVE_consttable_8
16961 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16965 #ifdef HAVE_consttable_16
16967 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16972 gcc_unreachable ();
16980 minipool_vector_head
= minipool_vector_tail
= NULL
;
16981 scan
= emit_insn_after (gen_consttable_end (), scan
);
16982 scan
= emit_barrier_after (scan
);
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
17024 HOST_WIDE_INT count
= 0;
17025 rtx_barrier
*barrier
;
17026 rtx_insn
*from
= fix
->insn
;
17027 /* The instruction after which we will insert the jump. */
17028 rtx_insn
*selected
= NULL
;
17030 /* The address at which the jump instruction will be placed. */
17031 HOST_WIDE_INT selected_address
;
17033 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
17034 rtx_code_label
*label
= gen_label_rtx ();
17036 selected_cost
= arm_barrier_cost (from
);
17037 selected_address
= fix
->address
;
17039 while (from
&& count
< max_count
)
17041 rtx_jump_table_data
*tmp
;
17044 /* This code shouldn't have been called if there was a natural barrier
17046 gcc_assert (!BARRIER_P (from
));
17048 /* Count the length of this insn. This must stay in sync with the
17049 code that pushes minipool fixes. */
17050 if (LABEL_P (from
))
17051 count
+= get_label_padding (from
);
17053 count
+= get_attr_length (from
);
17055 /* If there is a jump table, add its length. */
17056 if (tablejump_p (from
, NULL
, &tmp
))
17058 count
+= get_jump_table_size (tmp
);
17060 /* Jump tables aren't in a basic block, so base the cost on
17061 the dispatch insn. If we select this location, we will
17062 still put the pool after the table. */
17063 new_cost
= arm_barrier_cost (from
);
17065 if (count
< max_count
17066 && (!selected
|| new_cost
<= selected_cost
))
17069 selected_cost
= new_cost
;
17070 selected_address
= fix
->address
+ count
;
17073 /* Continue after the dispatch table. */
17074 from
= NEXT_INSN (tmp
);
17078 new_cost
= arm_barrier_cost (from
);
17080 if (count
< max_count
17081 && (!selected
|| new_cost
<= selected_cost
))
17084 selected_cost
= new_cost
;
17085 selected_address
= fix
->address
+ count
;
17088 from
= NEXT_INSN (from
);
17091 /* Make sure that we found a place to insert the jump. */
17092 gcc_assert (selected
);
17094 /* Make sure we do not split a call and its corresponding
17095 CALL_ARG_LOCATION note. */
17096 if (CALL_P (selected
))
17098 rtx_insn
*next
= NEXT_INSN (selected
);
17099 if (next
&& NOTE_P (next
)
17100 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
17104 /* Create a new JUMP_INSN that branches around a barrier. */
17105 from
= emit_jump_insn_after (gen_jump (label
), selected
);
17106 JUMP_LABEL (from
) = label
;
17107 barrier
= emit_barrier_after (from
);
17108 emit_label_after (label
, barrier
);
17110 /* Create a minipool barrier entry for the new barrier. */
17111 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
17112 new_fix
->insn
= barrier
;
17113 new_fix
->address
= selected_address
;
17114 new_fix
->next
= fix
->next
;
17115 fix
->next
= new_fix
;
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)
    return 99;

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
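
/* Illustrative sketch, not part of the original source: both predicates
   above lean on const_ok_for_arm, which (in ARM state, roughly) accepts any
   value expressible as an 8-bit immediate rotated right by an even amount --
   the classic ARM data-processing immediate.  A standalone approximation of
   that test, under that simplifying assumption:  */
static int
rotated_immediate_ok_sketch (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT; if the result fits in 8 bits, then X is an
         8-bit value rotated right by ROT.  */
      unsigned int y = (x << rot) | (x >> ((32 - rot) & 31));
      if ((y & ~0xffu) == 0)
        return 1;
    }

  return 0;
}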
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
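
/* Illustrative example, not part of the original source: the transformation
   above takes a block ending in something like

	mov	r2, r1		@ RTL move, condition codes not modelled
	...
	cmp	r2, #0		@ compare folded into cbranchsi4_insn
	bne	.Llabel

   and rewrites the move as "r2 = r1 - 0", i.e. a flag-setting

	subs	r2, r1, #0

   while pointing the branch's comparison at the rewritten definition, so
   that the separate compare against zero can later be omitted.  The assembly
   shown is only a sketch of the intent, not literal compiler output.  */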
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);
17458 /* We are freeing block_for_insn in the toplev to keep compatibility
17459 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17460 compute_bb_for_insn ();
17463 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17465 FOR_EACH_BB_FN (bb
, cfun
)
17467 if ((current_tune
->disparage_flag_setting_t16_encodings
17468 == tune_params::DISPARAGE_FLAGS_ALL
)
17469 && optimize_bb_for_speed_p (bb
))
17473 Convert_Action action
= SKIP
;
17474 Convert_Action action_for_partial_flag_setting
17475 = ((current_tune
->disparage_flag_setting_t16_encodings
17476 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17477 && optimize_bb_for_speed_p (bb
))
17480 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17481 df_simulate_initialize_backwards (bb
, &live
);
17482 FOR_BB_INSNS_REVERSE (bb
, insn
)
17484 if (NONJUMP_INSN_P (insn
)
17485 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17486 && GET_CODE (PATTERN (insn
)) == SET
)
17489 rtx pat
= PATTERN (insn
);
17490 rtx dst
= XEXP (pat
, 0);
17491 rtx src
= XEXP (pat
, 1);
17492 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17494 if (UNARY_P (src
) || BINARY_P (src
))
17495 op0
= XEXP (src
, 0);
17497 if (BINARY_P (src
))
17498 op1
= XEXP (src
, 1);
17500 if (low_register_operand (dst
, SImode
))
17502 switch (GET_CODE (src
))
17505 /* Adding two registers and storing the result
17506 in the first source is already a 16-bit
17508 if (rtx_equal_p (dst
, op0
)
17509 && register_operand (op1
, SImode
))
17512 if (low_register_operand (op0
, SImode
))
17514 /* ADDS <Rd>,<Rn>,<Rm> */
17515 if (low_register_operand (op1
, SImode
))
17517 /* ADDS <Rdn>,#<imm8> */
17518 /* SUBS <Rdn>,#<imm8> */
17519 else if (rtx_equal_p (dst
, op0
)
17520 && CONST_INT_P (op1
)
17521 && IN_RANGE (INTVAL (op1
), -255, 255))
17523 /* ADDS <Rd>,<Rn>,#<imm3> */
17524 /* SUBS <Rd>,<Rn>,#<imm3> */
17525 else if (CONST_INT_P (op1
)
17526 && IN_RANGE (INTVAL (op1
), -7, 7))
17529 /* ADCS <Rd>, <Rn> */
17530 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17531 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17532 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17534 && COMPARISON_P (op1
)
17535 && cc_register (XEXP (op1
, 0), VOIDmode
)
17536 && maybe_get_arm_condition_code (op1
) == ARM_CS
17537 && XEXP (op1
, 1) == const0_rtx
)
17542 /* RSBS <Rd>,<Rn>,#0
17543 Not handled here: see NEG below. */
17544 /* SUBS <Rd>,<Rn>,#<imm3>
17546 Not handled here: see PLUS above. */
17547 /* SUBS <Rd>,<Rn>,<Rm> */
17548 if (low_register_operand (op0
, SImode
)
17549 && low_register_operand (op1
, SImode
))
17554 /* MULS <Rdm>,<Rn>,<Rdm>
17555 As an exception to the rule, this is only used
17556 when optimizing for size since MULS is slow on all
17557 known implementations. We do not even want to use
17558 MULS in cold code, if optimizing for speed, so we
17559 test the global flag here. */
17560 if (!optimize_size
)
17562 /* Fall through. */
17566 /* ANDS <Rdn>,<Rm> */
17567 if (rtx_equal_p (dst
, op0
)
17568 && low_register_operand (op1
, SImode
))
17569 action
= action_for_partial_flag_setting
;
17570 else if (rtx_equal_p (dst
, op1
)
17571 && low_register_operand (op0
, SImode
))
17572 action
= action_for_partial_flag_setting
== SKIP
17573 ? SKIP
: SWAP_CONV
;
17579 /* ASRS <Rdn>,<Rm> */
17580 /* LSRS <Rdn>,<Rm> */
17581 /* LSLS <Rdn>,<Rm> */
17582 if (rtx_equal_p (dst
, op0
)
17583 && low_register_operand (op1
, SImode
))
17584 action
= action_for_partial_flag_setting
;
17585 /* ASRS <Rd>,<Rm>,#<imm5> */
17586 /* LSRS <Rd>,<Rm>,#<imm5> */
17587 /* LSLS <Rd>,<Rm>,#<imm5> */
17588 else if (low_register_operand (op0
, SImode
)
17589 && CONST_INT_P (op1
)
17590 && IN_RANGE (INTVAL (op1
), 0, 31))
17591 action
= action_for_partial_flag_setting
;
17595 /* RORS <Rdn>,<Rm> */
17596 if (rtx_equal_p (dst
, op0
)
17597 && low_register_operand (op1
, SImode
))
17598 action
= action_for_partial_flag_setting
;
17602 /* MVNS <Rd>,<Rm> */
17603 if (low_register_operand (op0
, SImode
))
17604 action
= action_for_partial_flag_setting
;
17608 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17609 if (low_register_operand (op0
, SImode
))
17614 /* MOVS <Rd>,#<imm8> */
17615 if (CONST_INT_P (src
)
17616 && IN_RANGE (INTVAL (src
), 0, 255))
17617 action
= action_for_partial_flag_setting
;
17621 /* MOVS and MOV<c> with registers have different
17622 encodings, so are not relevant here. */
17630 if (action
!= SKIP
)
17632 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17633 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17636 if (action
== SWAP_CONV
)
17638 src
= copy_rtx (src
);
17639 XEXP (src
, 0) = op1
;
17640 XEXP (src
, 1) = op0
;
17641 pat
= gen_rtx_SET (dst
, src
);
17642 vec
= gen_rtvec (2, pat
, clobber
);
17644 else /* action == CONV */
17645 vec
= gen_rtvec (2, pat
, clobber
);
17647 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17648 INSN_CODE (insn
) = -1;
17652 if (NONDEBUG_INSN_P (insn
))
17653 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17657 CLEAR_REG_SET (&live
);
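
/* Illustrative sketch, not part of the original source: the ADDS/SUBS
   immediate cases above distinguish the 8-bit form (destination equal to the
   first source, ADDS/SUBS <Rdn>,#<imm8>) from the 3-bit form
   (ADDS/SUBS <Rd>,<Rn>,#<imm3>).  A standalone restatement of just those
   range tests, with a hypothetical helper name:  */
static int
thumb1_flag_setting_add_imm_ok (int dest_equals_first_source, long imm)
{
  if (dest_equals_first_source)
    return imm >= -255 && imm <= 255;	/* ADDS/SUBS <Rdn>, #<imm8> */

  return imm >= -7 && imm <= 7;		/* ADDS/SUBS <Rd>, <Rn>, #<imm3> */
}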
/* GCC puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix *fix;

  if (TARGET_THUMB1)
    thumb1_reorg ();
17673 else if (TARGET_THUMB2
)
17676 /* Ensure all insns that must be split have been split at this point.
17677 Otherwise, the pool placement code below may compute incorrect
17678 insn lengths. Note that when optimizing, all insns have already
17679 been split at this point. */
17681 split_all_insns_noflow ();
17683 minipool_fix_head
= minipool_fix_tail
= NULL
;
17685 /* The first insn must always be a note, or the code below won't
17686 scan it properly. */
17687 insn
= get_insns ();
17688 gcc_assert (NOTE_P (insn
));
17691 /* Scan all the insns and record the operands that will need fixing. */
17692 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17694 if (BARRIER_P (insn
))
17695 push_minipool_barrier (insn
, address
);
17696 else if (INSN_P (insn
))
17698 rtx_jump_table_data
*table
;
17700 note_invalid_constants (insn
, address
, true);
17701 address
+= get_attr_length (insn
);
17703 /* If the insn is a vector jump, add the size of the table
17704 and skip the table. */
17705 if (tablejump_p (insn
, NULL
, &table
))
17707 address
+= get_jump_table_size (table
);
17711 else if (LABEL_P (insn
))
17712 /* Add the worst-case padding due to alignment. We don't add
17713 the _current_ padding because the minipool insertions
17714 themselves might change it. */
17715 address
+= get_label_padding (insn
);
17718 fix
= minipool_fix_head
;
17720 /* Now scan the fixups and perform the required changes. */
17725 Mfix
* last_added_fix
;
17726 Mfix
* last_barrier
= NULL
;
17729 /* Skip any further barriers before the next fix. */
17730 while (fix
&& BARRIER_P (fix
->insn
))
17733 /* No more fixes. */
17737 last_added_fix
= NULL
;
17739 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17741 if (BARRIER_P (ftmp
->insn
))
17743 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17746 last_barrier
= ftmp
;
17748 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17751 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
17757 if (last_barrier
!= NULL
)
17759 /* Reduce the refcount for those fixes that won't go into this
17761 for (fdel
= last_barrier
->next
;
17762 fdel
&& fdel
!= ftmp
;
17765 fdel
->minipool
->refcount
--;
17766 fdel
->minipool
= NULL
;
17769 ftmp
= last_barrier
;
	  /* FTMP is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
17777 HOST_WIDE_INT max_address
;
17779 /* The last item on the list of fixes must be a barrier, so
17780 we can never run off the end of the list of fixes without
17781 last_barrier being set. */
17784 max_address
= minipool_vector_head
->max_address
;
17785 /* Check that there isn't another fix that is in range that
17786 we couldn't fit into this pool because the pool was
17787 already too large: we need to put the pool before such an
17788 instruction. The pool itself may come just after the
17789 fix because create_fix_barrier also allows space for a
17790 jump instruction. */
17791 if (ftmp
->address
< max_address
)
17792 max_address
= ftmp
->address
+ 1;
17794 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17797 assign_minipool_offsets (last_barrier
);
17801 if (!BARRIER_P (ftmp
->insn
)
17802 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17809 /* Scan over the fixes we have identified for this pool, fixing them
17810 up and adding the constants to the pool itself. */
17811 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17812 this_fix
= this_fix
->next
)
17813 if (!BARRIER_P (this_fix
->insn
))
17816 = plus_constant (Pmode
,
17817 gen_rtx_LABEL_REF (VOIDmode
,
17818 minipool_vector_label
),
17819 this_fix
->minipool
->offset
);
17820 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17823 dump_minipool (last_barrier
->insn
);
17827 /* From now on we must synthesize any constants that we can't handle
17828 directly. This can happen if the RTL gets split during final
17829 instruction generation. */
17830 cfun
->machine
->after_arm_reorg
= 1;
17832 /* Free the minipool memory. */
17833 obstack_free (&minipool_obstack
, minipool_startobj
);
17836 /* Routines to output assembly language. */
17838 /* Return string representation of passed in real value. */
17839 static const char *
17840 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17842 if (!fp_consts_inited
)
17845 gcc_assert (real_equal (r
, &value_fp0
));
17849 /* OPERANDS[0] is the entire list of insns that constitute pop,
17850 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17851 is in the list, UPDATE is true iff the list contains explicit
17852 update of base register. */
17854 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17860 const char *conditional
;
17861 int num_saves
= XVECLEN (operands
[0], 0);
17862 unsigned int regno
;
17863 unsigned int regno_base
= REGNO (operands
[1]);
17864 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
17867 offset
+= update
? 1 : 0;
17868 offset
+= return_pc
? 1 : 0;
17870 /* Is the base register in the list? */
17871 for (i
= offset
; i
< num_saves
; i
++)
17873 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17874 /* If SP is in the list, then the base register must be SP. */
17875 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17876 /* If base register is in the list, there must be no explicit update. */
17877 if (regno
== regno_base
)
17878 gcc_assert (!update
);
17881 conditional
= reverse
? "%?%D0" : "%?%d0";
17882 /* Can't use POP if returning from an interrupt. */
17883 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17884 sprintf (pattern
, "pop%s\t{", conditional
);
17887 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17888 It's just a convention, their semantics are identical. */
17889 if (regno_base
== SP_REGNUM
)
17890 sprintf (pattern
, "ldmfd%s\t", conditional
);
17892 sprintf (pattern
, "ldmia%s\t", conditional
);
17894 sprintf (pattern
, "ldm%s\t", conditional
);
17896 strcat (pattern
, reg_names
[regno_base
]);
17898 strcat (pattern
, "!, {");
17900 strcat (pattern
, ", {");
17903 /* Output the first destination register. */
17905 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17907 /* Output the rest of the destination registers. */
17908 for (i
= offset
+ 1; i
< num_saves
; i
++)
17910 strcat (pattern
, ", ");
17912 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17915 strcat (pattern
, "}");
17917 if (interrupt_p
&& return_pc
)
17918 strcat (pattern
, "^");
17920 output_asm_insn (pattern
, &cond
);
17924 /* Output the assembly for a store multiple. */
17927 vfp_output_vstmd (rtx
* operands
)
17933 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17934 ? XEXP (operands
[0], 0)
17935 : XEXP (XEXP (operands
[0], 0), 0);
17936 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17939 strcpy (pattern
, "vpush%?.64\t{%P1");
17941 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17943 p
= strlen (pattern
);
17945 gcc_assert (REG_P (operands
[1]));
17947 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17948 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17950 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17952 strcpy (&pattern
[p
], "}");
17954 output_asm_insn (pattern
, operands
);
17959 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17960 number of bytes pushed. */
17963 vfp_emit_fstmd (int base_reg
, int count
)
17970 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17971 register pairs are stored by a store multiple insn. We avoid this
17972 by pushing an extra pair. */
17973 if (count
== 2 && !arm_arch6
)
17975 if (base_reg
== LAST_VFP_REGNUM
- 3)
  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;

      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }
17993 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17994 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17996 reg
= gen_rtx_REG (DFmode
, base_reg
);
17999 XVECEXP (par
, 0, 0)
18000 = gen_rtx_SET (gen_frame_mem
18002 gen_rtx_PRE_MODIFY (Pmode
,
18005 (Pmode
, stack_pointer_rtx
,
18008 gen_rtx_UNSPEC (BLKmode
,
18009 gen_rtvec (1, reg
),
18010 UNSPEC_PUSH_MULT
));
18012 tmp
= gen_rtx_SET (stack_pointer_rtx
,
18013 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
18014 RTX_FRAME_RELATED_P (tmp
) = 1;
18015 XVECEXP (dwarf
, 0, 0) = tmp
;
18017 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
18018 RTX_FRAME_RELATED_P (tmp
) = 1;
18019 XVECEXP (dwarf
, 0, 1) = tmp
;
18021 for (i
= 1; i
< count
; i
++)
18023 reg
= gen_rtx_REG (DFmode
, base_reg
);
18025 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
18027 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
18028 plus_constant (Pmode
,
18032 RTX_FRAME_RELATED_P (tmp
) = 1;
18033 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
18036 par
= emit_insn (par
);
18037 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
18038 RTX_FRAME_RELATED_P (par
) = 1;
18043 /* Emit a call instruction with pattern PAT. ADDR is the address of
18044 the call target. */
18047 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
18051 insn
= emit_call_insn (pat
);
18053 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18054 If the call might use such an entry, add a use of the PIC register
18055 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18056 if (TARGET_VXWORKS_RTP
18059 && GET_CODE (addr
) == SYMBOL_REF
18060 && (SYMBOL_REF_DECL (addr
)
18061 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
18062 : !SYMBOL_REF_LOCAL_P (addr
)))
18064 require_pic_register ();
18065 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
18068 if (TARGET_AAPCS_BASED
)
18070 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18071 linker. We need to add an IP clobber to allow setting
18072 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18073 is not needed since it's a fixed register. */
18074 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
18075 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
18079 /* Output a 'call' insn. */
18081 output_call (rtx
*operands
)
18083 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
18085 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18086 if (REGNO (operands
[0]) == LR_REGNUM
)
18088 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
18089 output_asm_insn ("mov%?\t%0, %|lr", operands
);
18092 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
18094 if (TARGET_INTERWORK
|| arm_arch4t
)
18095 output_asm_insn ("bx%?\t%0", operands
);
18097 output_asm_insn ("mov%?\t%|pc, %0", operands
);
18102 /* Output a move from arm registers to arm registers of a long double
18103 OPERANDS[0] is the destination.
18104 OPERANDS[1] is the source. */
18106 output_mov_long_double_arm_from_arm (rtx
*operands
)
18108 /* We have to be careful here because the two might overlap. */
18109 int dest_start
= REGNO (operands
[0]);
18110 int src_start
= REGNO (operands
[1]);
18114 if (dest_start
< src_start
)
18116 for (i
= 0; i
< 3; i
++)
18118 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18119 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18120 output_asm_insn ("mov%?\t%0, %1", ops
);
18125 for (i
= 2; i
>= 0; i
--)
18127 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18128 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18129 output_asm_insn ("mov%?\t%0, %1", ops
);
18137 arm_emit_movpair (rtx dest
, rtx src
)
18141 /* If the src is an immediate, simplify it. */
18142 if (CONST_INT_P (src
))
18144 HOST_WIDE_INT val
= INTVAL (src
);
18145 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
18146 if ((val
>> 16) & 0x0000ffff)
18148 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
18150 GEN_INT ((val
>> 16) & 0x0000ffff));
18151 insn
= get_last_insn ();
18152 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
18156 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
18157 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
18158 insn
= get_last_insn ();
18159 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
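
/* Illustrative sketch, not part of the original source: for a CONST_INT
   source, arm_emit_movpair above first sets the low 16 bits (a movw-style
   set) and then, only when they are non-zero, inserts the high 16 bits via a
   ZERO_EXTRACT (a movt-style set).  The underlying value split is just:  */
static void
split_movw_movt_halves (unsigned int val,
			unsigned int *low16, unsigned int *high16)
{
  *low16 = val & 0xffffu;		/* movw immediate */
  *high16 = (val >> 16) & 0xffffu;	/* movt immediate, skipped if zero */
}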
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
18167 enum rtx_code code0
= GET_CODE (operands
[0]);
18168 enum rtx_code code1
= GET_CODE (operands
[1]);
18173 /* The only case when this might happen is when
18174 you are looking at the length of a DImode instruction
18175 that has an invalid constant in it. */
18176 if (code0
== REG
&& code1
!= MEM
)
18178 gcc_assert (!emit
);
18185 unsigned int reg0
= REGNO (operands
[0]);
18187 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
18189 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
18191 switch (GET_CODE (XEXP (operands
[1], 0)))
18198 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
18199 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
18201 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18206 gcc_assert (TARGET_LDRD
);
18208 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
18215 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
18217 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18225 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18227 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18232 gcc_assert (TARGET_LDRD
);
18234 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
18243 otherops
[0] = operands
[0];
18244 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18245 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18247 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18249 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18251 /* Registers overlap so split out the increment. */
18254 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18255 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18262 /* Use a single insn if we can.
18263 FIXME: IWMMXT allows offsets larger than ldrd can
18264 handle, fix these up with a pair of ldr. */
18266 || !CONST_INT_P (otherops
[2])
18267 || (INTVAL (otherops
[2]) > -256
18268 && INTVAL (otherops
[2]) < 256))
18271 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18277 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18278 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18288 /* Use a single insn if we can.
18289 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18290 fix these up with a pair of ldr. */
18292 || !CONST_INT_P (otherops
[2])
18293 || (INTVAL (otherops
[2]) > -256
18294 && INTVAL (otherops
[2]) < 256))
18297 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18303 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18304 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18314 /* We might be able to use ldrd %0, %1 here. However the range is
18315 different to ldr/adr, and it is broken on some ARMv7-M
18316 implementations. */
18317 /* Use the second register of the pair to avoid problematic
18319 otherops
[1] = operands
[1];
18321 output_asm_insn ("adr%?\t%0, %1", otherops
);
18322 operands
[1] = otherops
[0];
18326 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18328 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18335 /* ??? This needs checking for thumb2. */
18337 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18338 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18340 otherops
[0] = operands
[0];
18341 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18342 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18344 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18346 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18348 switch ((int) INTVAL (otherops
[2]))
18352 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18358 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18364 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18368 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18369 operands
[1] = otherops
[0];
18371 && (REG_P (otherops
[2])
18373 || (CONST_INT_P (otherops
[2])
18374 && INTVAL (otherops
[2]) > -256
18375 && INTVAL (otherops
[2]) < 256)))
18377 if (reg_overlap_mentioned_p (operands
[0],
18380 /* Swap base and index registers over to
18381 avoid a conflict. */
18382 std::swap (otherops
[1], otherops
[2]);
18384 /* If both registers conflict, it will usually
18385 have been fixed by a splitter. */
18386 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18387 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18391 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18392 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18399 otherops
[0] = operands
[0];
18401 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18406 if (CONST_INT_P (otherops
[2]))
18410 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18411 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18413 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18419 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18425 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18432 return "ldrd%?\t%0, [%1]";
18434 return "ldmia%?\t%1, %M0";
18438 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18439 /* Take care of overlapping base/data reg. */
18440 if (reg_mentioned_p (operands
[0], operands
[1]))
18444 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18445 output_asm_insn ("ldr%?\t%0, %1", operands
);
18455 output_asm_insn ("ldr%?\t%0, %1", operands
);
18456 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18466 /* Constraints should ensure this. */
18467 gcc_assert (code0
== MEM
&& code1
== REG
);
18468 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18469 || (TARGET_ARM
&& TARGET_LDRD
));
18471 switch (GET_CODE (XEXP (operands
[0], 0)))
18477 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18479 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18484 gcc_assert (TARGET_LDRD
);
18486 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18493 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18495 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18503 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18505 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18510 gcc_assert (TARGET_LDRD
);
18512 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18517 otherops
[0] = operands
[1];
18518 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18519 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18521 /* IWMMXT allows offsets larger than ldrd can handle,
18522 fix these up with a pair of ldr. */
18524 && CONST_INT_P (otherops
[2])
18525 && (INTVAL(otherops
[2]) <= -256
18526 || INTVAL(otherops
[2]) >= 256))
18528 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18532 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18533 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18542 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18543 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18549 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18552 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18557 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18562 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18563 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18565 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18569 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18576 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18583 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18588 && (REG_P (otherops
[2])
18590 || (CONST_INT_P (otherops
[2])
18591 && INTVAL (otherops
[2]) > -256
18592 && INTVAL (otherops
[2]) < 256)))
18594 otherops
[0] = operands
[1];
18595 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18597 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18603 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18604 otherops
[1] = operands
[1];
18607 output_asm_insn ("str%?\t%1, %0", operands
);
18608 output_asm_insn ("str%?\t%H1, %0", otherops
);
18618 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18619 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18622 output_move_quad (rtx
*operands
)
18624 if (REG_P (operands
[0]))
18626 /* Load, or reg->reg move. */
18628 if (MEM_P (operands
[1]))
18630 switch (GET_CODE (XEXP (operands
[1], 0)))
18633 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18638 output_asm_insn ("adr%?\t%0, %1", operands
);
18639 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18643 gcc_unreachable ();
18651 gcc_assert (REG_P (operands
[1]));
18653 dest
= REGNO (operands
[0]);
18654 src
= REGNO (operands
[1]);
      /* This seems pretty dumb, but hopefully GCC won't try to do it
	 very often.  */
18659 for (i
= 0; i
< 4; i
++)
18661 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18662 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18663 output_asm_insn ("mov%?\t%0, %1", ops
);
18666 for (i
= 3; i
>= 0; i
--)
18668 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18669 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18670 output_asm_insn ("mov%?\t%0, %1", ops
);
18676 gcc_assert (MEM_P (operands
[0]));
18677 gcc_assert (REG_P (operands
[1]));
18678 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18680 switch (GET_CODE (XEXP (operands
[0], 0)))
18683 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18687 gcc_unreachable ();
18694 /* Output a VFP load or store instruction. */
18697 output_move_vfp (rtx
*operands
)
18699 rtx reg
, mem
, addr
, ops
[2];
18700 int load
= REG_P (operands
[0]);
18701 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18702 int sp
= (!TARGET_VFP_FP16INST
18703 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18704 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18709 reg
= operands
[!load
];
18710 mem
= operands
[load
];
18712 mode
= GET_MODE (reg
);
18714 gcc_assert (REG_P (reg
));
18715 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18716 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18722 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18723 gcc_assert (MEM_P (mem
));
18725 addr
= XEXP (mem
, 0);
18727 switch (GET_CODE (addr
))
18730 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18731 ops
[0] = XEXP (addr
, 0);
18736 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18737 ops
[0] = XEXP (addr
, 0);
18742 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18748 sprintf (buff
, templ
,
18749 load
? "ld" : "st",
18750 dp
? "64" : sp
? "32" : "16",
18752 integer_p
? "\t%@ int" : "");
18753 output_asm_insn (buff
, ops
);
18758 /* Output a Neon double-word or quad-word load or store, or a load
18759 or store for larger structure modes.
18761 WARNING: The ordering of elements is weird in big-endian mode,
18762 because the EABI requires that vectors stored in memory appear
18763 as though they were stored by a VSTM, as required by the EABI.
18764 GCC RTL defines element ordering based on in-memory order.
18765 This can be different from the architectural ordering of elements
18766 within a NEON register. The intrinsics defined in arm_neon.h use the
18767 NEON register element ordering, not the GCC RTL element ordering.
   For example, the in-memory ordering of a big-endian quadword
18770 vector with 16-bit elements when stored from register pair {d0,d1}
18771 will be (lowest address first, d0[N] is NEON register element N):
18773 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18775 When necessary, quadword registers (dN, dN+1) are moved to ARM
18776 registers from rN in the order:
18778 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18780 So that STM/LDM can be used on vectors in ARM registers, and the
18781 same memory layout will result as if VSTM/VLDM were used.
18783 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18784 possible, which allows use of appropriate alignment tags.
18785 Note that the choice of "64" is independent of the actual vector
18786 element size; this size simply ensures that the behavior is
18787 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18789 Due to limitations of those instructions, use of VST1.64/VLD1.64
18790 is not possible if:
18791 - the address contains PRE_DEC, or
18792 - the mode refers to more than 4 double-word registers
18794 In those cases, it would be possible to replace VSTM/VLDM by a
18795 sequence of instructions; this is not currently implemented since
18796 this is not certain to actually improve performance. */
18799 output_move_neon (rtx
*operands
)
18801 rtx reg
, mem
, addr
, ops
[2];
18802 int regno
, nregs
, load
= REG_P (operands
[0]);
18807 reg
= operands
[!load
];
18808 mem
= operands
[load
];
18810 mode
= GET_MODE (reg
);
18812 gcc_assert (REG_P (reg
));
18813 regno
= REGNO (reg
);
18814 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18815 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18816 || NEON_REGNO_OK_FOR_QUAD (regno
));
18817 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18818 || VALID_NEON_QREG_MODE (mode
)
18819 || VALID_NEON_STRUCT_MODE (mode
));
18820 gcc_assert (MEM_P (mem
));
18822 addr
= XEXP (mem
, 0);
18824 /* Strip off const from addresses like (const (plus (...))). */
18825 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18826 addr
= XEXP (addr
, 0);
18828 switch (GET_CODE (addr
))
18831 /* We have to use vldm / vstm for too-large modes. */
18834 templ
= "v%smia%%?\t%%0!, %%h1";
18835 ops
[0] = XEXP (addr
, 0);
18839 templ
= "v%s1.64\t%%h1, %%A0";
18846 /* We have to use vldm / vstm in this case, since there is no
18847 pre-decrement form of the vld1 / vst1 instructions. */
18848 templ
= "v%smdb%%?\t%%0!, %%h1";
18849 ops
[0] = XEXP (addr
, 0);
18854 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18855 gcc_unreachable ();
18858 /* We have to use vldm / vstm for too-large modes. */
18862 templ
= "v%smia%%?\t%%m0, %%h1";
18864 templ
= "v%s1.64\t%%h1, %%A0";
18870 /* Fall through. */
18876 for (i
= 0; i
< nregs
; i
++)
18878 /* We're only using DImode here because it's a convenient size. */
18879 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18880 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18881 if (reg_overlap_mentioned_p (ops
[0], mem
))
18883 gcc_assert (overlap
== -1);
18888 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18889 output_asm_insn (buff
, ops
);
18894 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18895 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18896 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18897 output_asm_insn (buff
, ops
);
18904 gcc_unreachable ();
18907 sprintf (buff
, templ
, load
? "ld" : "st");
18908 output_asm_insn (buff
, ops
);
18913 /* Compute and return the length of neon_mov<mode>, where <mode> is
18914 one of VSTRUCT modes: EI, OI, CI or XI. */
18916 arm_attr_length_move_neon (rtx_insn
*insn
)
18918 rtx reg
, mem
, addr
;
18922 extract_insn_cached (insn
);
18924 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18926 mode
= GET_MODE (recog_data
.operand
[0]);
18937 gcc_unreachable ();
18941 load
= REG_P (recog_data
.operand
[0]);
18942 reg
= recog_data
.operand
[!load
];
18943 mem
= recog_data
.operand
[load
];
18945 gcc_assert (MEM_P (mem
));
18947 mode
= GET_MODE (reg
);
18948 addr
= XEXP (mem
, 0);
18950 /* Strip off const from addresses like (const (plus (...))). */
18951 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18952 addr
= XEXP (addr
, 0);
18954 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18956 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
18971 extract_insn_cached (insn
);
18973 if (REG_P (recog_data
.operand
[0]))
18976 mem
= recog_data
.operand
[0];
18978 gcc_assert (MEM_P (mem
));
18980 addr
= XEXP (mem
, 0);
18983 || (GET_CODE (addr
) == PLUS
18984 && REG_P (XEXP (addr
, 0))
18985 && CONST_INT_P (XEXP (addr
, 1))))
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
19043 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
19044 output_asm_insn (instr
, operands
);
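
/* Illustrative sketch, not part of the original source: the loop above walks
   even bit positions and emits one add/sub per non-zero chunk of the form
   (n & (255 << i)), each such chunk being a valid ARM rotated immediate.
   The helper below is a hypothetical, standalone way of counting how many
   chunks a constant decomposes into; it is not the exact loop used by
   output_multi_immediate.  */
static int
count_even_aligned_byte_chunks (unsigned int n)
{
  int count = 0;

  while (n != 0)
    {
      /* Find the lowest set bit, rounded down to an even position...  */
      int low = 0;
      while (((n >> low) & 1) == 0)
	low++;
      low &= ~1;

      /* ...and strip the 8-bit chunk that starts there.  */
      n &= ~(0xffu << low);
      count++;
    }

  return count;
}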
19054 /* Return the name of a shifter operation. */
19055 static const char *
19056 arm_shift_nmem(enum rtx_code code
)
19061 return ARM_LSL_NAME
;
19077 /* Return the appropriate ARM instruction for the operation code.
19078 The returned result should not be overwritten. OP is the rtx of the
19079 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19082 arithmetic_instr (rtx op
, int shift_first_arg
)
19084 switch (GET_CODE (op
))
19090 return shift_first_arg
? "rsb" : "sub";
19105 return arm_shift_nmem(GET_CODE(op
));
19108 gcc_unreachable ();
19112 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19113 for the operation code. The returned result should not be overwritten.
19114 OP is the rtx code of the shift.
19115 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
19117 static const char *
19118 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
19121 enum rtx_code code
= GET_CODE (op
);
19126 if (!CONST_INT_P (XEXP (op
, 1)))
19128 output_operand_lossage ("invalid shift operand");
19133 *amountp
= 32 - INTVAL (XEXP (op
, 1));
19141 mnem
= arm_shift_nmem(code
);
19142 if (CONST_INT_P (XEXP (op
, 1)))
19144 *amountp
= INTVAL (XEXP (op
, 1));
19146 else if (REG_P (XEXP (op
, 1)))
19153 output_operand_lossage ("invalid shift operand");
19159 /* We never have to worry about the amount being other than a
19160 power of 2, since this case can never be reloaded from a reg. */
19161 if (!CONST_INT_P (XEXP (op
, 1)))
19163 output_operand_lossage ("invalid shift operand");
19167 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
19169 /* Amount must be a power of two. */
19170 if (*amountp
& (*amountp
- 1))
19172 output_operand_lossage ("invalid shift operand");
19176 *amountp
= exact_log2 (*amountp
);
19177 gcc_assert (IN_RANGE (*amountp
, 0, 31));
19178 return ARM_LSL_NAME
;
19181 output_operand_lossage ("invalid shift operand");
19185 /* This is not 100% correct, but follows from the desire to merge
19186 multiplication by a power of 2 with the recognizer for a
19187 shift. >=32 is not a valid shift for "lsl", so we must try and
19188 output a shift that produces the correct arithmetical result.
19189 Using lsr #32 is identical except for the fact that the carry bit
19190 is not set correctly if we set the flags; but we never use the
19191 carry bit from such an operation, so we can ignore that. */
19192 if (code
== ROTATERT
)
19193 /* Rotate is just modulo 32. */
19195 else if (*amountp
!= (*amountp
& 31))
19197 if (code
== ASHIFT
)
19202 /* Shifts of 0 are no-ops. */
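
/* Illustrative sketch, not part of the original source: the MULT case of
   shift_op above only ever sees multiplications by a power of two, so the
   shift amount it hands back is simply the base-2 logarithm.  A standalone
   restatement of the power-of-two check and the exact_log2 computation:  */
static int
power_of_two_shift_amount (unsigned int amount)
{
  int log = 0;

  /* Reject zero and non-powers of two, mirroring the lossage path.  */
  if (amount == 0 || (amount & (amount - 1)) != 0)
    return -1;

  while ((amount >>= 1) != 0)
    log++;

  return log;
}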
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
19257 /* Whether a register is callee saved or not. This is necessary because high
19258 registers are marked as caller saved when optimizing for size on Thumb-1
19259 targets despite being callee saved in order to avoid using them. */
19260 #define callee_saved_reg_p(reg) \
19261 (!call_used_regs[reg] \
19262 || (TARGET_THUMB1 && optimize_size \
19263 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19265 /* Compute the register save mask for registers 0 through 12
19266 inclusive. This code is used by arm_compute_save_reg_mask. */
19268 static unsigned long
19269 arm_compute_save_reg0_reg12_mask (void)
19271 unsigned long func_type
= arm_current_func_type ();
19272 unsigned long save_reg_mask
= 0;
19275 if (IS_INTERRUPT (func_type
))
19277 unsigned int max_reg
;
19278 /* Interrupt functions must not corrupt any registers,
19279 even call clobbered ones. If this is a leaf function
19280 we can just examine the registers used by the RTL, but
19281 otherwise we have to assume that whatever function is
19282 called might clobber anything, and so we have to save
19283 all the call-clobbered registers as well. */
19284 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7.  Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;
19294 for (reg
= 0; reg
<= max_reg
; reg
++)
19295 if (df_regs_ever_live_p (reg
)
19296 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19297 save_reg_mask
|= (1 << reg
);
19299 /* Also save the pic base register if necessary. */
19301 && !TARGET_SINGLE_PIC_BASE
19302 && arm_pic_register
!= INVALID_REGNUM
19303 && crtl
->uses_pic_offset_table
)
19304 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19306 else if (IS_VOLATILE(func_type
))
19308 /* For noreturn functions we historically omitted register saves
19309 altogether. However this really messes up debugging. As a
19310 compromise save just the frame pointers. Combined with the link
19311 register saved elsewhere this should be sufficient to get
19313 if (frame_pointer_needed
)
19314 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19315 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19316 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19317 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19318 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19322 /* In the normal case we only need to save those registers
19323 which are call saved and which are used by this function. */
19324 for (reg
= 0; reg
<= 11; reg
++)
19325 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19326 save_reg_mask
|= (1 << reg
);
19328 /* Handle the frame pointer as a special case. */
19329 if (frame_pointer_needed
)
19330 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19332 /* If we aren't loading the PIC register,
19333 don't stack it even though it may be live. */
19335 && !TARGET_SINGLE_PIC_BASE
19336 && arm_pic_register
!= INVALID_REGNUM
19337 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19338 || crtl
->uses_pic_offset_table
))
19339 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19341 /* The prologue will copy SP into R0, so save it. */
19342 if (IS_STACKALIGN (func_type
))
19343 save_reg_mask
|= 1;
19346 /* Save registers so the exception handler can modify them. */
19347 if (crtl
->calls_eh_return
)
19353 reg
= EH_RETURN_DATA_REGNO (i
);
19354 if (reg
== INVALID_REGNUM
)
19356 save_reg_mask
|= 1 << reg
;
19360 return save_reg_mask
;
19363 /* Return true if r3 is live at the start of the function. */
19366 arm_r3_live_at_start_p (void)
19368 /* Just look at cfg info, which is still close enough to correct at this
19369 point. This gives false positives for broken functions that might use
19370 uninitialized data that happens to be allocated in r3, but who cares? */
19371 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19374 /* Compute the number of bytes used to store the static chain register on the
19375 stack, above the stack frame. We need to know this accurately to get the
19376 alignment of the rest of the stack frame correct. */
19379 arm_compute_static_chain_stack_bytes (void)
19381 /* See the defining assertion in arm_expand_prologue. */
19382 if (IS_NESTED (arm_current_func_type ())
19383 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19384 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19385 && !df_regs_ever_live_p (LR_REGNUM
)))
19386 && arm_r3_live_at_start_p ()
19387 && crtl
->args
.pretend_args_size
== 0)
19393 /* Compute a bit mask of which registers need to be
19394 saved on the stack for the current function.
19395 This is used by arm_get_frame_offsets, which may add extra registers. */
19397 static unsigned long
19398 arm_compute_save_reg_mask (void)
19400 unsigned int save_reg_mask
= 0;
19401 unsigned long func_type
= arm_current_func_type ();
19404 if (IS_NAKED (func_type
))
19405 /* This should never really happen. */
19408 /* If we are creating a stack frame, then we must save the frame pointer,
19409 IP (which will hold the old stack pointer), LR and the PC. */
19410 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19412 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19415 | (1 << PC_REGNUM
);
19417 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19419 /* Decide if we need to save the link register.
19420 Interrupt routines have their own banked link register,
19421 so they never need to save it.
19422 Otherwise if we do not use the link register we do not need to save
19423 it. If we are pushing other registers onto the stack however, we
19424 can save an instruction in the epilogue by pushing the link register
19425 now and then popping it back into the PC. This incurs extra memory
19426 accesses though, so we only do it when optimizing for size, and only
19427 if we know that we will not need a fancy return sequence. */
19428 if (df_regs_ever_live_p (LR_REGNUM
)
19431 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19432 && !crtl
->tail_call_emit
19433 && !crtl
->calls_eh_return
))
19434 save_reg_mask
|= 1 << LR_REGNUM
;
19436 if (cfun
->machine
->lr_save_eliminated
)
19437 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19439 if (TARGET_REALLY_IWMMXT
19440 && ((bit_count (save_reg_mask
)
19441 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19442 arm_compute_static_chain_stack_bytes())
19445 /* The total number of registers that are going to be pushed
19446 onto the stack is odd. We need to ensure that the stack
19447 is 64-bit aligned before we start to save iWMMXt registers,
19448 and also before we start to create locals. (A local variable
19449 might be a double or long long which we will load/store using
19450 an iWMMXt instruction). Therefore we need to push another
19451 ARM register, so that the stack will be 64-bit aligned. We
	 try to avoid using the arg registers (r0 - r3) as they might be
19453 used to pass values in a tail call. */
19454 for (reg
= 4; reg
<= 12; reg
++)
19455 if ((save_reg_mask
& (1 << reg
)) == 0)
19459 save_reg_mask
|= (1 << reg
);
19462 cfun
->machine
->sibcall_blocked
= 1;
19463 save_reg_mask
|= (1 << 3);
19467 /* We may need to push an additional register for use initializing the
19468 PIC base register. */
19469 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19470 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19472 reg
= thumb_find_work_register (1 << 4);
19473 if (!call_used_regs
[reg
])
19474 save_reg_mask
|= (1 << reg
);
19477 return save_reg_mask
;
19480 /* Compute a bit mask of which registers need to be
19481 saved on the stack for the current function. */
19482 static unsigned long
19483 thumb1_compute_save_reg_mask (void)
19485 unsigned long mask
;
19489 for (reg
= 0; reg
< 12; reg
++)
19490 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19494 && !TARGET_SINGLE_PIC_BASE
19495 && arm_pic_register
!= INVALID_REGNUM
19496 && crtl
->uses_pic_offset_table
)
19497 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19499 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19500 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19501 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19503 /* LR will also be pushed if any lo regs are pushed. */
19504 if (mask
& 0xff || thumb_force_lr_save ())
19505 mask
|= (1 << LR_REGNUM
);
19507 /* Make sure we have a low work register if we need one.
19508 We will need one if we are going to push a high register,
19509 but we are not currently intending to push a low register. */
19510 if ((mask
& 0xff) == 0
19511 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19513 /* Use thumb_find_work_register to choose which register
19514 we will use. If the register is live then we will
19515 have to push it. Use LAST_LO_REGNUM as our fallback
19516 choice for the register to select. */
19517 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19518 /* Make sure the register returned by thumb_find_work_register is
19519 not part of the return value. */
19520 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19521 reg
= LAST_LO_REGNUM
;
19523 if (callee_saved_reg_p (reg
))
19527 /* The 504 below is 8 bytes less than 512 because there are two possible
19528 alignment words. We can't tell here if they will be present or not so we
19529 have to play it safe and assume that they are. */
19530 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19531 ROUND_UP_WORD (get_frame_size ()) +
19532 crtl
->outgoing_args_size
) >= 504)
19534 /* This is the same as the code in thumb1_expand_prologue() which
19535 determines which register to use for stack decrement. */
19536 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19537 if (mask
& (1 << reg
))
19540 if (reg
> LAST_LO_REGNUM
)
19542 /* Make sure we have a register available for stack decrement. */
19543 mask
|= 1 << LAST_LO_REGNUM
;
19551 /* Return the number of bytes required to save VFP registers. */
19553 arm_get_vfp_saved_size (void)
19555 unsigned int regno
;
19560 /* Space for saved VFP registers. */
19561 if (TARGET_HARD_FLOAT
)
19564 for (regno
= FIRST_VFP_REGNUM
;
19565 regno
< LAST_VFP_REGNUM
;
19568 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19569 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19573 /* Workaround ARM10 VFPr1 bug. */
19574 if (count
== 2 && !arm_arch6
)
19576 saved
+= count
* 8;
19585 if (count
== 2 && !arm_arch6
)
19587 saved
+= count
* 8;
19594 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19595 everything bar the final return instruction. If simple_return is true,
19596 then do not output epilogue, because it has already been emitted in RTL. */
19598 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19599 bool simple_return
)
19601 char conditional
[10];
19604 unsigned long live_regs_mask
;
19605 unsigned long func_type
;
19606 arm_stack_offsets
*offsets
;
19608 func_type
= arm_current_func_type ();
19610 if (IS_NAKED (func_type
))
19613 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19615 /* If this function was declared non-returning, and we have
19616 found a tail call, then we have to trust that the called
19617 function won't return. */
19622 /* Otherwise, trap an attempted return by aborting. */
19624 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19626 assemble_external_libcall (ops
[1]);
19627 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19633 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19635 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19637 cfun
->machine
->return_used_this_function
= 1;
19639 offsets
= arm_get_frame_offsets ();
19640 live_regs_mask
= offsets
->saved_regs_mask
;
19642 if (!simple_return
&& live_regs_mask
)
19644 const char * return_reg
;
19646 /* If we do not have any special requirements for function exit
19647 (e.g. interworking) then we can load the return address
19648 directly into the PC. Otherwise we must load it into LR. */
19650 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19651 return_reg
= reg_names
[PC_REGNUM
];
19653 return_reg
= reg_names
[LR_REGNUM
];
19655 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19657 /* There are three possible reasons for the IP register
19658 being saved. 1) a stack frame was created, in which case
19659 IP contains the old stack pointer, or 2) an ISR routine
19660 corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     ignore it.  */
19663 if (frame_pointer_needed
)
19665 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19666 live_regs_mask
|= (1 << SP_REGNUM
);
19669 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19672 /* On some ARM architectures it is faster to use LDR rather than
19673 LDM to load a single register. On other architectures, the
19674 cost is the same. In 26 bit mode, or for exception handlers,
19675 we have to use LDM to load the PC so that the CPSR is also
19677 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19678 if (live_regs_mask
== (1U << reg
))
19681 if (reg
<= LAST_ARM_REGNUM
19682 && (reg
!= LR_REGNUM
19684 || ! IS_INTERRUPT (func_type
)))
19686 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19687 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19694 /* Generate the load multiple instruction to restore the
19695 registers. Note we can get here, even if
19696 frame_pointer_needed is true, but only if sp already
19697 points to the base of the saved core registers. */
19698 if (live_regs_mask
& (1 << SP_REGNUM
))
19700 unsigned HOST_WIDE_INT stack_adjust
;
19702 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19703 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19705 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19706 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19709 /* If we can't use ldmib (SA110 bug),
19710 then try to pop r3 instead. */
19712 live_regs_mask
|= 1 << 3;
19714 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19717 /* For interrupt returns we have to use an LDM rather than
19718 a POP so that we can use the exception return variant. */
19719 else if (IS_INTERRUPT (func_type
))
19720 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19722 sprintf (instr
, "pop%s\t{", conditional
);
19724 p
= instr
+ strlen (instr
);
19726 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19727 if (live_regs_mask
& (1 << reg
))
19729 int l
= strlen (reg_names
[reg
]);
19735 memcpy (p
, ", ", 2);
19739 memcpy (p
, "%|", 2);
19740 memcpy (p
+ 2, reg_names
[reg
], l
);
19744 if (live_regs_mask
& (1 << LR_REGNUM
))
19746 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19747 /* If returning from an interrupt, restore the CPSR. */
19748 if (IS_INTERRUPT (func_type
))
19755 output_asm_insn (instr
, & operand
);
19757 /* See if we need to generate an extra instruction to
19758 perform the actual function return. */
19760 && func_type
!= ARM_FT_INTERWORKED
19761 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19763 /* The return has already been handled
19764 by loading the LR into the PC. */
19771 switch ((int) ARM_FUNC_TYPE (func_type
))
19775 /* ??? This is wrong for unified assembly syntax. */
19776 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19779 case ARM_FT_INTERWORKED
:
19780 gcc_assert (arm_arch5
|| arm_arch4t
);
19781 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19784 case ARM_FT_EXCEPTION
:
19785 /* ??? This is wrong for unified assembly syntax. */
19786 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19790 /* Use bx if it's available. */
19791 if (arm_arch5
|| arm_arch4t
)
19792 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19794 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19798 output_asm_insn (instr
, & operand
);
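/* As an illustration (derived from the format strings above), the final
   return emitted by output_return_instruction is typically one of:

	bx	lr		@ ARMv4T/ARMv5 and later, plain return
	mov	pc, lr		@ older cores without BX
	movs	pc, lr		@ exception handler, also restores CPSR
	subs	pc, lr, #4	@ ISR/FIQ-style return, also restores CPSR

   possibly preceded by a "pop {...}"/"ldmfd sp!, {...}" that reloads the
   saved registers, or folded away entirely when the return address can be
   loaded straight into the PC.  */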
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:

     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, with length ((pc[-3]) & ~0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
19831 arm_poke_function_name (FILE *stream
, const char *name
)
19833 unsigned long alignlength
;
19834 unsigned long length
;
19837 length
= strlen (name
) + 1;
19838 alignlength
= ROUND_UP_WORD (length
);
19840 ASM_OUTPUT_ASCII (stream
, name
, length
);
19841 ASM_OUTPUT_ALIGN (stream
, 2);
19842 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19843 assemble_aligned_integer (UNITS_PER_WORD
, x
);
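/* For illustration only, a backtrace routine relying on the marker emitted
   above could recover the embedded name along these lines.  PC_SLOT is a
   hypothetical name for the saved 'pc' value, declared as unsigned long *:

	unsigned long marker = pc_slot[-3];
	if ((marker & 0xff000000) == 0xff000000)
	  {
	    unsigned long len = marker & 0x00ffffff;	 // padded name length
	    const char *name = (const char *) (pc_slot - 3) - len;
	    // ... print NAME while walking the frame chain ...
	  }
   */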
19846 /* Place some comments into the assembler stream
19847 describing the current function. */
19849 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19851 unsigned long func_type
;
19853 /* ??? Do we want to print some of the below anyway? */
19857 /* Sanity check. */
19858 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19860 func_type
= arm_current_func_type ();
19862 switch ((int) ARM_FUNC_TYPE (func_type
))
19865 case ARM_FT_NORMAL
:
19867 case ARM_FT_INTERWORKED
:
19868 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19871 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19874 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19876 case ARM_FT_EXCEPTION
:
19877 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19881 if (IS_NAKED (func_type
))
19882 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19884 if (IS_VOLATILE (func_type
))
19885 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19887 if (IS_NESTED (func_type
))
19888 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19889 if (IS_STACKALIGN (func_type
))
19890 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19892 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19894 crtl
->args
.pretend_args_size
, frame_size
);
19896 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19897 frame_pointer_needed
,
19898 cfun
->machine
->uses_anonymous_args
);
19900 if (cfun
->machine
->lr_save_eliminated
)
19901 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19903 if (crtl
->calls_eh_return
)
19904 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19909 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19910 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19912 arm_stack_offsets
*offsets
;
19918 /* Emit any call-via-reg trampolines that are needed for v4t support
19919 of call_reg and call_value_reg type insns. */
19920 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19922 rtx label
= cfun
->machine
->call_via
[regno
];
19926 switch_to_section (function_section (current_function_decl
));
19927 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19928 CODE_LABEL_NUMBER (label
));
19929 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19933 /* ??? Probably not safe to set this here, since it assumes that a
19934 function will be emitted as assembly immediately after we generate
19935 RTL for it. This does not happen for inline functions. */
19936 cfun
->machine
->return_used_this_function
= 0;
19938 else /* TARGET_32BIT */
19940 /* We need to take into account any stack-frame rounding. */
19941 offsets
= arm_get_frame_offsets ();
19943 gcc_assert (!use_return_insn (FALSE
, NULL
)
19944 || (cfun
->machine
->return_used_this_function
!= 0)
19945 || offsets
->saved_regs
== offsets
->outgoing_args
19946 || frame_pointer_needed
);
19950 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19951 STR and STRD. If an even number of registers are being pushed, one
19952 or more STRD patterns are created for each register pair. If an
19953 odd number of registers are pushed, emit an initial STR followed by
19954 as many STRD instructions as are needed. This works best when the
19955 stack is initially 64-bit aligned (the normal case), since it
19956 ensures that each STRD is also 64-bit aligned. */
19958 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19963 rtx par
= NULL_RTX
;
19964 rtx dwarf
= NULL_RTX
;
19968 num_regs
= bit_count (saved_regs_mask
);
19970 /* Must be at least one register to save, and can't save SP or PC. */
19971 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19972 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19973 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19975 /* Create sequence for DWARF info. All the frame-related data for
19976 debugging is held in this wrapper. */
19977 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19979 /* Describe the stack adjustment. */
19980 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19981 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19982 RTX_FRAME_RELATED_P (tmp
) = 1;
19983 XVECEXP (dwarf
, 0, 0) = tmp
;
19985 /* Find the first register. */
19986 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
  /* If there's an odd number of registers to push, start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
19995 if ((num_regs
& 1) != 0)
19997 rtx reg
, mem
, insn
;
19999 reg
= gen_rtx_REG (SImode
, regno
);
20001 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
20002 stack_pointer_rtx
));
20004 mem
= gen_frame_mem (Pmode
,
20006 (Pmode
, stack_pointer_rtx
,
20007 plus_constant (Pmode
, stack_pointer_rtx
,
20010 tmp
= gen_rtx_SET (mem
, reg
);
20011 RTX_FRAME_RELATED_P (tmp
) = 1;
20012 insn
= emit_insn (tmp
);
20013 RTX_FRAME_RELATED_P (insn
) = 1;
20014 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20015 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
20016 RTX_FRAME_RELATED_P (tmp
) = 1;
20019 XVECEXP (dwarf
, 0, i
) = tmp
;
20023 while (i
< num_regs
)
20024 if (saved_regs_mask
& (1 << regno
))
20026 rtx reg1
, reg2
, mem1
, mem2
;
20027 rtx tmp0
, tmp1
, tmp2
;
20030 /* Find the register to pair with this one. */
20031 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
20035 reg1
= gen_rtx_REG (SImode
, regno
);
20036 reg2
= gen_rtx_REG (SImode
, regno2
);
20043 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20046 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20048 -4 * (num_regs
- 1)));
20049 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
20050 plus_constant (Pmode
, stack_pointer_rtx
,
20052 tmp1
= gen_rtx_SET (mem1
, reg1
);
20053 tmp2
= gen_rtx_SET (mem2
, reg2
);
20054 RTX_FRAME_RELATED_P (tmp0
) = 1;
20055 RTX_FRAME_RELATED_P (tmp1
) = 1;
20056 RTX_FRAME_RELATED_P (tmp2
) = 1;
20057 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
20058 XVECEXP (par
, 0, 0) = tmp0
;
20059 XVECEXP (par
, 0, 1) = tmp1
;
20060 XVECEXP (par
, 0, 2) = tmp2
;
20061 insn
= emit_insn (par
);
20062 RTX_FRAME_RELATED_P (insn
) = 1;
20063 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20067 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20070 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20073 tmp1
= gen_rtx_SET (mem1
, reg1
);
20074 tmp2
= gen_rtx_SET (mem2
, reg2
);
20075 RTX_FRAME_RELATED_P (tmp1
) = 1;
20076 RTX_FRAME_RELATED_P (tmp2
) = 1;
20077 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20078 XVECEXP (par
, 0, 0) = tmp1
;
20079 XVECEXP (par
, 0, 1) = tmp2
;
20083 /* Create unwind information. This is an approximation. */
20084 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
20085 plus_constant (Pmode
,
20089 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
20090 plus_constant (Pmode
,
20095 RTX_FRAME_RELATED_P (tmp1
) = 1;
20096 RTX_FRAME_RELATED_P (tmp2
) = 1;
20097 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
20098 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
20100 regno
= regno2
+ 1;
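/* As a sketch of what the code above produces for, e.g., {r4, r5, r6} (an
   odd count), the first single-word store takes the whole stack adjustment
   and the remaining pair becomes a 64-bit aligned STRD:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   The exact offsets depend on how many registers are in the mask.  */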
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more
   scheduling freedom and can be turned into an STM by peephole
   optimizations.  */
20116 arm_emit_strd_push (unsigned long saved_regs_mask
)
20119 int i
, j
, dwarf_index
= 0;
20121 rtx dwarf
= NULL_RTX
;
20122 rtx insn
= NULL_RTX
;
  /* TODO: More efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
20128 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20129 if (saved_regs_mask
& (1 << i
))
20132 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20133 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
20134 gcc_assert (num_regs
> 0);
20136 /* Create sequence for DWARF info. */
20137 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20139 /* For dwarf info, we generate explicit stack update. */
20140 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20141 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20142 RTX_FRAME_RELATED_P (tmp
) = 1;
20143 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20145 /* Save registers. */
20146 offset
= - 4 * num_regs
;
20148 while (j
<= LAST_ARM_REGNUM
)
20149 if (saved_regs_mask
& (1 << j
))
20152 && (saved_regs_mask
& (1 << (j
+ 1))))
20154 /* Current register and previous register form register pair for
20155 which STRD can be generated. */
20158 /* Allocate stack space for all saved registers. */
20159 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20160 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20161 mem
= gen_frame_mem (DImode
, tmp
);
20164 else if (offset
> 0)
20165 mem
= gen_frame_mem (DImode
,
20166 plus_constant (Pmode
,
20170 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20172 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
20173 RTX_FRAME_RELATED_P (tmp
) = 1;
20174 tmp
= emit_insn (tmp
);
20176 /* Record the first store insn. */
20177 if (dwarf_index
== 1)
20180 /* Generate dwarf info. */
20181 mem
= gen_frame_mem (SImode
,
20182 plus_constant (Pmode
,
20185 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20186 RTX_FRAME_RELATED_P (tmp
) = 1;
20187 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20189 mem
= gen_frame_mem (SImode
,
20190 plus_constant (Pmode
,
20193 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20194 RTX_FRAME_RELATED_P (tmp
) = 1;
20195 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20202 /* Emit a single word store. */
20205 /* Allocate stack space for all saved registers. */
20206 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20207 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20208 mem
= gen_frame_mem (SImode
, tmp
);
20211 else if (offset
> 0)
20212 mem
= gen_frame_mem (SImode
,
20213 plus_constant (Pmode
,
20217 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20219 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20220 RTX_FRAME_RELATED_P (tmp
) = 1;
20221 tmp
= emit_insn (tmp
);
20223 /* Record the first store insn. */
20224 if (dwarf_index
== 1)
20227 /* Generate dwarf info. */
20228 mem
= gen_frame_mem (SImode
,
20229 plus_constant(Pmode
,
20232 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20233 RTX_FRAME_RELATED_P (tmp
) = 1;
20234 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20243 /* Attach dwarf info to the first insn we generate. */
20244 gcc_assert (insn
!= NULL_RTX
);
20245 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20246 RTX_FRAME_RELATED_P (insn
) = 1;
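/* Sketch of the ARM-mode output for, e.g., {r4, r5, r7}: r4/r5 form an
   even/odd consecutive pair (as required by ARM STRD), so they go out as a
   doubleword store that also allocates all the stack space; r7 falls back
   to a single-word store at its offset:

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]
   */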
20249 /* Generate and emit an insn that we will recognize as a push_multi.
20250 Unfortunately, since this insn does not reflect very well the actual
20251 semantics of the operation, we need to annotate the insn for the benefit
20252 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20253 MASK for registers that should be annotated for DWARF2 frame unwind
20256 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20259 int num_dwarf_regs
= 0;
20263 int dwarf_par_index
;
20266 /* We don't record the PC in the dwarf frame information. */
20267 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20269 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20271 if (mask
& (1 << i
))
20273 if (dwarf_regs_mask
& (1 << i
))
20277 gcc_assert (num_regs
&& num_regs
<= 16);
20278 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20280 /* For the body of the insn we are going to generate an UNSPEC in
20281 parallel with several USEs. This allows the insn to be recognized
20282 by the push_multi pattern in the arm.md file.
20284 The body of the insn looks something like this:
20287 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20288 (const_int:SI <num>)))
20289 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20295 For the frame note however, we try to be more explicit and actually
20296 show each register being stored into the stack frame, plus a (single)
20297 decrement of the stack pointer. We do it this way in order to be
20298 friendly to the stack unwinding code, which only wants to see a single
20299 stack decrement per instruction. The RTL we generate for the note looks
20300 something like this:
20303 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20304 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20305 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20306 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20310 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20311 instead we'd have a parallel expression detailing all
20312 the stores to the various memory addresses so that debug
20313 information is more up-to-date. Remember however while writing
20314 this to take care of the constraints with the push instruction.
20316 Note also that this has to be taken care of for the VFP registers.
20318 For more see PR43399. */
20320 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20321 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20322 dwarf_par_index
= 1;
20324 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20326 if (mask
& (1 << i
))
20328 reg
= gen_rtx_REG (SImode
, i
);
20330 XVECEXP (par
, 0, 0)
20331 = gen_rtx_SET (gen_frame_mem
20333 gen_rtx_PRE_MODIFY (Pmode
,
20336 (Pmode
, stack_pointer_rtx
,
20339 gen_rtx_UNSPEC (BLKmode
,
20340 gen_rtvec (1, reg
),
20341 UNSPEC_PUSH_MULT
));
20343 if (dwarf_regs_mask
& (1 << i
))
20345 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20347 RTX_FRAME_RELATED_P (tmp
) = 1;
20348 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20355 for (j
= 1, i
++; j
< num_regs
; i
++)
20357 if (mask
& (1 << i
))
20359 reg
= gen_rtx_REG (SImode
, i
);
20361 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20363 if (dwarf_regs_mask
& (1 << i
))
20366 = gen_rtx_SET (gen_frame_mem
20368 plus_constant (Pmode
, stack_pointer_rtx
,
20371 RTX_FRAME_RELATED_P (tmp
) = 1;
20372 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20379 par
= emit_insn (par
);
20381 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20382 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20383 RTX_FRAME_RELATED_P (tmp
) = 1;
20384 XVECEXP (dwarf
, 0, 0) = tmp
;
20386 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
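/* For illustration: arm_expand_prologue below calls this as, e.g.,

	emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);

   and, for varargs functions, as

	emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
			     (0xf0 >> (args_to_push / 4)) & 0xf);

   The insn produced assembles to a single "push {...}"/"stmfd sp!, {...}"
   of every register in MASK, while the attached note describes each store
   individually for the unwinder.  */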
20391 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20392 SIZE is the offset to be adjusted.
20393 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20395 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20399 RTX_FRAME_RELATED_P (insn
) = 1;
20400 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20401 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20404 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20405 SAVED_REGS_MASK shows which registers need to be restored.
20407 Unfortunately, since this insn does not reflect very well the actual
20408 semantics of the operation, we need to annotate the insn for the benefit
20409 of DWARF2 frame unwind information. */
20411 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20416 rtx dwarf
= NULL_RTX
;
20418 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20422 offset_adj
= return_in_pc
? 1 : 0;
20423 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20424 if (saved_regs_mask
& (1 << i
))
20427 gcc_assert (num_regs
&& num_regs
<= 16);
20429 /* If SP is in reglist, then we don't emit SP update insn. */
20430 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20432 /* The parallel needs to hold num_regs SETs
20433 and one SET for the stack update. */
20434 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20437 XVECEXP (par
, 0, 0) = ret_rtx
;
20441 /* Increment the stack pointer, based on there being
20442 num_regs 4-byte registers to restore. */
20443 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20444 plus_constant (Pmode
,
20447 RTX_FRAME_RELATED_P (tmp
) = 1;
20448 XVECEXP (par
, 0, offset_adj
) = tmp
;
20451 /* Now restore every reg, which may include PC. */
20452 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20453 if (saved_regs_mask
& (1 << i
))
20455 reg
= gen_rtx_REG (SImode
, i
);
20456 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20458 /* Emit single load with writeback. */
20459 tmp
= gen_frame_mem (SImode
,
20460 gen_rtx_POST_INC (Pmode
,
20461 stack_pointer_rtx
));
20462 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20463 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20467 tmp
= gen_rtx_SET (reg
,
20470 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20471 RTX_FRAME_RELATED_P (tmp
) = 1;
20472 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20474 /* We need to maintain a sequence for DWARF info too. As dwarf info
20475 should not have PC, skip PC. */
20476 if (i
!= PC_REGNUM
)
20477 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20483 par
= emit_jump_insn (par
);
20485 par
= emit_insn (par
);
20487 REG_NOTES (par
) = dwarf
;
20489 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20490 stack_pointer_rtx
, stack_pointer_rtx
);
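/* For illustration, restoring {r4, r5, pc} here builds a PARALLEL roughly
   of the form

	[(return)
	 (set (reg sp) (plus (reg sp) (const_int 12)))
	 (set (reg r4) (mem (reg sp)))
	 (set (reg r5) (mem (plus (reg sp) (const_int 4))))
	 (set (reg pc) (mem (plus (reg sp) (const_int 8))))]

   which is recognized as a pop_multi-style pattern (see the
   pop_multiple_with_stack_update_and_return reference below) and assembles
   to something like "pop {r4, r5, pc}" or the equivalent LDMFD.  */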
20493 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20494 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20496 Unfortunately, since this insn does not reflect very well the actual
20497 semantics of the operation, we need to annotate the insn for the benefit
20498 of DWARF2 frame unwind information. */
20500 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20504 rtx dwarf
= NULL_RTX
;
20507 gcc_assert (num_regs
&& num_regs
<= 32);
20509 /* Workaround ARM10 VFPr1 bug. */
20510 if (num_regs
== 2 && !arm_arch6
)
20512 if (first_reg
== 15)
20518 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20519 there could be up to 32 D-registers to restore.
20520 If there are more than 16 D-registers, make two recursive calls,
20521 each of which emits one pop_multi instruction. */
20524 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20525 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20529 /* The parallel needs to hold num_regs SETs
20530 and one SET for the stack update. */
20531 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20533 /* Increment the stack pointer, based on there being
20534 num_regs 8-byte registers to restore. */
20535 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20536 RTX_FRAME_RELATED_P (tmp
) = 1;
20537 XVECEXP (par
, 0, 0) = tmp
;
20539 /* Now show every reg that will be restored, using a SET for each. */
20540 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20542 reg
= gen_rtx_REG (DFmode
, i
);
20544 tmp
= gen_rtx_SET (reg
,
20547 plus_constant (Pmode
, base_reg
, 8 * j
)));
20548 RTX_FRAME_RELATED_P (tmp
) = 1;
20549 XVECEXP (par
, 0, j
+ 1) = tmp
;
20551 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20556 par
= emit_insn (par
);
20557 REG_NOTES (par
) = dwarf
;
  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
20560 if (REGNO (base_reg
) == IP_REGNUM
)
20562 RTX_FRAME_RELATED_P (par
) = 1;
20563 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20566 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20567 base_reg
, base_reg
);
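/* Illustration: popping N D-registers from BASE builds a PARALLEL roughly
   of the form

	[(set (reg BASE) (plus (reg BASE) (const_int 8*N)))
	 (set (reg:DF d_first)   (mem (reg BASE)))
	 (set (reg:DF d_next)    (mem (plus (reg BASE) (const_int 8))))
	 ...]

   which assembles to something like a single VLDM of the register range,
   with one REG_CFA_RESTORE note per register for the unwinder.  */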
20570 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20571 number of registers are being popped, multiple LDRD patterns are created for
20572 all register pairs. If odd number of registers are popped, last register is
20573 loaded by using LDR pattern. */
20575 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20579 rtx par
= NULL_RTX
;
20580 rtx dwarf
= NULL_RTX
;
20581 rtx tmp
, reg
, tmp1
;
20582 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20584 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20585 if (saved_regs_mask
& (1 << i
))
20588 gcc_assert (num_regs
&& num_regs
<= 16);
20590 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20591 to be popped. So, if num_regs is even, now it will become odd,
20592 and we can generate pop with PC. If num_regs is odd, it will be
20593 even now, and ldr with return can be generated for PC. */
20597 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20599 /* Var j iterates over all the registers to gather all the registers in
20600 saved_regs_mask. Var i gives index of saved registers in stack frame.
20601 A PARALLEL RTX of register-pair is created here, so that pattern for
20602 LDRD can be matched. As PC is always last register to be popped, and
20603 we have already decremented num_regs if PC, we don't have to worry
20604 about PC in this loop. */
20605 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20606 if (saved_regs_mask
& (1 << j
))
20608 /* Create RTX for memory load. */
20609 reg
= gen_rtx_REG (SImode
, j
);
20610 tmp
= gen_rtx_SET (reg
,
20611 gen_frame_mem (SImode
,
20612 plus_constant (Pmode
,
20613 stack_pointer_rtx
, 4 * i
)));
20614 RTX_FRAME_RELATED_P (tmp
) = 1;
20618 /* When saved-register index (i) is even, the RTX to be emitted is
20619 yet to be created. Hence create it first. The LDRD pattern we
20620 are generating is :
20621 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20622 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20623 where target registers need not be consecutive. */
20624 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20628 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20629 added as 0th element and if i is odd, reg_i is added as 1st element
20630 of LDRD pattern shown above. */
20631 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20632 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20636 /* When saved-register index (i) is odd, RTXs for both the registers
20637 to be loaded are generated in above given LDRD pattern, and the
20638 pattern can be emitted now. */
20639 par
= emit_insn (par
);
20640 REG_NOTES (par
) = dwarf
;
20641 RTX_FRAME_RELATED_P (par
) = 1;
  /* If the number of registers pushed is odd AND return_in_pc is false, OR
     the number of registers is even AND return_in_pc is true, the last
     register is popped using LDR.  It can be PC as well.  Hence, adjust the
     stack first and then use LDR with post increment.  */
20652 /* Increment the stack pointer, based on there being
20653 num_regs 4-byte registers to restore. */
20654 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20655 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20656 RTX_FRAME_RELATED_P (tmp
) = 1;
20657 tmp
= emit_insn (tmp
);
20660 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20661 stack_pointer_rtx
, stack_pointer_rtx
);
20666 if (((num_regs
% 2) == 1 && !return_in_pc
)
20667 || ((num_regs
% 2) == 0 && return_in_pc
))
20669 /* Scan for the single register to be popped. Skip until the saved
20670 register is found. */
20671 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20673 /* Gen LDR with post increment here. */
20674 tmp1
= gen_rtx_MEM (SImode
,
20675 gen_rtx_POST_INC (SImode
,
20676 stack_pointer_rtx
));
20677 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20679 reg
= gen_rtx_REG (SImode
, j
);
20680 tmp
= gen_rtx_SET (reg
, tmp1
);
20681 RTX_FRAME_RELATED_P (tmp
) = 1;
20682 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20686 /* If return_in_pc, j must be PC_REGNUM. */
20687 gcc_assert (j
== PC_REGNUM
);
20688 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20689 XVECEXP (par
, 0, 0) = ret_rtx
;
20690 XVECEXP (par
, 0, 1) = tmp
;
20691 par
= emit_jump_insn (par
);
20695 par
= emit_insn (tmp
);
20696 REG_NOTES (par
) = dwarf
;
20697 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20698 stack_pointer_rtx
, stack_pointer_rtx
);
20702 else if ((num_regs
% 2) == 1 && return_in_pc
)
20704 /* There are 2 registers to be popped. So, generate the pattern
20705 pop_multiple_with_stack_update_and_return to pop in PC. */
20706 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
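/* Illustration: for {r4, r6, pc} the code above first pops r4/r6 as a
   non-writeback Thumb-2 LDRD pair (target registers need not be
   consecutive), then adjusts SP, and finally returns by loading PC with a
   post-incremented LDR, roughly:

	ldrd	r4, r6, [sp]
	add	sp, sp, #8
	ldr	pc, [sp], #4
   */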
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.
   TODO: Add a peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
20724 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20728 rtx par
= NULL_RTX
;
20729 rtx dwarf
= NULL_RTX
;
20732 /* Restore saved registers. */
20733 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20735 while (j
<= LAST_ARM_REGNUM
)
20736 if (saved_regs_mask
& (1 << j
))
20739 && (saved_regs_mask
& (1 << (j
+ 1)))
20740 && (j
+ 1) != PC_REGNUM
)
20742 /* Current register and next register form register pair for which
20743 LDRD can be generated. PC is always the last register popped, and
20744 we handle it separately. */
20746 mem
= gen_frame_mem (DImode
,
20747 plus_constant (Pmode
,
20751 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20753 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20754 tmp
= emit_insn (tmp
);
20755 RTX_FRAME_RELATED_P (tmp
) = 1;
20757 /* Generate dwarf info. */
20759 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20760 gen_rtx_REG (SImode
, j
),
20762 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20763 gen_rtx_REG (SImode
, j
+ 1),
20766 REG_NOTES (tmp
) = dwarf
;
20771 else if (j
!= PC_REGNUM
)
20773 /* Emit a single word load. */
20775 mem
= gen_frame_mem (SImode
,
20776 plus_constant (Pmode
,
20780 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20782 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20783 tmp
= emit_insn (tmp
);
20784 RTX_FRAME_RELATED_P (tmp
) = 1;
20786 /* Generate dwarf info. */
20787 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20788 gen_rtx_REG (SImode
, j
),
20794 else /* j == PC_REGNUM */
20800 /* Update the stack. */
20803 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20804 plus_constant (Pmode
,
20807 tmp
= emit_insn (tmp
);
20808 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20809 stack_pointer_rtx
, stack_pointer_rtx
);
20813 if (saved_regs_mask
& (1 << PC_REGNUM
))
20815 /* Only PC is to be popped. */
20816 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20817 XVECEXP (par
, 0, 0) = ret_rtx
;
20818 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20819 gen_frame_mem (SImode
,
20820 gen_rtx_POST_INC (SImode
,
20821 stack_pointer_rtx
)));
20822 RTX_FRAME_RELATED_P (tmp
) = 1;
20823 XVECEXP (par
, 0, 1) = tmp
;
20824 par
= emit_jump_insn (par
);
20826 /* Generate dwarf info. */
20827 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20828 gen_rtx_REG (SImode
, PC_REGNUM
),
20830 REG_NOTES (par
) = dwarf
;
20831 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20832 stack_pointer_rtx
, stack_pointer_rtx
);
20836 /* Calculate the size of the return value that is passed in registers. */
20838 arm_size_return_regs (void)
20842 if (crtl
->return_rtx
!= 0)
20843 mode
= GET_MODE (crtl
->return_rtx
);
20845 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20847 return GET_MODE_SIZE (mode
);
20850 /* Return true if the current function needs to save/restore LR. */
20852 thumb_force_lr_save (void)
20854 return !cfun
->machine
->lr_save_eliminated
20855 && (!leaf_function_p ()
20856 || thumb_far_jump_used_p ()
20857 || df_regs_ever_live_p (LR_REGNUM
));
/* We do not know if r3 will be available when there is an indirect
   tailcall in this particular case.  */
20864 is_indirect_tailcall_p (rtx call
)
20866 rtx pat
= PATTERN (call
);
20868 /* Indirect tail call. */
20869 pat
= XVECEXP (pat
, 0, 0);
20870 if (GET_CODE (pat
) == SET
)
20871 pat
= SET_SRC (pat
);
20873 pat
= XEXP (XEXP (pat
, 0), 0);
20874 return REG_P (pat
);
20877 /* Return true if r3 is used by any of the tail call insns in the
20878 current function. */
20880 any_sibcall_could_use_r3 (void)
20885 if (!crtl
->tail_call_emit
)
20887 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20888 if (e
->flags
& EDGE_SIBCALL
)
20890 rtx_insn
*call
= BB_END (e
->src
);
20891 if (!CALL_P (call
))
20892 call
= prev_nonnote_nondebug_insn (call
);
20893 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20894 if (find_regno_fusage (call
, USE
, 3)
20895 || is_indirect_tailcall_p (call
))
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout, from the highest address (old stack pointer)
   down to the current stack pointer, looks like this:

     old stack pointer     ->  saved arguments for vararg functions
     hard FP & arg pointer ->  stack frame and call-saved registers
     soft frame pointer    ->  local variables
     locals base pointer   ->  outgoing arguments
     current stack pointer ->  (bottom of the frame)

   For a given function some or all of these stack components
   may not be needed, giving rise to the possibility of
   eliminating some of the registers.

   The values returned by this function must reflect the behavior
   of arm_expand_prologue () and arm_compute_save_reg_mask ().

   The sign of the number returned reflects the direction of stack
   growth, so the values are positive for all eliminations except
   from the soft frame pointer to the hard frame pointer.

   SFP may point just inside the local variables block to ensure correct
   alignment.  */
20951 /* Calculate stack offsets. These are used to calculate register elimination
20952 offsets and in prologue/epilogue code. Also calculates which registers
20953 should be saved. */
20955 static arm_stack_offsets
*
20956 arm_get_frame_offsets (void)
20958 struct arm_stack_offsets
*offsets
;
20959 unsigned long func_type
;
20963 HOST_WIDE_INT frame_size
;
20966 offsets
= &cfun
->machine
->stack_offsets
;
  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     We work around this by returning the cached offsets once reload
     has completed: we will only be calling RTL expanders that need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */
20978 if (reload_completed
)
20981 /* Initially this is the size of the local variables. It will translated
20982 into an offset once we have determined the size of preceding data. */
20983 frame_size
= ROUND_UP_WORD (get_frame_size ());
20985 leaf
= leaf_function_p ();
20987 /* Space for variadic functions. */
20988 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20990 /* In Thumb mode this is incorrect, but never used. */
20992 = (offsets
->saved_args
20993 + arm_compute_static_chain_stack_bytes ()
20994 + (frame_pointer_needed
? 4 : 0));
20998 unsigned int regno
;
21000 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
21001 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
21002 saved
= core_saved
;
21004 /* We know that SP will be doubleword aligned on entry, and we must
21005 preserve that condition at any subroutine call. We also require the
21006 soft frame pointer to be doubleword aligned. */
21008 if (TARGET_REALLY_IWMMXT
)
21010 /* Check for the call-saved iWMMXt registers. */
21011 for (regno
= FIRST_IWMMXT_REGNUM
;
21012 regno
<= LAST_IWMMXT_REGNUM
;
21014 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
21018 func_type
= arm_current_func_type ();
21019 /* Space for saved VFP registers. */
21020 if (! IS_VOLATILE (func_type
)
21021 && TARGET_HARD_FLOAT
)
21022 saved
+= arm_get_vfp_saved_size ();
21024 else /* TARGET_THUMB1 */
21026 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
21027 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
21028 saved
= core_saved
;
21029 if (TARGET_BACKTRACE
)
21033 /* Saved registers include the stack frame. */
21034 offsets
->saved_regs
21035 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
21036 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
21040 if (leaf
&& frame_size
== 0
21041 /* However if it calls alloca(), we have a dynamically allocated
21042 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21043 && ! cfun
->calls_alloca
)
21045 offsets
->outgoing_args
= offsets
->soft_frame
;
21046 offsets
->locals_base
= offsets
->soft_frame
;
21050 /* Ensure SFP has the correct alignment. */
21051 if (ARM_DOUBLEWORD_ALIGN
21052 && (offsets
->soft_frame
& 7))
21054 offsets
->soft_frame
+= 4;
21055 /* Try to align stack by pushing an extra reg. Don't bother doing this
21056 when there is a stack frame as the alignment will be rolled into
21057 the normal stack adjustment. */
21058 if (frame_size
+ crtl
->outgoing_args_size
== 0)
21062 /* Register r3 is caller-saved. Normally it does not need to be
21063 saved on entry by the prologue. However if we choose to save
21064 it for padding then we may confuse the compiler into thinking
21065 a prologue sequence is required when in fact it is not. This
21066 will occur when shrink-wrapping if r3 is used as a scratch
21067 register and there are no other callee-saved writes.
21069 This situation can be avoided when other callee-saved registers
21070 are available and r3 is not mandatory if we choose a callee-saved
21071 register for padding. */
21072 bool prefer_callee_reg_p
= false;
21074 /* If it is safe to use r3, then do so. This sometimes
21075 generates better code on Thumb-2 by avoiding the need to
21076 use 32-bit push/pop instructions. */
21077 if (! any_sibcall_could_use_r3 ()
21078 && arm_size_return_regs () <= 12
21079 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
21081 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
21084 if (!TARGET_THUMB2
)
21085 prefer_callee_reg_p
= true;
21088 || prefer_callee_reg_p
)
21090 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
21092 /* Avoid fixed registers; they may be changed at
21093 arbitrary times so it's unsafe to restore them
21094 during the epilogue. */
21096 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
21106 offsets
->saved_regs
+= 4;
21107 offsets
->saved_regs_mask
|= (1 << reg
);
21112 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
21113 offsets
->outgoing_args
= (offsets
->locals_base
21114 + crtl
->outgoing_args_size
);
21116 if (ARM_DOUBLEWORD_ALIGN
)
21118 /* Ensure SP remains doubleword aligned. */
21119 if (offsets
->outgoing_args
& 7)
21120 offsets
->outgoing_args
+= 4;
21121 gcc_assert (!(offsets
->outgoing_args
& 7));
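/* Worked example (hypothetical, ignoring the static chain and the caller
   interworking slot): 4 bytes of pretend args, {r4, r5, lr} saved and 20
   bytes of locals with no outgoing arguments would give roughly

	saved_args    =  4
	saved_regs    =  4 + 12 = 16
	soft_frame    = 16			(already 8-byte aligned)
	locals_base   = 16 + 20 = 36
	outgoing_args = 36, bumped to 40 so SP stays doubleword aligned.  */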
21128 /* Calculate the relative offsets for the different stack pointers. Positive
21129 offsets are in the direction of stack growth. */
21132 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
21134 arm_stack_offsets
*offsets
;
21136 offsets
= arm_get_frame_offsets ();
21138 /* OK, now we have enough information to compute the distances.
21139 There must be an entry in these switch tables for each pair
21140 of registers in ELIMINABLE_REGS, even if some of the entries
21141 seem to be redundant or useless. */
21144 case ARG_POINTER_REGNUM
:
21147 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21150 case FRAME_POINTER_REGNUM
:
21151 /* This is the reverse of the soft frame pointer
21152 to hard frame pointer elimination below. */
21153 return offsets
->soft_frame
- offsets
->saved_args
;
21155 case ARM_HARD_FRAME_POINTER_REGNUM
:
21156 /* This is only non-zero in the case where the static chain register
21157 is stored above the frame. */
21158 return offsets
->frame
- offsets
->saved_args
- 4;
21160 case STACK_POINTER_REGNUM
:
21161 /* If nothing has been pushed on the stack at all
21162 then this will return -4. This *is* correct! */
21163 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
21166 gcc_unreachable ();
21168 gcc_unreachable ();
21170 case FRAME_POINTER_REGNUM
:
21173 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21176 case ARM_HARD_FRAME_POINTER_REGNUM
:
21177 /* The hard frame pointer points to the top entry in the
21178 stack frame. The soft frame pointer to the bottom entry
21179 in the stack frame. If there is no stack frame at all,
21180 then they are identical. */
21182 return offsets
->frame
- offsets
->soft_frame
;
21184 case STACK_POINTER_REGNUM
:
21185 return offsets
->outgoing_args
- offsets
->soft_frame
;
21188 gcc_unreachable ();
21190 gcc_unreachable ();
21193 /* You cannot eliminate from the stack pointer.
21194 In theory you could eliminate from the hard frame
21195 pointer to the stack pointer, but this will never
21196 happen, since if a stack frame is not needed the
21197 hard frame pointer will never be used. */
21198 gcc_unreachable ();
21202 /* Given FROM and TO register numbers, say whether this elimination is
21203 allowed. Frame pointer elimination is automatically handled.
21205 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21206 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21207 pointer, we must eliminate FRAME_POINTER_REGNUM into
21208 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21209 ARG_POINTER_REGNUM. */
21212 arm_can_eliminate (const int from
, const int to
)
21214 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21215 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21216 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21217 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21221 /* Emit RTL to save coprocessor registers on function entry. Returns the
21222 number of bytes pushed. */
21225 arm_save_coproc_regs(void)
21227 int saved_size
= 0;
21229 unsigned start_reg
;
21232 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21233 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21235 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21236 insn
= gen_rtx_MEM (V2SImode
, insn
);
21237 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21238 RTX_FRAME_RELATED_P (insn
) = 1;
21242 if (TARGET_HARD_FLOAT
)
21244 start_reg
= FIRST_VFP_REGNUM
;
21246 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21248 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21249 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21251 if (start_reg
!= reg
)
21252 saved_size
+= vfp_emit_fstmd (start_reg
,
21253 (reg
- start_reg
) / 2);
21254 start_reg
= reg
+ 2;
21257 if (start_reg
!= reg
)
21258 saved_size
+= vfp_emit_fstmd (start_reg
,
21259 (reg
- start_reg
) / 2);
21265 /* Set the Thumb frame pointer from the stack pointer. */
21268 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21270 HOST_WIDE_INT amount
;
21273 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21275 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21276 stack_pointer_rtx
, GEN_INT (amount
)));
21279 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21280 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21281 expects the first two operands to be the same. */
21284 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21286 hard_frame_pointer_rtx
));
21290 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21291 hard_frame_pointer_rtx
,
21292 stack_pointer_rtx
));
21294 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21295 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21296 RTX_FRAME_RELATED_P (dwarf
) = 1;
21297 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21300 RTX_FRAME_RELATED_P (insn
) = 1;
21303 struct scratch_reg
{
21308 /* Return a short-lived scratch register for use as a 2nd scratch register on
21309 function entry after the registers are saved in the prologue. This register
21310 must be released by means of release_scratch_register_on_entry. IP is not
21311 considered since it is always used as the 1st scratch register if available.
21313 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21314 mask of live registers. */
21317 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21318 unsigned long live_regs
)
21324 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21330 for (i
= 4; i
< 11; i
++)
21331 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21339 /* If IP is used as the 1st scratch register for a nested function,
21340 then either r3 wasn't available or is used to preserve IP. */
21341 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21343 regno
= (regno1
== 3 ? 2 : 3);
21345 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21350 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21353 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21354 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21355 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21356 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21357 RTX_FRAME_RELATED_P (insn
) = 1;
21358 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21362 /* Release a scratch register obtained from the preceding function. */
21365 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21369 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21370 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21371 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21372 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21373 RTX_FRAME_RELATED_P (insn
) = 1;
21374 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21378 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif
21384 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21385 inclusive. These are offsets from the current stack pointer. REGNO1
21386 is the index number of the 1st scratch register and LIVE_REGS is the
21387 mask of live registers. */
21390 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21391 unsigned int regno1
, unsigned long live_regs
)
21393 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21395 /* See if we have a constant small number of probes to generate. If so,
21396 that's the easy case. */
21397 if (size
<= PROBE_INTERVAL
)
21399 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21400 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21401 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21404 /* The run-time loop is made up of 10 insns in the generic case while the
21405 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21406 else if (size
<= 5 * PROBE_INTERVAL
)
21408 HOST_WIDE_INT i
, rem
;
21410 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21411 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21412 emit_stack_probe (reg1
);
21414 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21415 it exceeds SIZE. If only two probes are needed, this will not
21416 generate any code. Then probe at FIRST + SIZE. */
21417 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21419 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21420 emit_stack_probe (reg1
);
21423 rem
= size
- (i
- PROBE_INTERVAL
);
21424 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21426 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21427 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21430 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21433 /* Otherwise, do the same as above, but in a loop. Note that we must be
21434 extra careful with variables wrapping around because we might be at
21435 the very top (or the very bottom) of the address space and we have
21436 to be able to handle this case properly; in particular, we use an
21437 equality test for the loop condition. */
21440 HOST_WIDE_INT rounded_size
;
21441 struct scratch_reg sr
;
21443 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21445 emit_move_insn (reg1
, GEN_INT (first
));
21448 /* Step 1: round SIZE to the previous multiple of the interval. */
21450 rounded_size
= size
& -PROBE_INTERVAL
;
21451 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21454 /* Step 2: compute initial and final value of the loop counter. */
21456 /* TEST_ADDR = SP + FIRST. */
21457 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21459 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21460 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21463 /* Step 3: the loop
21467 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21470 while (TEST_ADDR != LAST_ADDR)
21472 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21473 until it is equal to ROUNDED_SIZE. */
21475 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21478 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21479 that SIZE is equal to ROUNDED_SIZE. */
21481 if (size
!= rounded_size
)
21483 HOST_WIDE_INT rem
= size
- rounded_size
;
21485 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21487 emit_set_insn (sr
.reg
,
21488 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21489 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21490 PROBE_INTERVAL
- rem
));
21493 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21496 release_scratch_register_on_entry (&sr
);
21499 /* Make sure nothing is scheduled before we are done. */
21500 emit_insn (gen_blockage ());
21503 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21504 absolute addresses. */
21507 output_probe_stack_range (rtx reg1
, rtx reg2
)
21509 static int labelno
= 0;
21513 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21516 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21518 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21520 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21521 output_asm_insn ("sub\t%0, %0, %1", xops
);
21523 /* Probe at TEST_ADDR. */
21524 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21526 /* Test if TEST_ADDR == LAST_ADDR. */
21528 output_asm_insn ("cmp\t%0, %1", xops
);
21531 fputs ("\tbne\t", asm_out_file
);
21532 assemble_name_raw (asm_out_file
, loop_lab
);
21533 fputc ('\n', asm_out_file
);
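/* The probe loop emitted above therefore looks roughly like this, assuming
   the default 4 KB probe interval and with r4/r5 standing in for the two
   scratch registers:

   .LPSRL0:
	sub	r4, r4, #4096		@ TEST_ADDR -= PROBE_INTERVAL
	str	r0, [r4, #0]		@ probe
	cmp	r4, r5			@ reached LAST_ADDR yet?
	bne	.LPSRL0
   */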
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
21541 arm_expand_prologue (void)
21546 unsigned long live_regs_mask
;
21547 unsigned long func_type
;
21549 int saved_pretend_args
= 0;
21550 int saved_regs
= 0;
21551 unsigned HOST_WIDE_INT args_to_push
;
21552 HOST_WIDE_INT size
;
21553 arm_stack_offsets
*offsets
;
21556 func_type
= arm_current_func_type ();
21558 /* Naked functions don't have prologues. */
21559 if (IS_NAKED (func_type
))
21561 if (flag_stack_usage_info
)
21562 current_function_static_stack_size
= 0;
  /* Make a copy of the pretend args size (crtl->args.pretend_args_size)
     as we may need to modify it locally.  */
21567 args_to_push
= crtl
->args
.pretend_args_size
;
21569 /* Compute which register we will have to save onto the stack. */
21570 offsets
= arm_get_frame_offsets ();
21571 live_regs_mask
= offsets
->saved_regs_mask
;
21573 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21575 if (IS_STACKALIGN (func_type
))
      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
21590 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21592 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21593 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21595 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21596 RTX_FRAME_RELATED_P (insn
) = 1;
21597 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21599 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21601 /* ??? The CFA changes here, which may cause GDB to conclude that it
21602 has entered a different function. That said, the unwind info is
21603 correct, individually, before and after this instruction because
21604 we've described the save of SP, which will override the default
21605 handling of SP as restoring from the CFA. */
21606 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21609 /* The static chain register is the same as the IP register. If it is
21610 clobbered when creating the frame, we need to save and restore it. */
21611 clobber_ip
= IS_NESTED (func_type
)
21612 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21613 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21614 && !df_regs_ever_live_p (LR_REGNUM
)
21615 && arm_r3_live_at_start_p ()));
21617 /* Find somewhere to store IP whilst the frame is being created.
21618 We try the following places in order:
21620 1. The last argument register r3 if it is available.
21621 2. A slot on the stack above the frame if there are no
21622 arguments to push onto the stack.
21623 3. Register r3 again, after pushing the argument registers
21624 onto the stack, if this is a varargs function.
21625 4. The last slot on the stack created for the arguments to
21626 push, if this isn't a varargs function.
21628 Note - we only need to tell the dwarf2 backend about the SP
21629 adjustment in the second variant; the static chain register
21630 doesn't need to be unwound, as it doesn't contain a value
21631 inherited from the caller. */
21634 if (!arm_r3_live_at_start_p ())
21635 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21636 else if (args_to_push
== 0)
21640 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21643 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21644 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21647 /* Just tell the dwarf backend that we adjusted SP. */
21648 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21649 plus_constant (Pmode
, stack_pointer_rtx
,
21651 RTX_FRAME_RELATED_P (insn
) = 1;
21652 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21656 /* Store the args on the stack. */
21657 if (cfun
->machine
->uses_anonymous_args
)
21659 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21660 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21661 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21662 saved_pretend_args
= 1;
21668 if (args_to_push
== 4)
21669 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21671 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21672 plus_constant (Pmode
,
21676 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21678 /* Just tell the dwarf backend that we adjusted SP. */
21679 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21680 plus_constant (Pmode
, stack_pointer_rtx
,
21682 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21685 RTX_FRAME_RELATED_P (insn
) = 1;
21686 fp_offset
= args_to_push
;
21691 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21693 if (IS_INTERRUPT (func_type
))
21695 /* Interrupt functions must not corrupt any registers.
21696 Creating a frame pointer however, corrupts the IP
21697 register, so we must push it first. */
21698 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21700 /* Do not set RTX_FRAME_RELATED_P on this insn.
21701 The dwarf stack unwinding code only wants to see one
21702 stack decrement per function, and this is not it. If
21703 this instruction is labeled as being part of the frame
21704 creation sequence then dwarf2out_frame_debug_expr will
21705 die when it encounters the assignment of IP to FP
21706 later on, since the use of SP here establishes SP as
21707 the CFA register and not IP.
21709 Anyway this instruction is not really part of the stack
21710 frame creation although it is part of the prologue. */
21713 insn
= emit_set_insn (ip_rtx
,
21714 plus_constant (Pmode
, stack_pointer_rtx
,
21716 RTX_FRAME_RELATED_P (insn
) = 1;
21721 /* Push the argument registers, or reserve space for them. */
21722 if (cfun
->machine
->uses_anonymous_args
)
21723 insn
= emit_multi_reg_push
21724 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21725 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21728 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21729 GEN_INT (- args_to_push
)));
21730 RTX_FRAME_RELATED_P (insn
) = 1;
  /* If this is an interrupt service routine, and the link register is
     going to be pushed, and we aren't generating the extra push of IP
     (needed when a frame is needed and the frame layout is APCS), then
     subtracting four from LR now means that the function return can be
     done with a single instruction.  */
21738 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21739 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21740 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21743 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21745 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21748 if (live_regs_mask
)
21750 unsigned long dwarf_regs_mask
= live_regs_mask
;
21752 saved_regs
+= bit_count (live_regs_mask
) * 4;
21753 if (optimize_size
&& !frame_pointer_needed
21754 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so it is independent of the epilogue.  */
21763 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21765 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21766 if (frame
&& n
* 4 >= frame
)
21769 live_regs_mask
|= (1 << n
) - 1;
21770 saved_regs
+= frame
;
21775 && current_tune
->prefer_ldrd_strd
21776 && !optimize_function_for_size_p (cfun
))
21778 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21780 thumb2_emit_strd_push (live_regs_mask
);
21781 else if (TARGET_ARM
21782 && !TARGET_APCS_FRAME
21783 && !IS_INTERRUPT (func_type
))
21784 arm_emit_strd_push (live_regs_mask
);
21787 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21788 RTX_FRAME_RELATED_P (insn
) = 1;
21793 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21794 RTX_FRAME_RELATED_P (insn
) = 1;
21798 if (! IS_VOLATILE (func_type
))
21799 saved_regs
+= arm_save_coproc_regs ();
21801 if (frame_pointer_needed
&& TARGET_ARM
)
21803 /* Create the new frame pointer. */
21804 if (TARGET_APCS_FRAME
)
21806 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21807 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21808 RTX_FRAME_RELATED_P (insn
) = 1;
21812 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21813 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21814 stack_pointer_rtx
, insn
));
21815 RTX_FRAME_RELATED_P (insn
) = 1;
21819 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21820 if (flag_stack_usage_info
)
21821 current_function_static_stack_size
= size
;
21823 /* If this isn't an interrupt service routine and we have a frame, then do
21824 stack checking. We use IP as the first scratch register, except for the
21825 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21826 if (!IS_INTERRUPT (func_type
)
21827 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
21829 unsigned int regno
;
21831 if (!IS_NESTED (func_type
) || clobber_ip
)
21833 else if (df_regs_ever_live_p (LR_REGNUM
))
21838 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21840 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
21841 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
21842 size
- STACK_CHECK_PROTECT
,
21843 regno
, live_regs_mask
);
21846 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
21847 regno
, live_regs_mask
);
21850 /* Recover the static chain register. */
21853 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21854 insn
= gen_rtx_REG (SImode
, 3);
21857 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21858 insn
= gen_frame_mem (SImode
, insn
);
21860 emit_set_insn (ip_rtx
, insn
);
21861 emit_insn (gen_force_register_use (ip_rtx
));
21864 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21866 /* This add can produce multiple insns for a large constant, so we
21867 need to get tricky. */
21868 rtx_insn
*last
= get_last_insn ();
21870 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21871 - offsets
->outgoing_args
);
21873 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21877 last
= last
? NEXT_INSN (last
) : get_insns ();
21878 RTX_FRAME_RELATED_P (last
) = 1;
21880 while (last
!= insn
);
21882 /* If the frame pointer is needed, emit a special barrier that
21883 will prevent the scheduler from moving stores to the frame
21884 before the stack adjustment. */
21885 if (frame_pointer_needed
)
21886 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21887 hard_frame_pointer_rtx
));
21891 if (frame_pointer_needed
&& TARGET_THUMB2
)
21892 thumb_set_frame_pointer (offsets
);
21894 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21896 unsigned long mask
;
21898 mask
= live_regs_mask
;
21899 mask
&= THUMB2_WORK_REGS
;
21900 if (!IS_NESTED (func_type
))
21901 mask
|= (1 << IP_REGNUM
);
21902 arm_load_pic_register (mask
);
21905 /* If we are profiling, make sure no instructions are scheduled before
21906 the call to mcount. Similarly if the user has requested no
21907 scheduling in the prolog. Similarly if we want non-call exceptions
21908 using the EABI unwinder, to prevent faulting instructions from being
21909 swapped with a stack adjustment. */
21910 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21911 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21912 && cfun
->can_throw_non_call_exceptions
))
21913 emit_insn (gen_blockage ());
21915 /* If the link register is being kept alive, with the return address in it,
21916 then make sure that it does not get reused by the ce2 pass. */
21917 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21918 cfun
->machine
->lr_save_eliminated
= 1;
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
21925 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21927 /* Branch conversion is not implemented for Thumb-2. */
21930 output_operand_lossage ("predicated Thumb instruction");
21933 if (current_insn_predicate
!= NULL
)
21935 output_operand_lossage
21936 ("predicated instruction in conditional sequence");
21940 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21942 else if (current_insn_predicate
)
21944 enum arm_cond_code code
;
21948 output_operand_lossage ("predicated Thumb instruction");
21952 code
= get_arm_condition_code (current_insn_predicate
);
21953 fputs (arm_condition_codes
[code
], stream
);
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
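/* As an illustrative sketch of how such codes are used (the template below
   is hypothetical, not taken from this file): a pattern writing
   "bic%?\t%0, %1, #%B2" relies on 'B' to print the bitwise inverse of the
   constant operand, so a (const_int -256) operand prints as 255 and an AND
   with 0xffffff00 can be output as a BIC with #255.  */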
static void
arm_print_operand (FILE *stream, rtx x, int code)
21985 fputs (ASM_COMMENT_START
, stream
);
21989 fputs (user_label_prefix
, stream
);
21993 fputs (REGISTER_PREFIX
, stream
);
21997 arm_print_condition (stream
);
22001 /* The current condition code for a condition code setting instruction.
22002 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22003 fputc('s', stream
);
22004 arm_print_condition (stream
);
22008 /* If the instruction is conditionally executed then print
22009 the current condition code, otherwise print 's'. */
22010 gcc_assert (TARGET_THUMB2
);
22011 if (current_insn_predicate
)
22012 arm_print_condition (stream
);
22014 fputc('s', stream
);
    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
22027 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
22028 fprintf (stream
, "%s", fp_const_from_val (&r
));
22032 /* An integer or symbol address without a preceding # sign. */
22034 switch (GET_CODE (x
))
22037 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
22041 output_addr_const (stream
, x
);
22045 if (GET_CODE (XEXP (x
, 0)) == PLUS
22046 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
22048 output_addr_const (stream
, x
);
22051 /* Fall through. */
22054 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22058 /* An integer that we want to print in HEX. */
22060 switch (GET_CODE (x
))
22063 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
22067 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22072 if (CONST_INT_P (x
))
22075 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
22076 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
22080 putc ('~', stream
);
22081 output_addr_const (stream
, x
);
22086 /* Print the log2 of a CONST_INT. */
22090 if (!CONST_INT_P (x
)
22091 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
22092 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22094 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22099 /* The low 16 bits of an immediate constant. */
22100 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
22104 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
22108 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
22116 shift
= shift_op (x
, &val
);
22120 fprintf (stream
, ", %s ", shift
);
22122 arm_print_operand (stream
, XEXP (x
, 1), 0);
22124 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
    /* An explanation of the 'Q', 'R' and 'H' register operands:

       In a pair of registers containing a DI or DF value the 'Q'
       operand returns the register number of the register containing
       the least significant part of the value.  The 'R' operand returns
       the register number of the register containing the most
       significant part of the value.

       The 'H' operand returns the higher of the two register numbers.
       On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
       same as the 'Q' operand, since the most significant part of the
       value is held in the lower number register.  The reverse is true
       on systems where WORDS_BIG_ENDIAN is false.

       The purpose of these operands is to distinguish between cases
       where the endian-ness of the values is important (for example
       when they are added together), and cases where the endian-ness
       is irrelevant, but the order of register operations is important.
       For example when loading a value from memory into a register
       pair, the endian-ness does not matter.  Provided that the value
       from the lower memory address is put into the lower numbered
       register, and the value from the higher address is put into the
       higher numbered register, the load will work regardless of whether
       the value being loaded is big-wordian or little-wordian.  The
       order of the two register loads can matter however, if the address
       of the memory location is actually held in one of the registers
       being overwritten by the load.

       The 'Q' and 'R' constraints are also available for 64-bit
       atomic operations.  */
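    /* As a worked example (a sketch, assuming WORDS_BIG_ENDIAN is false):
       for a DImode value held in the pair {r4, r5}, %Q prints r4 (the low
       word), %R prints r5 (the high word) and %H also prints r5, so a
       64-bit add can be emitted as "adds %Q0, %Q1, %Q2" followed by
       "adc %R0, %R1, %R2", adding the low words first.  */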
22160 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22162 rtx part
= gen_lowpart (SImode
, x
);
22163 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22167 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22169 output_operand_lossage ("invalid operand for code '%c'", code
);
22173 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
22177 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22179 machine_mode mode
= GET_MODE (x
);
22182 if (mode
== VOIDmode
)
22184 part
= gen_highpart_mode (SImode
, mode
, x
);
22185 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22189 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22191 output_operand_lossage ("invalid operand for code '%c'", code
);
22195 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
22199 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22201 output_operand_lossage ("invalid operand for code '%c'", code
);
22205 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22209 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22211 output_operand_lossage ("invalid operand for code '%c'", code
);
22215 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22219 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22221 output_operand_lossage ("invalid operand for code '%c'", code
);
22225 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
22229 asm_fprintf (stream
, "%r",
22230 REG_P (XEXP (x
, 0))
22231 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
22235 asm_fprintf (stream
, "{%r-%r}",
22237 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22240 /* Like 'M', but writing doubleword vector registers, for use by Neon
22244 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22245 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22247 asm_fprintf (stream
, "{d%d}", regno
);
22249 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
22254 /* CONST_TRUE_RTX means always -- that's the default. */
22255 if (x
== const_true_rtx
)
22258 if (!COMPARISON_P (x
))
22260 output_operand_lossage ("invalid operand for code '%c'", code
);
22264 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22269 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22270 want to do that. */
22271 if (x
== const_true_rtx
)
22273 output_operand_lossage ("instruction never executed");
22276 if (!COMPARISON_P (x
))
22278 output_operand_lossage ("invalid operand for code '%c'", code
);
22282 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22283 (get_arm_condition_code (x
))],
22293 /* Former Maverick support, removed after GCC-4.7. */
22294 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22299 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22300 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22301 /* Bad value for wCG register number. */
22303 output_operand_lossage ("invalid operand for code '%c'", code
);
22308 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22311 /* Print an iWMMXt control register name. */
22313 if (!CONST_INT_P (x
)
22315 || INTVAL (x
) >= 16)
22316 /* Bad value for wC register number. */
22318 output_operand_lossage ("invalid operand for code '%c'", code
);
22324 static const char * wc_reg_names
[16] =
22326 "wCID", "wCon", "wCSSF", "wCASF",
22327 "wC4", "wC5", "wC6", "wC7",
22328 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22329 "wC12", "wC13", "wC14", "wC15"
22332 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22336 /* Print the high single-precision register of a VFP double-precision
22340 machine_mode mode
= GET_MODE (x
);
22343 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22345 output_operand_lossage ("invalid operand for code '%c'", code
);
22350 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22352 output_operand_lossage ("invalid operand for code '%c'", code
);
22356 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22360 /* Print a VFP/Neon double precision or quad precision register name. */
22364 machine_mode mode
= GET_MODE (x
);
22365 int is_quad
= (code
== 'q');
22368 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22370 output_operand_lossage ("invalid operand for code '%c'", code
);
22375 || !IS_VFP_REGNUM (REGNO (x
)))
22377 output_operand_lossage ("invalid operand for code '%c'", code
);
22382 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22383 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22385 output_operand_lossage ("invalid operand for code '%c'", code
);
22389 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22390 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22394 /* These two codes print the low/high doubleword register of a Neon quad
22395 register, respectively. For pair-structure types, can also print
22396 low/high quadword registers. */
22400 machine_mode mode
= GET_MODE (x
);
22403 if ((GET_MODE_SIZE (mode
) != 16
22404 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22406 output_operand_lossage ("invalid operand for code '%c'", code
);
22411 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22413 output_operand_lossage ("invalid operand for code '%c'", code
);
22417 if (GET_MODE_SIZE (mode
) == 16)
22418 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22419 + (code
== 'f' ? 1 : 0));
22421 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22422 + (code
== 'f' ? 1 : 0));
22426 /* Print a VFPv3 floating-point constant, represented as an integer
22430 int index
= vfp3_const_double_index (x
);
22431 gcc_assert (index
!= -1);
22432 fprintf (stream
, "%d", index
);
22436 /* Print bits representing opcode features for Neon.
22438 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22439 and polynomials as unsigned.
22441 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22443 Bit 2 is 1 for rounding functions, 0 otherwise. */
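  /* As an example of how the bits decode: a value of 3 (signed, float/poly)
     selects 'f', 1 selects 's', 0 selects 'u' and 2 selects 'p' in the
     type-letter cases below, and setting bit 2 as well makes the rounding
     case print an "r".  */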
22445 /* Identify the type as 's', 'u', 'p' or 'f'. */
22448 HOST_WIDE_INT bits
= INTVAL (x
);
22449 fputc ("uspf"[bits
& 3], stream
);
22453 /* Likewise, but signed and unsigned integers are both 'i'. */
22456 HOST_WIDE_INT bits
= INTVAL (x
);
22457 fputc ("iipf"[bits
& 3], stream
);
22461 /* As for 'T', but emit 'u' instead of 'p'. */
22464 HOST_WIDE_INT bits
= INTVAL (x
);
22465 fputc ("usuf"[bits
& 3], stream
);
22469 /* Bit 2: rounding (vs none). */
22472 HOST_WIDE_INT bits
= INTVAL (x
);
22473 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22477 /* Memory operand for vld1/vst1 instruction. */
22481 bool postinc
= FALSE
;
22482 rtx postinc_reg
= NULL
;
22483 unsigned align
, memsize
, align_bits
;
22485 gcc_assert (MEM_P (x
));
22486 addr
= XEXP (x
, 0);
22487 if (GET_CODE (addr
) == POST_INC
)
22490 addr
= XEXP (addr
, 0);
22492 if (GET_CODE (addr
) == POST_MODIFY
)
22494 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22495 addr
= XEXP (addr
, 0);
22497 asm_fprintf (stream
, "[%r", REGNO (addr
));
22499 /* We know the alignment of this access, so we can emit a hint in the
22500 instruction (for some alignments) as an aid to the memory subsystem
22502 align
= MEM_ALIGN (x
) >> 3;
22503 memsize
= MEM_SIZE (x
);
22505 /* Only certain alignment specifiers are supported by the hardware. */
22506 if (memsize
== 32 && (align
% 32) == 0)
22508 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22510 else if (memsize
>= 8 && (align
% 8) == 0)
22515 if (align_bits
!= 0)
22516 asm_fprintf (stream
, ":%d", align_bits
);
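	/* For example, a 16-byte access known to be 16-byte aligned is
	   printed roughly as "[r0:128]"; when no supported alignment hint
	   applies, the hint is simply omitted.  */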
22518 asm_fprintf (stream
, "]");
22521 fputs("!", stream
);
22523 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22531 gcc_assert (MEM_P (x
));
22532 addr
= XEXP (x
, 0);
22533 gcc_assert (REG_P (addr
));
22534 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22538 /* Translate an S register number into a D register number and element index. */
22541 machine_mode mode
= GET_MODE (x
);
22544 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22546 output_operand_lossage ("invalid operand for code '%c'", code
);
22551 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22553 output_operand_lossage ("invalid operand for code '%c'", code
);
22557 regno
= regno
- FIRST_VFP_REGNUM
;
22558 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22563 gcc_assert (CONST_DOUBLE_P (x
));
22565 result
= vfp3_const_double_for_fract_bits (x
);
22567 result
= vfp3_const_double_for_bits (x
);
22568 fprintf (stream
, "#%d", result
);
22571 /* Register specifier for vld1.16/vst1.16. Translate the S register
22572 number into a D register number and element index. */
22575 machine_mode mode
= GET_MODE (x
);
22578 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22580 output_operand_lossage ("invalid operand for code '%c'", code
);
22585 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22587 output_operand_lossage ("invalid operand for code '%c'", code
);
22591 regno
= regno
- FIRST_VFP_REGNUM
;
22592 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22599 output_operand_lossage ("missing operand");
22603 switch (GET_CODE (x
))
22606 asm_fprintf (stream
, "%r", REGNO (x
));
22610 output_address (GET_MODE (x
), XEXP (x
, 0));
22616 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22617 sizeof (fpstr
), 0, 1);
22618 fprintf (stream
, "#%s", fpstr
);
22623 gcc_assert (GET_CODE (x
) != NEG
);
22624 fputc ('#', stream
);
22625 if (GET_CODE (x
) == HIGH
)
22627 fputs (":lower16:", stream
);
22631 output_addr_const (stream
, x
);
22637 /* Target hook for printing a memory address. */
22639 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22643 int is_minus
= GET_CODE (x
) == MINUS
;
22646 asm_fprintf (stream
, "[%r]", REGNO (x
));
22647 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22649 rtx base
= XEXP (x
, 0);
22650 rtx index
= XEXP (x
, 1);
22651 HOST_WIDE_INT offset
= 0;
22653 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
	  /* Ensure that BASE is a register.  */
	  /* (One of them must be.)  */
	  /* Also ensure the SP is not used as an index register.  */
22658 std::swap (base
, index
);
22660 switch (GET_CODE (index
))
22663 offset
= INTVAL (index
);
22666 asm_fprintf (stream
, "[%r, #%wd]",
22667 REGNO (base
), offset
);
22671 asm_fprintf (stream
, "[%r, %s%r]",
22672 REGNO (base
), is_minus
? "-" : "",
22682 asm_fprintf (stream
, "[%r, %s%r",
22683 REGNO (base
), is_minus
? "-" : "",
22684 REGNO (XEXP (index
, 0)));
22685 arm_print_operand (stream
, index
, 'S');
22686 fputs ("]", stream
);
22691 gcc_unreachable ();
22694 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22695 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22697 gcc_assert (REG_P (XEXP (x
, 0)));
22699 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22700 asm_fprintf (stream
, "[%r, #%s%d]!",
22701 REGNO (XEXP (x
, 0)),
22702 GET_CODE (x
) == PRE_DEC
? "-" : "",
22703 GET_MODE_SIZE (mode
));
22705 asm_fprintf (stream
, "[%r], #%s%d",
22706 REGNO (XEXP (x
, 0)),
22707 GET_CODE (x
) == POST_DEC
? "-" : "",
22708 GET_MODE_SIZE (mode
));
22710 else if (GET_CODE (x
) == PRE_MODIFY
)
22712 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22713 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22714 asm_fprintf (stream
, "#%wd]!",
22715 INTVAL (XEXP (XEXP (x
, 1), 1)));
22717 asm_fprintf (stream
, "%r]!",
22718 REGNO (XEXP (XEXP (x
, 1), 1)));
22720 else if (GET_CODE (x
) == POST_MODIFY
)
22722 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22723 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22724 asm_fprintf (stream
, "#%wd",
22725 INTVAL (XEXP (XEXP (x
, 1), 1)));
22727 asm_fprintf (stream
, "%r",
22728 REGNO (XEXP (XEXP (x
, 1), 1)));
22730 else output_addr_const (stream
, x
);
22735 asm_fprintf (stream
, "[%r]", REGNO (x
));
22736 else if (GET_CODE (x
) == POST_INC
)
22737 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22738 else if (GET_CODE (x
) == PLUS
)
22740 gcc_assert (REG_P (XEXP (x
, 0)));
22741 if (CONST_INT_P (XEXP (x
, 1)))
22742 asm_fprintf (stream
, "[%r, #%wd]",
22743 REGNO (XEXP (x
, 0)),
22744 INTVAL (XEXP (x
, 1)));
22746 asm_fprintf (stream
, "[%r, %r]",
22747 REGNO (XEXP (x
, 0)),
22748 REGNO (XEXP (x
, 1)));
22751 output_addr_const (stream
, x
);
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
22767 /* Target hook for assembling integer objects. The ARM version needs to
22768 handle word-sized values specially. */
22770 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22774 if (size
== UNITS_PER_WORD
&& aligned_p
)
22776 fputs ("\t.word\t", asm_out_file
);
22777 output_addr_const (asm_out_file
, x
);
22779 /* Mark symbols as position independent. We only do this in the
22780 .text segment, not in the .data segment. */
22781 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22782 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22784 /* See legitimize_pic_address for an explanation of the
22785 TARGET_VXWORKS_RTP check. */
22786 if (!arm_pic_data_is_text_relative
22787 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22788 fputs ("(GOT)", asm_out_file
);
22790 fputs ("(GOTOFF)", asm_out_file
);
22792 fputc ('\n', asm_out_file
);
22796 mode
= GET_MODE (x
);
22798 if (arm_vector_mode_supported_p (mode
))
22802 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22804 units
= CONST_VECTOR_NUNITS (x
);
22805 size
= GET_MODE_UNIT_SIZE (mode
);
22807 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22808 for (i
= 0; i
< units
; i
++)
22810 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22812 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22815 for (i
= 0; i
< units
; i
++)
22817 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22819 (*CONST_DOUBLE_REAL_VALUE (elt
), GET_MODE_INNER (mode
),
22820 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22826 return default_assemble_integer (x
, size
, aligned_p
);
22830 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22834 if (!TARGET_AAPCS_BASED
)
22837 default_named_section_asm_out_constructor
22838 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22842 /* Put these in the .init_array section, using a special relocation. */
22843 if (priority
!= DEFAULT_INIT_PRIORITY
)
22846 sprintf (buf
, "%s.%.5u",
22847 is_ctor
? ".init_array" : ".fini_array",
22849 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22856 switch_to_section (s
);
22857 assemble_align (POINTER_SIZE
);
22858 fputs ("\t.word\t", asm_out_file
);
22859 output_addr_const (asm_out_file
, symbol
);
22860 fputs ("(target1)\n", asm_out_file
);
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
	  (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
	  (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
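/* For example, four consecutive COND_EXEC insns predicated EQ, EQ, NE, EQ
   are output as a single "ittet eq" block: arm_condexec_mask ends up as
   0b1011 and arm_condexec_masklen as 4.  */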
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
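/* For example, (eq (reg:CC CC_REGNUM) (const_int 0)) yields ARM_EQ, while a
   comparison with no ARM encoding, such as LTGT on a floating-point CC mode,
   yields ARM_NV.  */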
22922 maybe_get_arm_condition_code (rtx comparison
)
22924 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22925 enum arm_cond_code code
;
22926 enum rtx_code comp_code
= GET_CODE (comparison
);
22928 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22929 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22930 XEXP (comparison
, 1));
22934 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22935 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22936 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22937 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22938 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22939 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22940 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22941 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22942 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22943 case CC_DLTUmode
: code
= ARM_CC
;
22946 if (comp_code
== EQ
)
22947 return ARM_INVERSE_CONDITION_CODE (code
);
22948 if (comp_code
== NE
)
22955 case NE
: return ARM_NE
;
22956 case EQ
: return ARM_EQ
;
22957 case GE
: return ARM_PL
;
22958 case LT
: return ARM_MI
;
22959 default: return ARM_NV
;
22965 case NE
: return ARM_NE
;
22966 case EQ
: return ARM_EQ
;
22967 default: return ARM_NV
;
22973 case NE
: return ARM_MI
;
22974 case EQ
: return ARM_PL
;
22975 default: return ARM_NV
;
22980 /* We can handle all cases except UNEQ and LTGT. */
22983 case GE
: return ARM_GE
;
22984 case GT
: return ARM_GT
;
22985 case LE
: return ARM_LS
;
22986 case LT
: return ARM_MI
;
22987 case NE
: return ARM_NE
;
22988 case EQ
: return ARM_EQ
;
22989 case ORDERED
: return ARM_VC
;
22990 case UNORDERED
: return ARM_VS
;
22991 case UNLT
: return ARM_LT
;
22992 case UNLE
: return ARM_LE
;
22993 case UNGT
: return ARM_HI
;
22994 case UNGE
: return ARM_PL
;
22995 /* UNEQ and LTGT do not have a representation. */
22996 case UNEQ
: /* Fall through. */
22997 case LTGT
: /* Fall through. */
22998 default: return ARM_NV
;
23004 case NE
: return ARM_NE
;
23005 case EQ
: return ARM_EQ
;
23006 case GE
: return ARM_LE
;
23007 case GT
: return ARM_LT
;
23008 case LE
: return ARM_GE
;
23009 case LT
: return ARM_GT
;
23010 case GEU
: return ARM_LS
;
23011 case GTU
: return ARM_CC
;
23012 case LEU
: return ARM_CS
;
23013 case LTU
: return ARM_HI
;
23014 default: return ARM_NV
;
23020 case LTU
: return ARM_CS
;
23021 case GEU
: return ARM_CC
;
23022 case NE
: return ARM_CS
;
23023 case EQ
: return ARM_CC
;
23024 default: return ARM_NV
;
23030 case NE
: return ARM_NE
;
23031 case EQ
: return ARM_EQ
;
23032 case GEU
: return ARM_CS
;
23033 case GTU
: return ARM_HI
;
23034 case LEU
: return ARM_LS
;
23035 case LTU
: return ARM_CC
;
23036 default: return ARM_NV
;
23042 case GE
: return ARM_GE
;
23043 case LT
: return ARM_LT
;
23044 case GEU
: return ARM_CS
;
23045 case LTU
: return ARM_CC
;
23046 default: return ARM_NV
;
23052 case NE
: return ARM_VS
;
23053 case EQ
: return ARM_VC
;
23054 default: return ARM_NV
;
23060 case NE
: return ARM_NE
;
23061 case EQ
: return ARM_EQ
;
23062 case GE
: return ARM_GE
;
23063 case GT
: return ARM_GT
;
23064 case LE
: return ARM_LE
;
23065 case LT
: return ARM_LT
;
23066 case GEU
: return ARM_CS
;
23067 case GTU
: return ARM_HI
;
23068 case LEU
: return ARM_LS
;
23069 case LTU
: return ARM_CC
;
23070 default: return ARM_NV
;
23073 default: gcc_unreachable ();
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
23091 rtx_insn
*first_insn
= insn
;
23092 rtx body
= PATTERN (insn
);
23094 enum arm_cond_code code
;
  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
23103 max
= MAX_INSN_PER_IT_BLOCK
;
23105 /* Remove the previous insn from the count of insns to be output. */
23106 if (arm_condexec_count
)
23107 arm_condexec_count
--;
23109 /* Nothing to do if we are already inside a conditional block. */
23110 if (arm_condexec_count
)
23113 if (GET_CODE (body
) != COND_EXEC
)
23116 /* Conditional jumps are implemented directly. */
23120 predicate
= COND_EXEC_TEST (body
);
23121 arm_current_cc
= get_arm_condition_code (predicate
);
23123 n
= get_attr_ce_count (insn
);
23124 arm_condexec_count
= 1;
23125 arm_condexec_mask
= (1 << n
) - 1;
23126 arm_condexec_masklen
= n
;
23127 /* See if subsequent instructions can be combined into the same block. */
23130 insn
= next_nonnote_insn (insn
);
23132 /* Jumping into the middle of an IT block is illegal, so a label or
23133 barrier terminates the block. */
23134 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
23137 body
= PATTERN (insn
);
23138 /* USE and CLOBBER aren't really insns, so just skip them. */
23139 if (GET_CODE (body
) == USE
23140 || GET_CODE (body
) == CLOBBER
)
23143 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23144 if (GET_CODE (body
) != COND_EXEC
)
23146 /* Maximum number of conditionally executed instructions in a block. */
23147 n
= get_attr_ce_count (insn
);
23148 if (arm_condexec_masklen
+ n
> max
)
23151 predicate
= COND_EXEC_TEST (body
);
23152 code
= get_arm_condition_code (predicate
);
23153 mask
= (1 << n
) - 1;
23154 if (arm_current_cc
== code
)
23155 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
23156 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
23159 arm_condexec_count
++;
23160 arm_condexec_masklen
+= n
;
23162 /* A jump must be the last instruction in a conditional block. */
23166 /* Restore recog_data (getting the attributes of other insns can
23167 destroy this array, but final.c assumes that it remains intact
23168 across this call). */
23169 extract_constrain_insn_cached (first_insn
);
23173 arm_final_prescan_insn (rtx_insn
*insn
)
23175 /* BODY will hold the body of INSN. */
23176 rtx body
= PATTERN (insn
);
23178 /* This will be 1 if trying to repeat the trick, and things need to be
23179 reversed if it appears to fail. */
23182 /* If we start with a return insn, we only succeed if we find another one. */
23183 int seeking_return
= 0;
23184 enum rtx_code return_code
= UNKNOWN
;
23186 /* START_INSN will hold the insn from where we start looking. This is the
23187 first insn after the following code_label if REVERSE is true. */
23188 rtx_insn
*start_insn
= insn
;
23190 /* If in state 4, check if the target branch is reached, in order to
23191 change back to state 0. */
23192 if (arm_ccfsm_state
== 4)
23194 if (insn
== arm_target_insn
)
23196 arm_target_insn
= NULL
;
23197 arm_ccfsm_state
= 0;
23202 /* If in state 3, it is possible to repeat the trick, if this insn is an
23203 unconditional branch to a label, and immediately following this branch
23204 is the previous target label which is only used once, and the label this
23205 branch jumps to is not too far off. */
23206 if (arm_ccfsm_state
== 3)
23208 if (simplejump_p (insn
))
23210 start_insn
= next_nonnote_insn (start_insn
);
23211 if (BARRIER_P (start_insn
))
23213 /* XXX Isn't this always a barrier? */
23214 start_insn
= next_nonnote_insn (start_insn
);
23216 if (LABEL_P (start_insn
)
23217 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23218 && LABEL_NUSES (start_insn
) == 1)
23223 else if (ANY_RETURN_P (body
))
23225 start_insn
= next_nonnote_insn (start_insn
);
23226 if (BARRIER_P (start_insn
))
23227 start_insn
= next_nonnote_insn (start_insn
);
23228 if (LABEL_P (start_insn
)
23229 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23230 && LABEL_NUSES (start_insn
) == 1)
23233 seeking_return
= 1;
23234 return_code
= GET_CODE (body
);
23243 gcc_assert (!arm_ccfsm_state
|| reverse
);
23244 if (!JUMP_P (insn
))
  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
23249 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23250 body
= XVECEXP (body
, 0, 0);
23253 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23254 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23257 int fail
= FALSE
, succeed
= FALSE
;
23258 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23259 int then_not_else
= TRUE
;
23260 rtx_insn
*this_insn
= start_insn
;
23263 /* Register the insn jumped to. */
23266 if (!seeking_return
)
23267 label
= XEXP (SET_SRC (body
), 0);
23269 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23270 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23271 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23273 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23274 then_not_else
= FALSE
;
23276 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23278 seeking_return
= 1;
23279 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23281 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23283 seeking_return
= 1;
23284 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23285 then_not_else
= FALSE
;
23288 gcc_unreachable ();
23290 /* See how many insns this branch skips, and what kind of insns. If all
23291 insns are okay, and the label or unconditional branch to the same
23292 label is not too far away, succeed. */
23293 for (insns_skipped
= 0;
23294 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23298 this_insn
= next_nonnote_insn (this_insn
);
23302 switch (GET_CODE (this_insn
))
23305 /* Succeed if it is the target label, otherwise fail since
23306 control falls in from somewhere else. */
23307 if (this_insn
== label
)
23309 arm_ccfsm_state
= 1;
23317 /* Succeed if the following insn is the target label.
23319 If return insns are used then the last insn in a function
23320 will be a barrier. */
23321 this_insn
= next_nonnote_insn (this_insn
);
23322 if (this_insn
&& this_insn
== label
)
23324 arm_ccfsm_state
= 1;
23332 /* The AAPCS says that conditional calls should not be
23333 used since they make interworking inefficient (the
23334 linker can't transform BL<cond> into BLX). That's
23335 only a problem if the machine has BLX. */
23342 /* Succeed if the following insn is the target label, or
23343 if the following two insns are a barrier and the
23345 this_insn
= next_nonnote_insn (this_insn
);
23346 if (this_insn
&& BARRIER_P (this_insn
))
23347 this_insn
= next_nonnote_insn (this_insn
);
23349 if (this_insn
&& this_insn
== label
23350 && insns_skipped
< max_insns_skipped
)
23352 arm_ccfsm_state
= 1;
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */
23366 scanbody
= PATTERN (this_insn
);
23367 if (GET_CODE (scanbody
) == SET
23368 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23370 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23371 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23373 arm_ccfsm_state
= 2;
23376 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23379 /* Fail if a conditional return is undesirable (e.g. on a
23380 StrongARM), but still allow this if optimizing for size. */
23381 else if (GET_CODE (scanbody
) == return_code
23382 && !use_return_insn (TRUE
, NULL
)
23385 else if (GET_CODE (scanbody
) == return_code
)
23387 arm_ccfsm_state
= 2;
23390 else if (GET_CODE (scanbody
) == PARALLEL
)
23392 switch (get_attr_conds (this_insn
))
23402 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
	  /* Instructions using or affecting the condition codes make it
	     fail.  */
23409 scanbody
= PATTERN (this_insn
);
23410 if (!(GET_CODE (scanbody
) == SET
23411 || GET_CODE (scanbody
) == PARALLEL
)
23412 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23422 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23423 arm_target_label
= CODE_LABEL_NUMBER (label
);
23426 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23428 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23430 this_insn
= next_nonnote_insn (this_insn
);
23431 gcc_assert (!this_insn
23432 || (!BARRIER_P (this_insn
)
23433 && !LABEL_P (this_insn
)));
	  /* Oh dear!  We ran off the end.  Give up.  */
23438 extract_constrain_insn_cached (insn
);
23439 arm_ccfsm_state
= 0;
23440 arm_target_insn
= NULL
;
23443 arm_target_insn
= this_insn
;
      /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	 what it was.  */
23449 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23451 if (reverse
|| then_not_else
)
23452 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
23458 extract_constrain_insn_cached (insn
);
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23485 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23486 return (regno
== CC_REGNUM
23487 || (TARGET_HARD_FLOAT
23488 && regno
== VFPCC_REGNUM
));
23490 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
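    /* So, for example, a DImode value may start in any of r0-r6 (its upper
       word then lands in r1-r7), but may not start in r7.  */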
23501 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
23503 if (mode
== SFmode
|| mode
== SImode
)
23504 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23506 if (mode
== DFmode
)
23507 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23509 if (mode
== HFmode
)
23510 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23512 /* VFP registers can hold HImode values. */
23513 if (mode
== HImode
)
23514 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23517 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23518 || (VALID_NEON_QREG_MODE (mode
)
23519 && NEON_REGNO_OK_FOR_QUAD (regno
))
23520 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23521 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23522 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23523 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23524 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23529 if (TARGET_REALLY_IWMMXT
)
23531 if (IS_IWMMXT_GR_REGNUM (regno
))
23532 return mode
== SImode
;
23534 if (IS_IWMMXT_REGNUM (regno
))
23535 return VALID_IWMMXT_REG_MODE (mode
);
23538 /* We allow almost any value to be stored in the general registers.
23539 Restrict doubleword quantities to even register pairs in ARM state
23540 so that we can use ldrd. Do not allow very large Neon structure
23541 opaque modes in general registers; they would use too many. */
23542 if (regno
<= LAST_ARM_REGNUM
)
23544 if (ARM_NUM_REGS (mode
) > 4)
23550 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23553 if (regno
== FRAME_POINTER_REGNUM
23554 || regno
== ARG_POINTER_REGNUM
)
23555 /* We only allow integers in the fake hard registers. */
23556 return GET_MODE_CLASS (mode
) == MODE_INT
;
23561 /* Implement MODES_TIEABLE_P. */
23564 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23566 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23569 /* We specifically want to allow elements of "structure" modes to
23570 be tieable to the structure. This more general condition allows
23571 other rarer situations too. */
23573 && (VALID_NEON_DREG_MODE (mode1
)
23574 || VALID_NEON_QREG_MODE (mode1
)
23575 || VALID_NEON_STRUCT_MODE (mode1
))
23576 && (VALID_NEON_DREG_MODE (mode2
)
23577 || VALID_NEON_QREG_MODE (mode2
)
23578 || VALID_NEON_STRUCT_MODE (mode2
)))
23584 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23585 not used in arm mode. */
23588 arm_regno_class (int regno
)
23590 if (regno
== PC_REGNUM
)
23595 if (regno
== STACK_POINTER_REGNUM
)
23597 if (regno
== CC_REGNUM
)
23604 if (TARGET_THUMB2
&& regno
< 8)
23607 if ( regno
<= LAST_ARM_REGNUM
23608 || regno
== FRAME_POINTER_REGNUM
23609 || regno
== ARG_POINTER_REGNUM
)
23610 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23612 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23613 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23615 if (IS_VFP_REGNUM (regno
))
23617 if (regno
<= D7_VFP_REGNUM
)
23618 return VFP_D0_D7_REGS
;
23619 else if (regno
<= LAST_LO_VFP_REGNUM
)
23620 return VFP_LO_REGS
;
23622 return VFP_HI_REGS
;
23625 if (IS_IWMMXT_REGNUM (regno
))
23626 return IWMMXT_REGS
;
23628 if (IS_IWMMXT_GR_REGNUM (regno
))
23629 return IWMMXT_GR_REGS
;
23634 /* Handle a special case when computing the offset
23635 of an argument from the frame pointer. */
23637 arm_debugger_arg_offset (int value
, rtx addr
)
23641 /* We are only interested if dbxout_parms() failed to compute the offset. */
23645 /* We can only cope with the case where the address is held in a register. */
23649 /* If we are using the frame pointer to point at the argument, then
23650 an offset of 0 is correct. */
23651 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23654 /* If we are using the stack pointer to point at the
23655 argument, then an offset of 0 is correct. */
23656 /* ??? Check this is consistent with thumb2 frame layout. */
23657 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23658 && REGNO (addr
) == SP_REGNUM
)
23661 /* Oh dear. The argument is pointed to by a register rather
23662 than being held in a register, or being stored at a known
23663 offset from the frame pointer. Since GDB only understands
23664 those two kinds of argument we must translate the address
23665 held in the register into an offset from the frame pointer.
23666 We do this by searching through the insns for the function
23667 looking to see where this register gets its value. If the
23668 register is initialized from the frame pointer plus an offset
23669 then we are in luck and we can continue, otherwise we give up.
23671 This code is exercised by producing debugging information
23672 for a function with arguments like this:
23674 double func (double a, double b, int c, double d) {return d;}
23676 Without this code the stab for parameter 'd' will be set to
23677 an offset of 0 from the frame pointer, rather than 8. */
  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */
23691 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23693 if ( NONJUMP_INSN_P (insn
)
23694 && GET_CODE (PATTERN (insn
)) == SET
23695 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23696 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23697 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23698 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23699 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23702 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23711 warning (0, "unable to compute real location of stacked parameter");
23712 value
= 8; /* XXX magic hack */
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to go through an intermediate conversion
   to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
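/* For example, under these semantics the C code

     __fp16 h = 1.0;
     double d = h;

   is lowered as (double) (float) h, i.e. a half-to-float conversion followed
   by a float-to-double conversion, rather than one direct conversion.  */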
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
23762 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23763 not to early-clobber SRC registers in the process.
23765 We assume that the operands described by SRC and DEST represent a
23766 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23767 number of components into which the copy has been decomposed. */
23769 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23773 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23774 || REGNO (operands
[0]) < REGNO (operands
[1]))
23776 for (i
= 0; i
< count
; i
++)
23778 operands
[2 * i
] = dest
[i
];
23779 operands
[2 * i
+ 1] = src
[i
];
23784 for (i
= 0; i
< count
; i
++)
23786 operands
[2 * i
] = dest
[count
- i
- 1];
23787 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23792 /* Split operands into moves from op[1] + op[2] into op[0]. */
23795 neon_split_vcombine (rtx operands
[3])
23797 unsigned int dest
= REGNO (operands
[0]);
23798 unsigned int src1
= REGNO (operands
[1]);
23799 unsigned int src2
= REGNO (operands
[2]);
23800 machine_mode halfmode
= GET_MODE (operands
[1]);
23801 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
23802 rtx destlo
, desthi
;
23804 if (src1
== dest
&& src2
== dest
+ halfregs
)
23806 /* No-op move. Can't split to nothing; emit something. */
23807 emit_note (NOTE_INSN_DELETED
);
23811 /* Preserve register attributes for variable tracking. */
23812 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23813 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23814 GET_MODE_SIZE (halfmode
));
23816 /* Special case of reversed high/low parts. Use VSWP. */
23817 if (src2
== dest
&& src1
== dest
+ halfregs
)
23819 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23820 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23821 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23825 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23827 /* Try to avoid unnecessary moves if part of the result
23828 is in the right place already. */
23830 emit_move_insn (destlo
, operands
[1]);
23831 if (src2
!= dest
+ halfregs
)
23832 emit_move_insn (desthi
, operands
[2]);
23836 if (src2
!= dest
+ halfregs
)
23837 emit_move_insn (desthi
, operands
[2]);
23839 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
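/* For example, number_of_first_bit_set (0x28) is 3, since 0x28 is binary
   101000 and bit 3 is its least significant set bit.  */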
23852 /* Like emit_multi_reg_push, but allowing for a different set of
23853 registers to be described as saved. MASK is the set of registers
23854 to be saved; REAL_REGS is the set of registers to be described as
23855 saved. If REAL_REGS is 0, only describe the stack adjustment. */
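/* For example, MASK == 0x0f emits (roughly) "push {r0-r3}"; with
   REAL_REGS == 0x0f all four stores are described for unwinding, while with
   REAL_REGS == 0 only the 16-byte SP adjustment is recorded.  */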
23858 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23860 unsigned long regno
;
23861 rtx par
[10], tmp
, reg
;
23865 /* Build the parallel of the registers actually being stored. */
23866 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23868 regno
= ctz_hwi (mask
);
23869 reg
= gen_rtx_REG (SImode
, regno
);
23872 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23874 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23879 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23880 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23881 tmp
= gen_frame_mem (BLKmode
, tmp
);
23882 tmp
= gen_rtx_SET (tmp
, par
[0]);
23885 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23886 insn
= emit_insn (tmp
);
23888 /* Always build the stack adjustment note for unwind info. */
23889 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23890 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23893 /* Build the parallel of the registers recorded as saved for unwind. */
23894 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23896 regno
= ctz_hwi (real_regs
);
23897 reg
= gen_rtx_REG (SImode
, regno
);
23899 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23900 tmp
= gen_frame_mem (SImode
, tmp
);
23901 tmp
= gen_rtx_SET (tmp
, reg
);
23902 RTX_FRAME_RELATED_P (tmp
) = 1;
23910 RTX_FRAME_RELATED_P (par
[0]) = 1;
23911 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23914 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23919 /* Emit code to push or pop registers to or from the stack. F is the
23920 assembly file. MASK is the registers to pop. */
23922 thumb_pop (FILE *f
, unsigned long mask
)
23925 int lo_mask
= mask
& 0xFF;
23926 int pushed_words
= 0;
23930 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
      /* Special case.  Do not generate a POP PC statement here; do it in
	 thumb_exit.  */
23934 thumb_exit (f
, -1);
23938 fprintf (f
, "\tpop\t{");
23940 /* Look at the low registers first. */
23941 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23945 asm_fprintf (f
, "%r", regno
);
23947 if ((lo_mask
& ~1) != 0)
23954 if (mask
& (1 << PC_REGNUM
))
23956 /* Catch popping the PC. */
23957 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
23958 || crtl
->calls_eh_return
)
	  /* The PC is never popped directly; instead
	     it is popped into r3 and then BX is used.  */
23962 fprintf (f
, "}\n");
23964 thumb_exit (f
, -1);
23973 asm_fprintf (f
, "%r", PC_REGNUM
);
23977 fprintf (f
, "}\n");
23980 /* Generate code to return from a thumb function.
23981 If 'reg_containing_return_addr' is -1, then the return address is
23982 actually on the stack, at the stack pointer. */
23984 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23986 unsigned regs_available_for_popping
;
23987 unsigned regs_to_pop
;
23989 unsigned available
;
23993 int restore_a4
= FALSE
;
23995 /* Compute the registers we need to pop. */
23999 if (reg_containing_return_addr
== -1)
24001 regs_to_pop
|= 1 << LR_REGNUM
;
24005 if (TARGET_BACKTRACE
)
24007 /* Restore the (ARM) frame pointer and stack pointer. */
24008 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
24014 if (pops_needed
== 0)
24016 if (crtl
->calls_eh_return
)
24017 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24019 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24022 /* Otherwise if we are not supporting interworking and we have not created
24023 a backtrace structure and the function was not entered in ARM mode then
24024 just pop the return address straight into the PC. */
24025 else if (!TARGET_INTERWORK
24026 && !TARGET_BACKTRACE
24027 && !is_called_in_ARM_mode (current_function_decl
)
24028 && !crtl
->calls_eh_return
)
24030 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
24034 /* Find out how many of the (return) argument registers we can corrupt. */
24035 regs_available_for_popping
= 0;
24037 /* If returning via __builtin_eh_return, the bottom three registers
24038 all contain information needed for the return. */
24039 if (crtl
->calls_eh_return
)
24043 /* If we can deduce the registers used from the function's
24044 return value. This is more reliable that examining
24045 df_regs_ever_live_p () because that will be set if the register is
24046 ever used in the function, not just if the register is used
24047 to hold a return value. */
24049 if (crtl
->return_rtx
!= 0)
24050 mode
= GET_MODE (crtl
->return_rtx
);
24052 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
24054 size
= GET_MODE_SIZE (mode
);
24058 /* In a void function we can use any argument register.
24059 In a function that returns a structure on the stack
24060 we can use the second and third argument registers. */
24061 if (mode
== VOIDmode
)
24062 regs_available_for_popping
=
24063 (1 << ARG_REGISTER (1))
24064 | (1 << ARG_REGISTER (2))
24065 | (1 << ARG_REGISTER (3));
24067 regs_available_for_popping
=
24068 (1 << ARG_REGISTER (2))
24069 | (1 << ARG_REGISTER (3));
24071 else if (size
<= 4)
24072 regs_available_for_popping
=
24073 (1 << ARG_REGISTER (2))
24074 | (1 << ARG_REGISTER (3));
24075 else if (size
<= 8)
24076 regs_available_for_popping
=
24077 (1 << ARG_REGISTER (3));
24080 /* Match registers to be popped with registers into which we pop them. */
24081 for (available
= regs_available_for_popping
,
24082 required
= regs_to_pop
;
24083 required
!= 0 && available
!= 0;
24084 available
&= ~(available
& - available
),
24085 required
&= ~(required
& - required
))
24088 /* If we have any popping registers left over, remove them. */
24090 regs_available_for_popping
&= ~available
;
24092 /* Otherwise if we need another popping register we can use
24093 the fourth argument register. */
24094 else if (pops_needed
)
24096 /* If we have not found any free argument registers and
24097 reg a4 contains the return address, we must move it. */
24098 if (regs_available_for_popping
== 0
24099 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
24101 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24102 reg_containing_return_addr
= LR_REGNUM
;
24104 else if (size
> 12)
24106 /* Register a4 is being used to hold part of the return value,
24107 but we have dire need of a free, low register. */
24110 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
24113 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
24115 /* The fourth argument register is available. */
24116 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
24122 /* Pop as many registers as we can. */
24123 thumb_pop (f
, regs_available_for_popping
);
24125 /* Process the registers we popped. */
24126 if (reg_containing_return_addr
== -1)
24128 /* The return address was popped into the lowest numbered register. */
24129 regs_to_pop
&= ~(1 << LR_REGNUM
);
24131 reg_containing_return_addr
=
24132 number_of_first_bit_set (regs_available_for_popping
);
24134 /* Remove this register for the mask of available registers, so that
24135 the return address will not be corrupted by further pops. */
24136 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
24139 /* If we popped other registers then handle them here. */
24140 if (regs_available_for_popping
)
24144 /* Work out which register currently contains the frame pointer. */
24145 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24147 /* Move it into the correct place. */
24148 asm_fprintf (f
, "\tmov\t%r, %r\n",
24149 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
24151 /* (Temporarily) remove it from the mask of popped registers. */
24152 regs_available_for_popping
&= ~(1 << frame_pointer
);
24153 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24155 if (regs_available_for_popping
)
24159 /* We popped the stack pointer as well,
24160 find the register that contains it. */
24161 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24163 /* Move it into the stack register. */
24164 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24166 /* At this point we have popped all necessary registers, so
24167 do not worry about restoring regs_available_for_popping
24168 to its correct value:
24170 assert (pops_needed == 0)
24171 assert (regs_available_for_popping == (1 << frame_pointer))
24172 assert (regs_to_pop == (1 << STACK_POINTER)) */
24176 /* Since we have just move the popped value into the frame
24177 pointer, the popping register is available for reuse, and
24178 we know that we still have the stack pointer left to pop. */
24179 regs_available_for_popping
|= (1 << frame_pointer
);
24183 /* If we still have registers left on the stack, but we no longer have
24184 any registers into which we can pop them, then we must move the return
24185 address into the link register and make available the register that
24187 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24189 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24191 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24192 reg_containing_return_addr
);
24194 reg_containing_return_addr
= LR_REGNUM
;
24197 /* If we have registers left on the stack then pop some more.
24198 We know that at most we will want to pop FP and SP. */
24199 if (pops_needed
> 0)
24204 thumb_pop (f
, regs_available_for_popping
);
24206 /* We have popped either FP or SP.
24207 Move whichever one it is into the correct register. */
24208 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24209 move_to
= number_of_first_bit_set (regs_to_pop
);
24211 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24213 regs_to_pop
&= ~(1 << move_to
);
24218 /* If we still have not popped everything then we must have only
24219 had one register available to us and we are now popping the SP. */
24220 if (pops_needed
> 0)
24224 thumb_pop (f
, regs_available_for_popping
);
24226 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24228 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24230 assert (regs_to_pop == (1 << STACK_POINTER))
24231 assert (pops_needed == 1)
24235 /* If necessary restore the a4 register. */
24238 if (reg_containing_return_addr
!= LR_REGNUM
)
24240 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24241 reg_containing_return_addr
= LR_REGNUM
;
24244 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24247 if (crtl
->calls_eh_return
)
24248 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24250 /* Return to caller. */
24251 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */

thumb1_final_prescan_insn (rtx_insn *insn)
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
                 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
          if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
              || modified_in_p (cfun->machine->thumb1_cc_op1, insn))

      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
          rtx set = single_set (insn);
          cfun->machine->thumb1_cc_insn = insn;
          cfun->machine->thumb1_cc_op0 = SET_DEST (set);
          cfun->machine->thumb1_cc_op1 = const0_rtx;
          cfun->machine->thumb1_cc_mode = CC_NOOVmode;
          if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
              rtx src1 = XEXP (SET_SRC (set), 1);
              if (src1 == const0_rtx)
                cfun->machine->thumb1_cc_mode = CCmode;
          else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
              /* Record the src register operand instead of dest because
                 cprop_hardreg pass propagates src.  */
              cfun->machine->thumb1_cc_op0 = SET_SRC (set);
      else if (conds != CONDS_NOCOND)
        cfun->machine->thumb1_cc_insn = NULL_RTX;

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
  unsigned HOST_WIDE_INT mask = 0xff;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
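/* Illustrative sketch (not part of the backend, guarded out): the loop
   above accepts exactly those 32-bit constants that are an 8-bit value
   shifted left by 0..24 bits, e.g. 0x0001_2000 (0x12 << 12) or 0xff000000,
   and rejects values whose set bits span more than one byte, such as
   0x00012001.  A standalone equivalent of the check; the elided returns
   are assumed here to be 1 on a match and 0 otherwise:  */
#if 0
static int
is_byte_shifted (unsigned long val)
{
  unsigned long mask = 0xff;
  int i;

  val &= 0xffffffffUL;
  if (val == 0)
    return 0;
  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;
  return 0;
}
#endif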
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */

thumb_far_jump_used_p (void)
  bool far_jump = false;
  unsigned int func_size = 0;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
      /* In this case we know that we are being asked about the elimination
         of the arg pointer register.  If that register is not being used,
         then there are no arguments on the stack, and we do not have to
         worry that a far jump might force the prologue to push the link
         register, changing the stack offsets.  In this case we can just
         return false, since the presence of far jumps in the function will
         not affect stack offsets.

         If the arg pointer is live (or if it was live, but has now been
         eliminated and so set to dead) then we do have to test to see if
         the function might contain a far jump.  This test can lead to some
         false negatives, since before reload is completed, the length of
         branch instructions is not known, so gcc defaults to returning their
         longest length, which in turn sets the far jump attribute to true.

         A false negative will not result in bad code being generated, but it
         will result in a needless push and pop of the link register.  We
         hope that this does not occur too often.

         If we need doubleword stack alignment this could affect the other
         elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
        cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)

      func_size += get_attr_length (insn);

  /* The far_jump attribute will always be true for thumb1 before the
     shorten_branch pass, so checking it before shorten_branch isn't very
     useful.

     The following heuristic tries to estimate more accurately whether a far
     jump may finally be used.  The heuristic is very conservative, as there
     is no chance to roll back the decision not to use a far jump.

     The Thumb1 long branch offset is -2048 to 2046.  The worst case is that
     each 2-byte insn is associated with a 4-byte constant pool.  Using
     2048/3 as the function-size threshold is conservative enough.  */
  if ((func_size * 3) >= 2048)
      /* Record the fact that we have decided that
         the function does use far jumps.  */
      cfun->machine->far_jump_used = 1;
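/* Illustrative sketch (not part of the backend, guarded out): the heuristic
   above assumes the worst case of 4 bytes of constant pool per 2-byte insn,
   so the span a branch may have to cover is at most 3 * func_size.  A far
   jump is therefore assumed to be needed once 3 * func_size reaches the
   2048-byte branch range, i.e. at roughly 683 bytes of instructions:  */
#if 0
static int
may_need_far_jump (unsigned int func_size_in_bytes)
{
  /* Thumb-1 long branch offset is -2048 to 2046 bytes.  */
  return (func_size_in_bytes * 3) >= 2048;
}
#endif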
/* Return nonzero if FUNC must be entered in ARM mode.  */

is_called_in_ARM_mode (tree func)
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))

  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */

thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instructions.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
          || TARGET_BACKTRACE
          || (live_regs_mask & 1 << LR_REGNUM) == 0
          || TARGET_INTERWORK
          || crtl->args.pretend_args_size != 0))

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
          || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))

  size = arm_size_return_regs ();
  reg_base = ARM_NUM_INTS (size);
  live_regs_mask >>= reg_base;

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
         && (for_prologue || call_used_regs[reg_base + n_free]))
      live_regs_mask >>= 1;

  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
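/* Illustrative sketch (not part of the backend, guarded out): a single
   Thumb-1 "sub sp, #imm" can subtract at most 508 bytes (a 7-bit count of
   words).  When the frame is slightly larger, each extra register push both
   stores a word and shrinks the remaining adjustment by 4 bytes, which is
   what the arithmetic above exploits: for example, amount == 516 with two
   free registers gives (516 - 508) / 4 == 2 extra pushes, leaving a
   508-byte adjustment that fits in one instruction.  The final returns are
   elided in this excerpt; amount / 4 and 0 below are assumptions:  */
#if 0
static int
extra_pushes_for (long amount, int n_free)
{
  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (int) ((amount - 508) / 4);
  if (amount <= n_free * 4)
    return (int) (amount / 4);   /* assumed: whole frame covered by pushes */
  return 0;                      /* assumed fall-through */
}
#endif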
/* The bits which aren't usefully expanded as rtl.  */

thumb1_unexpanded_epilogue (void)
  arm_stack_offsets *offsets;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int had_to_push_lr;

  if (cfun->machine->return_used_this_function != 0)

  if (IS_NAKED (arm_current_func_type ()))

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
        push {r4, r5, r6, r7, lr}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
      unsigned long mask = live_regs_mask & 0xff;

      /* The available low registers depend on the size of the value we are
         returning.  */

        /* Oh dear!  We have no low registers into which we can pop
           high registers!  */
        internal_error
          ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
        if (live_regs_mask & (1 << next_hi_reg))

      while (high_regs_pushed)
          /* Find lo register(s) into which the high register(s) can
             be popped.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
              if (mask & (1 << regno))
                high_regs_pushed--;
              if (high_regs_pushed == 0)

          mask &= (2 << regno) - 1;  /* A noop if regno == 8 */

          /* Pop the values into the low register(s).  */
          thumb_pop (asm_out_file, mask);

          /* Move the value(s) into the high registers.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
              if (mask & (1 << regno))
                  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,

                  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
                    if (live_regs_mask & (1 << next_hi_reg))

      live_regs_mask &= ~0x0f00;

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
        live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
         structure was created which includes an adjusted stack
         pointer, so just pop everything.  */
      if (live_regs_mask)
        thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
         PC or it was kept in LR for the entire function.
         Note that thumb_pop has already called thumb_exit if the
         PC was in the list.  */
      if (!had_to_push_lr)
        thumb_exit (asm_out_file, LR_REGNUM);

      /* Pop everything but the return address.  */
      if (live_regs_mask)
        thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
          /* We have no free low regs, so save one.  */
          asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,

          /* Get the return address into a temporary register.  */
          thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

          /* Move the return address to lr.  */
          asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,

          /* Restore the low register.  */
          asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,

          regno = LAST_ARG_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
                   SP_REGNUM, SP_REGNUM,
                   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
/* Return an RTX indicating where the return address to the
   calling function can be found.  */

arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
/* Do anything needed before RTL is emitted for each function.  */

arm_init_expanders (void)
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
/* Check that FUNC is called with a different mode.  */

arm_change_mode_p (tree func)
  if (TREE_CODE (func) != FUNCTION_DECL)

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

    case ARG_POINTER_REGNUM:
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

          gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

          gcc_unreachable ();

      gcc_unreachable ();
/* Generate the function's prologue.  */

thumb1_expand_prologue (void)
  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
      if (flag_stack_usage_info)
        current_function_static_stack_size = 0;

  if (IS_INTERRUPT (func_type))
      error ("interrupt Service Routines cannot be coded in Thumb mode");

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
          int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
          unsigned long mask;

          mask = 1ul << (LAST_ARG_REGNUM + 1);
          mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

          insn = thumb1_emit_multi_reg_push (mask, 0);

          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, x));

      RTX_FRAME_RELATED_P (insn) = 1;

  if (TARGET_BACKTRACE)
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
         The code looks like this:

         0   sub  SP, #16          Reserve space for 4 registers.
         2   push {R7}             Push low registers.
         4   add  R7, SP, #20      Get the stack pointer before the push.
         6   str  R7, [SP, #8]     Store the stack pointer
                                     (before reserving the space).
         8   mov  R7, PC           Get hold of the start of this code + 12.
         10  str  R7, [SP, #16]    Store it.
         12  mov  R7, FP           Get hold of the current frame pointer.
         14  str  R7, [SP, #4]     Store it.
         16  mov  R7, LR           Get hold of the current return address.
         18  str  R7, [SP, #12]    Store it.
         20  add  R7, SP, #16      Point at the start of the
                                     backtrace structure.
         22  mov  FP, R7           Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

          insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
          RTX_FRAME_RELATED_P (insn) = 1;

          offset = bit_count (l_mask) * UNITS_PER_WORD;

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
         to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
         prevent the scheduler from doing anything weird.  Failing that
         we could always move all of the following into an UNSPEC_VOLATILE.  */
          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
           || (high_regs_pushed == 0 && l_mask))
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;

  if (high_regs_pushed)
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
                                                 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
        if (live_regs_mask & (1 << next_hi_reg))

      /* Here we need to mask out registers used for passing arguments
         even if they could be pushed.  This is to avoid using them to
         stash the high registers; such a stash could clobber argument
         values that are still needed.  */
      pushable_regs = l_mask & (~arg_regs_mask) & 0xff;

      if (pushable_regs == 0)
        pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
          unsigned long real_regs_mask = 0;

          for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
              if (pushable_regs & (1 << regno))
                  emit_move_insn (gen_rtx_REG (SImode, regno),
                                  gen_rtx_REG (SImode, next_hi_reg));

                  high_regs_pushed--;
                  real_regs_mask |= (1 << next_hi_reg);

                  if (high_regs_pushed)
                      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
                        if (live_regs_mask & (1 << next_hi_reg))

                      pushable_regs &= ~((1 << regno) - 1);

          /* If we had to find a work register and we have not yet
             saved the LR then add it to the list of regs to push.  */
          if (l_mask == (1 << LR_REGNUM))
              pushable_regs |= l_mask;
              real_regs_mask |= l_mask;

          insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
                    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
    sorry ("-fstack-check=specific for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);

          insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                        GEN_INT (- amount)));
          RTX_FRAME_RELATED_P (insn) = 1;

          /* The stack decrement is too big for an immediate value in a single
             insn.  In theory we could issue multiple subtracts, but after
             three of them it becomes more space efficient to place the full
             value in the constant pool and load into a register.  (Also the
             ARM debugger really likes to see only one stack decrement per
             function).  So instead we look for a scratch register into which
             we can load the decrement, and then we subtract this from the
             stack pointer.  Unfortunately on the thumb the only available
             scratch registers are the argument registers, and we cannot use
             these as they may hold arguments to the function.  Instead we
             attempt to locate a call preserved register which is used by this
             function.  If we can find one, then we know that it will have
             been pushed at the start of the prologue and so we can corrupt
             it now.  */
          for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
            if (live_regs_mask & (1 << regno))

          gcc_assert (regno <= LAST_LO_REGNUM);

          reg = gen_rtx_REG (SImode, regno);

          emit_insn (gen_movsi (reg, GEN_INT (- amount)));

          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, reg));

          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          RTX_FRAME_RELATED_P (insn) = 1;

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
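/* Illustrative sketch (not part of the backend, guarded out): the large
   stack decrement handled above needs a scratch register to hold the full
   value.  Argument registers cannot be clobbered, so the search walks the
   call-saved low registers (r4..r7 on Thumb-1) and takes the first one the
   prologue has already pushed, since that makes it safe to corrupt.
   Standalone form of that register pick, with register numbers 4..7
   written out directly:  */
#if 0
static int
pick_saved_low_scratch (unsigned long live_regs_mask)
{
  int regno;
  for (regno = 4; regno <= 7; regno++)   /* r4 .. r7 */
    if (live_regs_mask & (1ul << regno))
      return regno;
  return -1;                             /* no candidate found */
}
#endif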
/* Generate the pattern *pop_multiple_with_stack_update_and_return if a
   single POP instruction can be generated.  LR should be replaced by PC.
   All the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is whether a single register or
   multiple registers are being returned.  */

thumb2_expand_return (bool simple_return)
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))

  if (!simple_return && saved_regs_mask)
          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
          rtx addr = gen_rtx_MEM (SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
          RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
          emit_jump_insn (par);

          saved_regs_mask &= ~ (1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          arm_emit_multi_reg_pop (saved_regs_mask);

      emit_jump_insn (simple_return_rtx);
thumb1_expand_epilogue (void)
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);

      emit_insn (gen_blockage ());

        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (amount)));

          /* r3 is always free in the epilogue.  */
          rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

          emit_insn (gen_movsi (reg, GEN_INT (amount)));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
/* Epilogue code for APCS frame.  */

arm_expand_epilogue_apcs_frame (bool really_return)
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()

  /* Compute how many core registers are saved and how far away the floats
     are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
        floats_from_frame += 4;

  if (TARGET_HARD_FLOAT)
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
          floats_from_frame += saved_size;
          insn = emit_insn (gen_addsi3 (ip_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (-floats_from_frame)));
          arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
                                       ip_rtx, hard_frame_pointer_rtx);

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
        /* Look for a case where a reg does not need restoring.  */
        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
            && (!df_regs_ever_live_p (i + 1)
                || call_used_regs[i + 1]))
            if (start_reg != i)
              arm_emit_vfp_multi_reg_pop (start_reg,
                                          (i - start_reg) / 2,
                                          gen_rtx_REG (SImode,

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (start_reg != i)
        arm_emit_vfp_multi_reg_pop (start_reg,
                                    (i - start_reg) / 2,
                                    gen_rtx_REG (SImode, IP_REGNUM));

      /* The frame pointer is guaranteed to be non-double-word aligned, as
         it is set to double-word-aligned old_stack_pointer - 4.  */
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
        if (df_regs_ever_live_p (i) && !call_used_regs[i])
            rtx addr = gen_frame_mem (V2SImode,
                                      plus_constant (Pmode, hard_frame_pointer_rtx,
            insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                               gen_rtx_REG (V2SImode, i),

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    hard_frame_pointer_rtx,
                                    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
                                   stack_pointer_rtx, hard_frame_pointer_rtx);

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
      /* Interrupt handlers will have pushed the
         IP onto the stack, so restore it now.  */
      rtx addr = gen_rtx_MEM (SImode,
                              gen_rtx_POST_INC (SImode,
                                                stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                         gen_rtx_REG (SImode, IP_REGNUM),

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */

arm_expand_epilogue (bool really_return)
  unsigned long func_type;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate the return
     pattern and let output_return_instruction take care of instruction
     emission, if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
      emit_jump_insn (simple_return_rtx);

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
      arm_expand_epilogue_apcs_frame (really_return);

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
      /* Restore stack pointer if necessary.  */
          /* In ARM mode, frame pointer points to first saved register.
             Restore stack pointer to last saved register.  */
          amount = offsets->frame - offsets->saved_regs;

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (insn, amount,
                                       hard_frame_pointer_rtx);

          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));

          /* In Thumb-2 mode, the frame pointer points to the last saved
             register.  */
          amount = offsets->locals_base - offsets->saved_regs;

              insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                            hard_frame_pointer_rtx,
                                            GEN_INT (amount)));
              arm_add_cfa_adjust_cfa_note (insn, amount,
                                           hard_frame_pointer_rtx,
                                           hard_frame_pointer_rtx);

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_movsi (stack_pointer_rtx,
                                       hard_frame_pointer_rtx));
          arm_add_cfa_adjust_cfa_note (insn, 0,
                                       hard_frame_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));

      /* Pop off outgoing args and local frame to adjust stack pointer to
         last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                       GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (tmp, amount,
                                       stack_pointer_rtx, stack_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
             not deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (TARGET_HARD_FLOAT)
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
         any groupings made in the prologue and generate matching
         vldm operations.  The need to match groups is because,
         unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
        /* Look for a case where a reg does not need restoring.  */
        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
            && (!df_regs_ever_live_p (i + 1)
                || call_used_regs[i + 1]))
            /* Restore the regs discovered so far (from reg+2 to
               end_reg).  */
            if (end_reg > i + 2)
              arm_emit_vfp_multi_reg_pop (i + 2,
                                          (end_reg - (i + 2)) / 2,
                                          stack_pointer_rtx);

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (end_reg > i + 2)
        arm_emit_vfp_multi_reg_pop (i + 2,
                                    (end_reg - (i + 2)) / 2,
                                    stack_pointer_rtx);

      for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
        if (df_regs_ever_live_p (i) && !call_used_regs[i])
            rtx addr = gen_rtx_MEM (V2SImode,
                                    gen_rtx_POST_INC (SImode,
                                                      stack_pointer_rtx));
            set_mem_alias_set (addr, get_frame_alias_set ());
            insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                               gen_rtx_REG (V2SImode, i),
            arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                         stack_pointer_rtx, stack_pointer_rtx);

  if (saved_regs_mask)
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
          && !IS_STACKALIGN (func_type)
          && crtl->args.pretend_args_size == 0
          && saved_regs_mask & (1 << LR_REGNUM)
          && !crtl->calls_eh_return)
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          return_in_pc = true;

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
          for (i = 0; i <= LAST_ARM_REGNUM; i++)
            if (saved_regs_mask & (1 << i))
                rtx addr = gen_rtx_MEM (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx));
                set_mem_alias_set (addr, get_frame_alias_set ());

                if (i == PC_REGNUM)
                    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
                    XVECEXP (insn, 0, 0) = ret_rtx;
                    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
                    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
                    insn = emit_jump_insn (insn);

                    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
                    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                                       gen_rtx_REG (SImode, i),
                    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                                 stack_pointer_rtx);

               && current_tune->prefer_ldrd_strd
               && !optimize_function_for_size_p (cfun))
            thumb2_emit_ldrd_pop (saved_regs_mask);
          else if (TARGET_ARM && !IS_INTERRUPT (func_type))
            arm_emit_ldrd_pop (saved_regs_mask);
            arm_emit_multi_reg_pop (saved_regs_mask);
        arm_emit_multi_reg_pop (saved_regs_mask);

        = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();

      rtx dwarf = NULL_RTX;

        emit_insn (gen_addsi3 (stack_pointer_rtx,
                               GEN_INT (amount)));
      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
          /* Restore pretend args.  Refer to arm_expand_prologue for how
             pretend_args are saved on the stack.  */
          int num_regs = crtl->args.pretend_args_size / 4;
          saved_regs_mask = (0xf0 >> num_regs) & 0xf;
          for (j = 0, i = 0; j < num_regs; i++)
            if (saved_regs_mask & (1 << i))
                rtx reg = gen_rtx_REG (SImode, i);
                dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
          REG_NOTES (tmp) = dwarf;
      arm_add_cfa_adjust_cfa_note (tmp, amount,
                                   stack_pointer_rtx, stack_pointer_rtx);

  if (!really_return)

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */

thumb1_output_interwork (void)
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */

thumb_load_double_from_address (rtx *operands)
  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
          output_asm_insn ("ldr\t%H0, %2", operands);
          output_asm_insn ("ldr\t%0, %1", operands);

          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%H0, %2", operands);

      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);

      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
        base = arg2, offset = arg1;
        base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg>.  */
      if (REG_P (offset))
          int reg_offset = REGNO (offset);
          int reg_base = REGNO (base);
          int reg_dest = REGNO (operands[0]);

          /* Add the base and offset registers together into the
             higher destination register.  */
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
                       reg_dest + 1, reg_base, reg_offset);

          /* Load the lower destination register from the address in
             the higher destination register.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
                       reg_dest, reg_dest + 1);

          /* Load the higher destination register from its own address
             plus 4.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
                       reg_dest + 1, reg_dest + 1);

          /* Compute <address> + 4 for the high order load.  */
          operands[2] = adjust_address (operands[1], SImode, 4);

          /* If the computed address is held in the low order register
             then load the high order register first, otherwise always
             load the low order register first.  */
          if (REGNO (operands[0]) == REGNO (base))
              output_asm_insn ("ldr\t%H0, %2", operands);
              output_asm_insn ("ldr\t%0, %1", operands);

              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%H0, %2", operands);

      /* With no registers to worry about we can just load the value
         directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);

      gcc_unreachable ();
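/* Illustrative sketch (not part of the backend, guarded out): the ordering
   rule used above, reduced to its decision.  The destination is a register
   pair (dest_lo, dest_lo + 1); if dest_lo is also the register holding the
   computed address, loading the low word first would clobber the address,
   so the high word must be loaded first:  */
#if 0
static int
must_load_high_word_first (unsigned dest_lo_regno, unsigned addr_base_regno)
{
  return dest_lo_regno == addr_base_regno;
}
#endif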
thumb_output_move_mem_multiple (int n, rtx *operands)
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);

      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
        std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);

      gcc_unreachable ();
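/* Illustrative sketch (not part of the backend, guarded out): ldmia/stmia
   register lists must name registers in ascending order, so the three
   conditional swaps above form a minimal sorting network for the
   three-register case.  The same network over plain integers:  */
#if 0
static void
sort3 (unsigned *a, unsigned *b, unsigned *c)
{
  unsigned t;
  if (*a > *b) { t = *a; *a = *b; *b = t; }
  if (*b > *c) { t = *b; *b = *c; *c = t; }
  if (*a > *b) { t = *a; *a = *b; *b = t; }   /* now *a <= *b <= *c */
}
#endif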
/* Output a call-via instruction for thumb state.  */

thumb_call_via_reg (rtx reg)
  int regno = REGNO (reg);

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;

      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;

  output_asm_insn ("bl\t%a0", labelp);
/* Routines for generating rtl.  */

thumb_expand_movmemqi (rtx *operands)
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

      emit_insn (gen_movmem12b (out, in, out, in));

      emit_insn (gen_movmem8b (out, in, out, in));

      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));

      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (Pmode, in,
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,

      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (Pmode, in,
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,

thumb_reload_out_hi (rtx *operands)
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
/* Return the length of a function name prefix
   that starts with the character 'c'.  */

arm_get_strip_length (int c)
      ARM_NAME_ENCODING_LENGTHS

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */

arm_strip_name_encoding (const char *name)
  while ((skip = arm_get_strip_length (* name)))

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */

arm_asm_output_labelref (FILE *stream, const char *name)
  while ((skip = arm_get_strip_length (* name)))
      verbatim |= (*name == '*');

    fputs (name, stream);
    asm_fprintf (stream, "%U%s", name);
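/* Illustrative sketch (not part of the backend, guarded out): both routines
   above repeatedly measure and skip a recognised prefix at the front of
   NAME until no prefix remains.  Standalone form with a hypothetical prefix
   rule ('*' and '.' count as one-character prefixes):  */
#if 0
static int
example_strip_length (int c)
{
  return (c == '*' || c == '.') ? 1 : 0;
}

static const char *
example_strip_name (const char *name)
{
  int skip;
  while ((skip = example_strip_length (*name)))
    name += skip;
  return name;
}
#endif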
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

arm_emit_eabi_attribute (const char *name, int num, int val)
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
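/* Illustrative sketch (not part of the backend, guarded out): for a
   hypothetical call arm_emit_eabi_attribute ("Tag_example", 42, 1) with
   verbose assembly enabled, the routine above prints a line of the form

       .eabi_attribute 42, 1   @ Tag_example

   ('@' being the ARM assembler comment marker).  Standalone form:  */
#if 0
#include <stdio.h>

static void
example_emit_attribute (const char *name, int num, int val, int verbose)
{
  printf ("\t.eabi_attribute %d, %d", num, val);
  if (verbose)
    printf ("\t%s %s", "@", name);
  printf ("\n");
}
#endif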
/* This function is used to print CPU tuning information as a comment
   in the assembler file.  Pointers are not printed for now.  */

arm_print_tune_info (void)
  asm_fprintf (asm_out_file, "\t@.tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
               current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
               current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
               current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
               current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
               current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
               (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
               current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
               current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
               current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
               current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
               (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
               (int) current_tune->logical_op_non_short_circuit_thumb,
               (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
               (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file,
               "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
               (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
               (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
               current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
               current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
               (int) current_tune->sched_autopref);
26054 arm_file_start (void)
26060 if (arm_selected_arch
)
26062 /* armv7ve doesn't support any extensions. */
26063 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
	  /* Keep backward compatibility for assemblers
	     which don't support armv7ve.  */
26067 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26068 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26069 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26070 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26071 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
26075 const char* pos
= strchr (arm_selected_arch
->name
, '+');
26079 gcc_assert (strlen (arm_selected_arch
->name
)
26080 <= sizeof (buf
) / sizeof (*pos
));
26081 strncpy (buf
, arm_selected_arch
->name
,
26082 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
26083 buf
[pos
- arm_selected_arch
->name
] = '\0';
26084 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
26085 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
26088 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
26091 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
26092 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
26095 const char* truncated_name
26096 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
26097 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26100 if (print_tune_info
)
26101 arm_print_tune_info ();
26103 if (! TARGET_SOFT_FLOAT
)
26105 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26106 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26108 if (TARGET_HARD_FLOAT_ABI
)
26109 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26112 /* Some of these attributes only apply when the corresponding features
26113 are used. However we don't have any easy way of figuring this out.
26114 Conservatively record the setting that would have been used. */
26116 if (flag_rounding_math
)
26117 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26119 if (!flag_unsafe_math_optimizations
)
26121 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26122 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26124 if (flag_signaling_nans
)
26125 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26127 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26128 flag_finite_math_only
? 1 : 3);
26130 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26131 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26132 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26133 flag_short_enums
? 1 : 2);
26135 /* Tag_ABI_optimization_goals. */
26138 else if (optimize
>= 2)
26144 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26146 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26149 if (arm_fp16_format
)
26150 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26151 (int) arm_fp16_format
);
26153 if (arm_lang_output_object_attributes_hook
)
26154 arm_lang_output_object_attributes_hook();
26157 default_file_start ();
26161 arm_file_end (void)
26165 if (NEED_INDICATE_EXEC_STACK
)
26166 /* Add .note.GNU-stack. */
26167 file_end_indicate_exec_stack ();
26169 if (! thumb_call_reg_needed
)
26172 switch_to_section (text_section
);
26173 asm_fprintf (asm_out_file
, "\t.code 16\n");
26174 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26176 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26178 rtx label
= thumb_call_via_label
[regno
];
26182 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26183 CODE_LABEL_NUMBER (label
));
26184 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   anyway.  */
26197 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
26199 if (optimize
> 0 && TREE_CONSTANT (decl
))
26200 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26202 default_encode_section_info (decl
, rtl
, first
);
26204 #endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
26218 /* Output code to add DELTA to the first argument, and then jump
26219 to FUNCTION. Used for C++ multiple inheritance. */
26222 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26223 HOST_WIDE_INT
, tree function
)
26225 static int thunk_label
= 0;
26228 int mi_delta
= delta
;
26229 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26231 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26234 mi_delta
= - mi_delta
;
26236 final_start_function (emit_barrier (), file
, 1);
26240 int labelno
= thunk_label
++;
26241 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
      /* Thunks are entered in ARM mode when available.  */
26243 if (TARGET_THUMB1_ONLY
)
26245 /* push r3 so we can use it as a temporary. */
26246 /* TODO: Omit this save if r3 is not used. */
26247 fputs ("\tpush {r3}\n", file
);
26248 fputs ("\tldr\tr3, ", file
);
26252 fputs ("\tldr\tr12, ", file
);
26254 assemble_name (file
, label
);
26255 fputc ('\n', file
);
      /* If we are generating PIC, the ldr instruction below loads
	 "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	 the address of the add + 8, so we have:

	 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
	     = target + 1.

	 Note that we have "+ 1" because some versions of GNU ld
	 don't set the low bit of the result for R_ARM_REL32
	 relocations against thumb function symbols.
	 On ARMv6M this is +4, not +8.  */
26269 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26270 assemble_name (file
, labelpc
);
26271 fputs (":\n", file
);
26272 if (TARGET_THUMB1_ONLY
)
26274 /* This is 2 insns after the start of the thunk, so we know it
26275 is 4-byte aligned. */
26276 fputs ("\tadd\tr3, pc, r3\n", file
);
26277 fputs ("\tmov r12, r3\n", file
);
26280 fputs ("\tadd\tr12, pc, r12\n", file
);
26282 else if (TARGET_THUMB1_ONLY
)
26283 fputs ("\tmov r12, r3\n", file
);
26285 if (TARGET_THUMB1_ONLY
)
26287 if (mi_delta
> 255)
26289 fputs ("\tldr\tr3, ", file
);
26290 assemble_name (file
, label
);
26291 fputs ("+4\n", file
);
26292 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26293 mi_op
, this_regno
, this_regno
);
26295 else if (mi_delta
!= 0)
26297 /* Thumb1 unified syntax requires s suffix in instruction name when
26298 one of the operands is immediate. */
26299 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26300 mi_op
, this_regno
, this_regno
,
26306 /* TODO: Use movw/movt for large constants when available. */
26307 while (mi_delta
!= 0)
26309 if ((mi_delta
& (3 << shift
)) == 0)
26313 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26314 mi_op
, this_regno
, this_regno
,
26315 mi_delta
& (0xff << shift
));
26316 mi_delta
&= ~(0xff << shift
);
26323 if (TARGET_THUMB1_ONLY
)
26324 fputs ("\tpop\t{r3}\n", file
);
26326 fprintf (file
, "\tbx\tr12\n");
26327 ASM_OUTPUT_ALIGN (file
, 2);
26328 assemble_name (file
, label
);
26329 fputs (":\n", file
);
26332 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26333 rtx tem
= XEXP (DECL_RTL (function
), 0);
26334 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26335 pipeline offset is four rather than eight. Adjust the offset
26337 tem
= plus_constant (GET_MODE (tem
), tem
,
26338 TARGET_THUMB1_ONLY
? -3 : -7);
26339 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26341 gen_rtx_SYMBOL_REF (Pmode
,
26342 ggc_strdup (labelpc
)));
26343 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26346 /* Output ".word .LTHUNKn". */
26347 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26349 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26350 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26354 fputs ("\tb\t", file
);
26355 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26356 if (NEED_PLT_RELOC
)
26357 fputs ("(PLT)", file
);
26358 fputc ('\n', file
);
26361 final_end_function ();
26364 /* MI thunk handling for TARGET_32BIT. */
26367 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26368 HOST_WIDE_INT vcall_offset
, tree function
)
26370 /* On ARM, this_regno is R0 or R1 depending on
26371 whether the function returns an aggregate or not.
26373 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26375 ? R1_REGNUM
: R0_REGNUM
);
26377 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26378 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26379 reload_completed
= 1;
26380 emit_note (NOTE_INSN_PROLOGUE_END
);
26382 /* Add DELTA to THIS_RTX. */
26384 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26385 delta
, this_rtx
, this_rtx
, false);
26387 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26388 if (vcall_offset
!= 0)
26390 /* Load *THIS_RTX. */
26391 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26392 /* Compute *THIS_RTX + VCALL_OFFSET. */
26393 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26395 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26396 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26397 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26400 /* Generate a tail call to the target function. */
26401 if (!TREE_USED (function
))
26403 assemble_external (function
);
26404 TREE_USED (function
) = 1;
26406 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26407 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26408 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26409 SIBLING_CALL_P (insn
) = 1;
26411 insn
= get_insns ();
26412 shorten_branches (insn
);
26413 final_start_function (insn
, file
, 1);
26414 final (insn
, file
, 1);
26415 final_end_function ();
26417 /* Stop pretending this is a post-reload pass. */
26418 reload_completed
= 0;
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
}
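/* For illustration only (a hand-written sketch that is not guaranteed to
   match the routines above instruction for instruction): a simple non-PIC
   ARM thunk that adds a small DELTA to the "this" pointer and tail-calls
   its target looks roughly like

	add	r0, r0, #8	@ this += delta
	b	target_function(PLT)

   with r1 used instead of r0 when the target returns an aggregate in
   memory, as selected by aggregate_value_p in the workers above.  */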
static bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
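/* A worked example (assuming IEEE half-precision and little-endian data):
   the HFmode constant 1.0 has the bit pattern 0x3c00, so the code above
   emits the two data bytes 0x00 0x3c followed by two bytes of zero padding,
   filling the 4-byte word that the ldr of the constant pool entry loads.  */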
26478 arm_output_load_gr (rtx
*operands
)
26485 if (!MEM_P (operands
[1])
26486 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26487 || !REG_P (reg
= XEXP (sum
, 0))
26488 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26489 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26490 return "wldrw%?\t%0, %1";
26492 /* Fix up an out-of-range load of a GR register. */
26493 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26494 wcgr
= operands
[0];
26496 output_asm_insn ("ldr%?\t%0, %1", operands
);
26498 operands
[0] = wcgr
;
26500 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26501 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
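/* A worked example of the calculation above (assuming NUM_ARG_REGS is 4 and
   UNITS_PER_WORD is 4, as for the core ARM argument registers r0-r3): for
   "int f (int a, ...)" one core register holds the named argument, so nregs
   is 1 and *pretend_size becomes (4 - 1) * 4 = 12 bytes, enough for the
   prologue to push r1-r3 next to the anonymous arguments already on the
   stack.  */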
26537 /* We can't rely on the caller doing the proper promotion when
26538 using APCS or ATPCS. */
26541 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26543 return !TARGET_AAPCS_BASED
;
26546 static machine_mode
26547 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26549 int *punsignedp ATTRIBUTE_UNUSED
,
26550 const_tree fntype ATTRIBUTE_UNUSED
,
26551 int for_return ATTRIBUTE_UNUSED
)
26553 if (GET_MODE_CLASS (mode
) == MODE_INT
26554 && GET_MODE_SIZE (mode
) < 4)
26560 /* AAPCS based ABIs use short enums by default. */
26563 arm_default_short_enums (void)
26565 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
26569 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26572 arm_align_anon_bitfield (void)
26574 return TARGET_AAPCS_BASED
;
26578 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26581 arm_cxx_guard_type (void)
26583 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26587 /* The EABI says test the least significant bit of a guard variable. */
26590 arm_cxx_guard_mask_bit (void)
26592 return TARGET_AAPCS_BASED
;
26596 /* The EABI specifies that all array cookies are 8 bytes long. */
26599 arm_get_cookie_size (tree type
)
26603 if (!TARGET_AAPCS_BASED
)
26604 return default_cxx_get_cookie_size (type
);
26606 size
= build_int_cst (sizetype
, 8);
26611 /* The EABI says that array cookies should also contain the element size. */
26614 arm_cookie_has_size (void)
26616 return TARGET_AAPCS_BASED
;
26620 /* The EABI says constructors and destructors should return a pointer to
26621 the object constructed/destroyed. */
26624 arm_cxx_cdtor_returns_this (void)
26626 return TARGET_AAPCS_BASED
;
26629 /* The EABI says that an inline function may never be the key
26633 arm_cxx_key_method_may_be_inline (void)
26635 return !TARGET_AAPCS_BASED
;
26639 arm_cxx_determine_class_data_visibility (tree decl
)
26641 if (!TARGET_AAPCS_BASED
26642 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26645 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26646 is exported. However, on systems without dynamic vague linkage,
26647 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26648 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26649 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26651 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26652 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26656 arm_cxx_class_data_always_comdat (void)
26658 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26659 vague linkage if the class has no key function. */
26660 return !TARGET_AAPCS_BASED
;
26664 /* The EABI says __aeabi_atexit should be used to register static
26668 arm_cxx_use_aeabi_atexit (void)
26670 return TARGET_AAPCS_BASED
;
26675 arm_set_return_address (rtx source
, rtx scratch
)
26677 arm_stack_offsets
*offsets
;
26678 HOST_WIDE_INT delta
;
26680 unsigned long saved_regs
;
26682 offsets
= arm_get_frame_offsets ();
26683 saved_regs
= offsets
->saved_regs_mask
;
26685 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26686 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26689 if (frame_pointer_needed
)
26690 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26693 /* LR will be the first saved register. */
26694 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26699 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26700 GEN_INT (delta
& ~4095)));
26705 addr
= stack_pointer_rtx
;
26707 addr
= plus_constant (Pmode
, addr
, delta
);
26709 /* The store needs to be marked as frame related in order to prevent
26710 DSE from deleting it as dead if it is based on fp. */
26711 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26712 RTX_FRAME_RELATED_P (insn
) = 1;
26713 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26719 thumb_set_return_address (rtx source
, rtx scratch
)
26721 arm_stack_offsets
*offsets
;
26722 HOST_WIDE_INT delta
;
26723 HOST_WIDE_INT limit
;
26726 unsigned long mask
;
26730 offsets
= arm_get_frame_offsets ();
26731 mask
= offsets
->saved_regs_mask
;
26732 if (mask
& (1 << LR_REGNUM
))
26735 /* Find the saved regs. */
26736 if (frame_pointer_needed
)
26738 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26739 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26745 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26748 /* Allow for the stack frame. */
26749 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26751 /* The link register is always the first saved register. */
26754 /* Construct the address. */
26755 addr
= gen_rtx_REG (SImode
, reg
);
26758 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26759 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26763 addr
= plus_constant (Pmode
, addr
, delta
);
26765 /* The store needs to be marked as frame related in order to prevent
26766 DSE from deleting it as dead if it is based on fp. */
26767 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26768 RTX_FRAME_RELATED_P (insn
) = 1;
26769 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26772 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26775 /* Implements target hook vector_mode_supported_p. */
26777 arm_vector_mode_supported_p (machine_mode mode
)
26779 /* Neon also supports V2SImode, etc. listed in the clause below. */
26780 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26781 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
26782 || mode
== V2DImode
|| mode
== V8HFmode
))
26785 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26786 && ((mode
== V2SImode
)
26787 || (mode
== V4HImode
)
26788 || (mode
== V8QImode
)))
26791 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26792 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26793 || mode
== V2HAmode
))
26799 /* Implements target hook array_mode_supported_p. */
26802 arm_array_mode_supported_p (machine_mode mode
,
26803 unsigned HOST_WIDE_INT nelems
)
26806 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26807 && (nelems
>= 2 && nelems
<= 4))
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */
26817 static machine_mode
26818 arm_preferred_simd_mode (machine_mode mode
)
26824 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26826 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26828 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26830 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26832 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26839 if (TARGET_REALLY_IWMMXT
)
26855 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26857 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26858 using r0-r4 for function arguments, r7 for the stack frame and don't have
26859 enough left over to do doubleword arithmetic. For Thumb-2 all the
26860 potentially problematic instructions accept high registers so this is not
26861 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26862 that require many low registers. */
26864 arm_class_likely_spilled_p (reg_class_t rclass
)
26866 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
26867 || rclass
== CC_REG
)
26873 /* Implements target hook small_register_classes_for_mode_p. */
26875 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
26877 return TARGET_THUMB1
;
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
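/* In effect, with the mask of 255 returned above the middle-end may omit an
   explicit "count & 255" on an SImode shift amount, since the hardware shift
   already truncates the count modulo 256; the mask of 0 for other modes
   (e.g. DImode) licenses no such simplification.  */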
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
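/* Two illustrative mappings implied by the arithmetic above (a reader's
   sketch): s10, which is valid for single-precision use, gets the legacy
   DWARF number 64 + 10 = 74, while d20 (internal number
   FIRST_VFP_REGNUM + 40, outside the single-precision range) maps into the
   D-register space as 256 + 40 / 2 = 276.  */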
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
26922 arm_dwarf_register_span (rtx rtl
)
26930 regno
= REGNO (rtl
);
26931 if (!IS_VFP_REGNUM (regno
))
26934 /* XXX FIXME: The EABI defines two VFP register ranges:
26935 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26937 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26938 corresponding D register. Until GDB supports this, we shall use the
26939 legacy encodings. We also use these encodings for D0-D15 for
26940 compatibility with older debuggers. */
26941 mode
= GET_MODE (rtl
);
26942 if (GET_MODE_SIZE (mode
) < 8)
26945 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26947 nregs
= GET_MODE_SIZE (mode
) / 4;
26948 for (i
= 0; i
< nregs
; i
+= 2)
26949 if (TARGET_BIG_END
)
26951 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26952 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
26956 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
26957 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26962 nregs
= GET_MODE_SIZE (mode
) / 8;
26963 for (i
= 0; i
< nregs
; i
++)
26964 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
26967 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
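/* For instance, a DFmode value living in d0 is described to the DWARF
   machinery as a PARALLEL of its two SImode halves s0 and s1 (in the
   opposite order when TARGET_BIG_END), matching the legacy S-register
   numbering chosen in arm_dbx_register_number above.  */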
26970 #if ARM_UNWIND_INFO
26971 /* Emit unwind directives for a store-multiple instruction or stack pointer
26972 push during alignment.
26973 These should only ever be generated by the function prologue code, so
26974 expect them to have a particular form.
26975 The store-multiple instruction sometimes pushes pc as the last register,
26976 although it should not be tracked into unwind information, or for -Os
26977 sometimes pushes some dummy registers before first register that needs
26978 to be tracked in unwind information; such dummy registers are there just
26979 to avoid separate stack adjustment, and will not be restored in the
26983 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
26986 HOST_WIDE_INT offset
;
26987 HOST_WIDE_INT nregs
;
26991 unsigned padfirst
= 0, padlast
= 0;
26994 e
= XVECEXP (p
, 0, 0);
26995 gcc_assert (GET_CODE (e
) == SET
);
26997 /* First insn will adjust the stack pointer. */
26998 gcc_assert (GET_CODE (e
) == SET
26999 && REG_P (SET_DEST (e
))
27000 && REGNO (SET_DEST (e
)) == SP_REGNUM
27001 && GET_CODE (SET_SRC (e
)) == PLUS
);
27003 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
27004 nregs
= XVECLEN (p
, 0) - 1;
27005 gcc_assert (nregs
);
27007 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
27010 /* For -Os dummy registers can be pushed at the beginning to
27011 avoid separate stack pointer adjustment. */
27012 e
= XVECEXP (p
, 0, 1);
27013 e
= XEXP (SET_DEST (e
), 0);
27014 if (GET_CODE (e
) == PLUS
)
27015 padfirst
= INTVAL (XEXP (e
, 1));
27016 gcc_assert (padfirst
== 0 || optimize_size
);
27017 /* The function prologue may also push pc, but not annotate it as it is
27018 never restored. We turn this into a stack pointer adjustment. */
27019 e
= XVECEXP (p
, 0, nregs
);
27020 e
= XEXP (SET_DEST (e
), 0);
27021 if (GET_CODE (e
) == PLUS
)
27022 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27024 padlast
= offset
- 4;
27025 gcc_assert (padlast
== 0 || padlast
== 4);
27027 fprintf (asm_out_file
, "\t.pad #4\n");
27029 fprintf (asm_out_file
, "\t.save {");
27031 else if (IS_VFP_REGNUM (reg
))
27034 fprintf (asm_out_file
, "\t.vsave {");
27037 /* Unknown register type. */
27038 gcc_unreachable ();
27040 /* If the stack increment doesn't match the size of the saved registers,
27041 something has gone horribly wrong. */
27042 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27046 /* The remaining insns will describe the stores. */
27047 for (i
= 1; i
<= nregs
; i
++)
27049 /* Expect (set (mem <addr>) (reg)).
27050 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27051 e
= XVECEXP (p
, 0, i
);
27052 gcc_assert (GET_CODE (e
) == SET
27053 && MEM_P (SET_DEST (e
))
27054 && REG_P (SET_SRC (e
)));
27056 reg
= REGNO (SET_SRC (e
));
27057 gcc_assert (reg
>= lastreg
);
27060 fprintf (asm_out_file
, ", ");
27061 /* We can't use %r for vfp because we need to use the
27062 double precision register names. */
27063 if (IS_VFP_REGNUM (reg
))
27064 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27066 asm_fprintf (asm_out_file
, "%r", reg
);
27070 /* Check that the addresses are consecutive. */
27071 e
= XEXP (SET_DEST (e
), 0);
27072 if (GET_CODE (e
) == PLUS
)
27073 gcc_assert (REG_P (XEXP (e
, 0))
27074 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27075 && CONST_INT_P (XEXP (e
, 1))
27076 && offset
== INTVAL (XEXP (e
, 1)));
27080 && REGNO (e
) == SP_REGNUM
);
27081 offset
+= reg_size
;
27084 fprintf (asm_out_file
, "}\n");
27086 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
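/* An illustrative example (a sketch, not tied to a particular function): for
   a prologue store-multiple such as "push {r4, r5, r6, lr}" the loop above
   emits

	.save {r4, r5, r6, lr}

   and any padding words in the same instruction (for example a pushed pc
   that is never restored) are accounted for by a separate .pad directive
   rather than being listed in the .save.  */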
27089 /* Emit unwind directives for a SET. */
27092 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27100 switch (GET_CODE (e0
))
27103 /* Pushing a single register. */
27104 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27105 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27106 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27109 asm_fprintf (asm_out_file
, "\t.save ");
27110 if (IS_VFP_REGNUM (REGNO (e1
)))
27111 asm_fprintf(asm_out_file
, "{d%d}\n",
27112 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27114 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27118 if (REGNO (e0
) == SP_REGNUM
)
27120 /* A stack increment. */
27121 if (GET_CODE (e1
) != PLUS
27122 || !REG_P (XEXP (e1
, 0))
27123 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27124 || !CONST_INT_P (XEXP (e1
, 1)))
27127 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27128 -INTVAL (XEXP (e1
, 1)));
27130 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27132 HOST_WIDE_INT offset
;
27134 if (GET_CODE (e1
) == PLUS
)
27136 if (!REG_P (XEXP (e1
, 0))
27137 || !CONST_INT_P (XEXP (e1
, 1)))
27139 reg
= REGNO (XEXP (e1
, 0));
27140 offset
= INTVAL (XEXP (e1
, 1));
27141 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27142 HARD_FRAME_POINTER_REGNUM
, reg
,
27145 else if (REG_P (e1
))
27148 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27149 HARD_FRAME_POINTER_REGNUM
, reg
);
27154 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27156 /* Move from sp to reg. */
27157 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27159 else if (GET_CODE (e1
) == PLUS
27160 && REG_P (XEXP (e1
, 0))
27161 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27162 && CONST_INT_P (XEXP (e1
, 1)))
27164 /* Set reg to offset from sp. */
27165 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27166 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27178 /* Emit unwind directives for the given insn. */
27181 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27184 bool handled_one
= false;
27186 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27189 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27190 && (TREE_NOTHROW (current_function_decl
)
27191 || crtl
->all_throwers_are_sibcalls
))
27194 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27197 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27199 switch (REG_NOTE_KIND (note
))
27201 case REG_FRAME_RELATED_EXPR
:
27202 pat
= XEXP (note
, 0);
27205 case REG_CFA_REGISTER
:
27206 pat
= XEXP (note
, 0);
27209 pat
= PATTERN (insn
);
27210 if (GET_CODE (pat
) == PARALLEL
)
27211 pat
= XVECEXP (pat
, 0, 0);
27214 /* Only emitted for IS_STACKALIGN re-alignment. */
27219 src
= SET_SRC (pat
);
27220 dest
= SET_DEST (pat
);
27222 gcc_assert (src
== stack_pointer_rtx
);
27223 reg
= REGNO (dest
);
27224 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27227 handled_one
= true;
27230 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27231 to get correct dwarf information for shrink-wrap. We should not
27232 emit unwind information for it because these are used either for
27233 pretend arguments or notes to adjust sp and restore registers from
27235 case REG_CFA_DEF_CFA
:
27236 case REG_CFA_ADJUST_CFA
:
27237 case REG_CFA_RESTORE
:
27240 case REG_CFA_EXPRESSION
:
27241 case REG_CFA_OFFSET
:
27242 /* ??? Only handling here what we actually emit. */
27243 gcc_unreachable ();
27251 pat
= PATTERN (insn
);
27254 switch (GET_CODE (pat
))
27257 arm_unwind_emit_set (asm_out_file
, pat
);
27261 /* Store multiple. */
27262 arm_unwind_emit_sequence (asm_out_file
, pat
);
27271 /* Output a reference from a function exception table to the type_info
27272 object X. The EABI specifies that the symbol should be relocated by
27273 an R_ARM_TARGET2 relocation. */
27276 arm_output_ttype (rtx x
)
27278 fputs ("\t.word\t", asm_out_file
);
27279 output_addr_const (asm_out_file
, x
);
27280 /* Use special relocations for symbol references. */
27281 if (!CONST_INT_P (x
))
27282 fputs ("(TARGET2)", asm_out_file
);
27283 fputc ('\n', asm_out_file
);
27288 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27291 arm_asm_emit_except_personality (rtx personality
)
27293 fputs ("\t.personality\t", asm_out_file
);
27294 output_addr_const (asm_out_file
, personality
);
27295 fputc ('\n', asm_out_file
);
27297 #endif /* ARM_UNWIND_INFO */
27299 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27302 arm_asm_init_sections (void)
27304 #if ARM_UNWIND_INFO
27305 exception_section
= get_unnamed_section (0, output_section_asm_op
,
27307 #endif /* ARM_UNWIND_INFO */
27309 #ifdef OBJECT_FORMAT_ELF
27310 if (target_pure_code
)
27311 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
27315 /* Output unwind directives for the start/end of a function. */
27318 arm_output_fn_unwind (FILE * f
, bool prologue
)
27320 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27324 fputs ("\t.fnstart\n", f
);
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
27330 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27331 && (TREE_NOTHROW (current_function_decl
)
27332 || crtl
->all_throwers_are_sibcalls
))
27333 fputs("\t.cantunwind\n", f
);
27335 fputs ("\t.fnend\n", f
);
27340 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27342 enum tls_reloc reloc
;
27345 val
= XVECEXP (x
, 0, 0);
27346 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27348 output_addr_const (fp
, val
);
27353 fputs ("(tlsgd)", fp
);
27356 fputs ("(tlsldm)", fp
);
27359 fputs ("(tlsldo)", fp
);
27362 fputs ("(gottpoff)", fp
);
27365 fputs ("(tpoff)", fp
);
27368 fputs ("(tlsdesc)", fp
);
27371 gcc_unreachable ();
27380 fputs (" + (. - ", fp
);
27381 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27382 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27383 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27384 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27394 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27397 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27399 gcc_assert (size
== 4);
27400 fputs ("\t.word\t", file
);
27401 output_addr_const (file
, x
);
27402 fputs ("(tlsldo)", file
);
27405 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27408 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27410 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27411 return arm_emit_tls_decoration (fp
, x
);
27412 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27415 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27417 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27418 assemble_name_raw (fp
, label
);
27422 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27424 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27428 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27432 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27434 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27438 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27442 else if (GET_CODE (x
) == CONST_VECTOR
)
27443 return arm_emit_vector_const (fp
, x
);
27448 /* Output assembly for a shift instruction.
27449 SET_FLAGS determines how the instruction modifies the condition codes.
27450 0 - Do not set condition codes.
27451 1 - Set condition codes.
27452 2 - Use smallest instruction. */
27454 arm_output_shift(rtx
* operands
, int set_flags
)
27457 static const char flag_chars
[3] = {'?', '.', '!'};
27462 c
= flag_chars
[set_flags
];
27463 shift
= shift_op(operands
[3], &val
);
27467 operands
[2] = GEN_INT(val
);
27468 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27471 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27473 output_asm_insn (pattern
, operands
);
27477 /* Output assembly for a WMMX immediate shift instruction. */
27479 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27481 int shift
= INTVAL (operands
[2]);
27483 machine_mode opmode
= GET_MODE (operands
[0]);
27485 gcc_assert (shift
>= 0);
27487 /* If the shift value in the register versions is > 63 (for D qualifier),
27488 31 (for W qualifier) or 15 (for H qualifier). */
27489 if (((opmode
== V4HImode
) && (shift
> 15))
27490 || ((opmode
== V2SImode
) && (shift
> 31))
27491 || ((opmode
== DImode
) && (shift
> 63)))
27495 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27496 output_asm_insn (templ
, operands
);
27497 if (opmode
== DImode
)
27499 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27500 output_asm_insn (templ
, operands
);
27505 /* The destination register will contain all zeros. */
27506 sprintf (templ
, "wzero\t%%0");
27507 output_asm_insn (templ
, operands
);
27512 if ((opmode
== DImode
) && (shift
> 32))
27514 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27515 output_asm_insn (templ
, operands
);
27516 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27517 output_asm_insn (templ
, operands
);
27521 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27522 output_asm_insn (templ
, operands
);
27527 /* Output assembly for a WMMX tinsr instruction. */
27529 arm_output_iwmmxt_tinsr (rtx
*operands
)
27531 int mask
= INTVAL (operands
[3]);
27534 int units
= mode_nunits
[GET_MODE (operands
[0])];
27535 gcc_assert ((mask
& (mask
- 1)) == 0);
27536 for (i
= 0; i
< units
; ++i
)
27538 if ((mask
& 0x01) == 1)
27544 gcc_assert (i
< units
);
27546 switch (GET_MODE (operands
[0]))
27549 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27552 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27555 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27558 gcc_unreachable ();
27561 output_asm_insn (templ
, operands
);
27566 /* Output a Thumb-1 casesi dispatch sequence. */
27568 thumb1_output_casesi (rtx
*operands
)
27570 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27572 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27574 switch (GET_MODE(diff_vec
))
27577 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27578 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27580 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27581 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27583 return "bl\t%___gnu_thumb1_case_si";
27585 gcc_unreachable ();
27589 /* Output a Thumb-2 casesi instruction. */
27591 thumb2_output_casesi (rtx
*operands
)
27593 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27595 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27597 output_asm_insn ("cmp\t%0, %1", operands
);
27598 output_asm_insn ("bhi\t%l3", operands
);
27599 switch (GET_MODE(diff_vec
))
27602 return "tbb\t[%|pc, %0]";
27604 return "tbh\t[%|pc, %0, lsl #1]";
27608 output_asm_insn ("adr\t%4, %l2", operands
);
27609 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27610 output_asm_insn ("add\t%4, %4, %5", operands
);
27615 output_asm_insn ("adr\t%4, %l2", operands
);
27616 return "ldr\t%|pc, [%4, %0, lsl #2]";
27619 gcc_unreachable ();
/* Implement TARGET_SCHED_ISSUE_RATE.  Look up the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions the scheduler should look ahead to choose the
   best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}
27641 /* Enable modeling of L2 auto-prefetcher. */
27643 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27645 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27649 arm_mangle_type (const_tree type
)
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
27653 if (TARGET_AAPCS_BASED
27654 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27655 return "St9__va_list";
27657 /* Half-precision float. */
27658 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27661 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27663 if (TYPE_NAME (type
) != NULL
)
27664 return arm_mangle_builtin_type (type
);
27666 /* Use the default mangling. */
27670 /* Order of allocation of core registers for Thumb: this allocation is
27671 written over the corresponding initial entries of the array
27672 initialized with REG_ALLOC_ORDER. We allocate all low registers
27673 first. Saving and restoring a low register is usually cheaper than
27674 using a call-clobbered high register. */
27676 static const int thumb_core_reg_alloc_order
[] =
27678 3, 2, 1, 0, 4, 5, 6, 7,
27679 14, 12, 8, 9, 10, 11
27682 /* Adjust register allocation order when compiling for Thumb. */
27685 arm_order_regs_for_local_alloc (void)
27687 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27688 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27690 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27691 sizeof (thumb_core_reg_alloc_order
));
27694 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27697 arm_frame_pointer_required (void)
27699 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27702 /* If the function receives nonlocal gotos, it needs to save the frame
27703 pointer in the nonlocal_goto_save_area object. */
27704 if (cfun
->has_nonlocal_label
)
27707 /* The frame pointer is required for non-leaf APCS frames. */
27708 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !leaf_function_p ())
27711 /* If we are probing the stack in the prologue, we will have a faulting
27712 instruction prior to the stack adjustment and this requires a frame
27713 pointer if we want to catch the exception using the EABI unwinder. */
27714 if (!IS_INTERRUPT (arm_current_func_type ())
27715 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27716 && arm_except_unwind_info (&global_options
) == UI_TARGET
27717 && cfun
->can_throw_non_call_exceptions
)
27719 HOST_WIDE_INT size
= get_frame_size ();
27721 /* That's irrelevant if there is no stack adjustment. */
27725 /* That's relevant only if there is a stack probe. */
27726 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27728 /* We don't have the final size of the frame so adjust. */
27729 size
+= 32 * UNITS_PER_WORD
;
27730 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
/* Only Thumb-1 cannot support conditional execution, so return true if
   the target is not Thumb-1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
27748 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27749 static HOST_WIDE_INT
27750 arm_vector_alignment (const_tree type
)
27752 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27754 if (TARGET_AAPCS_BASED
)
27755 align
= MIN (align
, 64);
27760 static unsigned int
27761 arm_autovectorize_vector_sizes (void)
27763 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27767 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27769 /* Vectors which aren't in packed structures will not be less aligned than
27770 the natural alignment of their element type, so this is safe. */
27771 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27774 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27778 arm_builtin_support_vector_misalignment (machine_mode mode
,
27779 const_tree type
, int misalignment
,
27782 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27784 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27789 /* If the misalignment is unknown, we should be able to handle the access
27790 so long as it is not to a member of a packed data structure. */
27791 if (misalignment
== -1)
27794 /* Return true if the misalignment is a multiple of the natural alignment
27795 of the vector's element type. This is probably always going to be
27796 true in practice, since we've already established that this isn't a
27798 return ((misalignment
% align
) == 0);
27801 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27806 arm_conditional_register_usage (void)
27810 if (TARGET_THUMB1
&& optimize_size
)
27812 /* When optimizing for size on Thumb-1, it's better not
27813 to use the HI regs, because of the overhead of
27815 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27816 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27819 /* The link register can be clobbered by any branch insn,
27820 but we have no way to track that at present, so mark
27821 it as unavailable. */
27823 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27825 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
27827 /* VFPv3 registers are disabled when earlier VFP
27828 versions are selected due to the definition of
27829 LAST_VFP_REGNUM. */
27830 for (regno
= FIRST_VFP_REGNUM
;
27831 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27833 fixed_regs
[regno
] = 0;
27834 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27835 || regno
>= FIRST_VFP_REGNUM
+ 32;
27839 if (TARGET_REALLY_IWMMXT
)
27841 regno
= FIRST_IWMMXT_GR_REGNUM
;
27842 /* The 2002/10/09 revision of the XScale ABI has wCG0
27843 and wCG1 as call-preserved registers. The 2002/11/21
27844 revision changed this so that all wCG registers are
27845 scratch registers. */
27846 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27847 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27848 fixed_regs
[regno
] = 0;
27849 /* The XScale ABI has wR0 - wR9 as scratch registers,
27850 the rest as call-preserved registers. */
27851 for (regno
= FIRST_IWMMXT_REGNUM
;
27852 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27854 fixed_regs
[regno
] = 0;
27855 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27859 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27861 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27862 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27864 else if (TARGET_APCS_STACK
)
27866 fixed_regs
[10] = 1;
27867 call_used_regs
[10] = 1;
27869 /* -mcaller-super-interworking reserves r11 for calls to
27870 _interwork_r11_call_via_rN(). Making the register global
27871 is an easy way of ensuring that it remains valid for all
27873 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27874 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27876 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27877 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27878 if (TARGET_CALLER_INTERWORKING
)
27879 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27881 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27885 arm_preferred_rename_class (reg_class_t rclass
)
27887 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27888 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27889 and code size can be reduced. */
27890 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27896 /* Compute the attribute "length" of insn "*push_multi".
27897 So this function MUST be kept in sync with that insn pattern. */
27899 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
27901 int i
, regno
, hi_reg
;
27902 int num_saves
= XVECLEN (parallel_op
, 0);
27912 regno
= REGNO (first_op
);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
     list is 8-bit.  Normally this means all registers in the list must be
     LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we must use
     32-bit encodings.  There is one exception: for PUSH, LR in HI_REGS can be
     used with a 16-bit encoding.  */
27918 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27919 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
27921 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
27922 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
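/* For example (an informal reading of the rule described above): under
   Thumb-2, "push {r0-r7, lr}" satisfies the 8-bit register-list restriction
   and can use the 2-byte encoding, whereas "push {r0, r8}" uses a high
   register other than LR and therefore needs the 4-byte encoding.  */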
27930 /* Compute the attribute "length" of insn. Currently, this function is used
27931 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27932 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27933 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
27934 true if OPERANDS contains insn which explicit updates base register. */
27937 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
27946 rtx parallel_op
= operands
[0];
27947 /* Initialize to elements number of PARALLEL. */
27948 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
27949 /* Initialize the value to base register. */
27950 unsigned regno
= REGNO (operands
[1]);
27951 /* Skip return and write back pattern.
27952 We only need register pop pattern for later analysis. */
27953 unsigned first_indx
= 0;
27954 first_indx
+= return_pc
? 1 : 0;
27955 first_indx
+= write_back_p
? 1 : 0;
27957 /* A pop operation can be done through LDM or POP. If the base register is SP
27958 and if it's with write back, then a LDM will be alias of POP. */
27959 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
27960 bool ldm_p
= !pop_p
;
27962 /* Check base register for LDM. */
27963 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
27966 /* Check each register in the list. */
27967 for (; indx
>= first_indx
; indx
--)
27969 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
27970 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27971 comment in arm_attr_length_push_multi. */
27972 if (REGNO_REG_CLASS (regno
) == HI_REGS
27973 && (regno
!= PC_REGNUM
|| ldm_p
))
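/* By analogy with the push case: "pop {r4-r7, pc}" can keep the short
   encoding because PC is the one high register allowed in a 16-bit POP,
   whereas an LDM with a high base register, or high registers in the list,
   falls back to the longer encoding.  This is an informal illustration of
   the checks above, not a complete statement of the encoding rules.  */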
27980 /* Compute the number of instructions emitted by output_move_double. */
27982 arm_count_output_move_double_insns (rtx
*operands
)
27986 /* output_move_double may modify the operands array, so call it
27987 here on a copy of the array. */
27988 ops
[0] = operands
[0];
27989 ops
[1] = operands
[1];
27990 output_move_double (ops
, false, &count
);
27995 vfp3_const_double_for_fract_bits (rtx operand
)
27997 REAL_VALUE_TYPE r0
;
27999 if (!CONST_DOUBLE_P (operand
))
28002 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
28003 if (exact_real_inverse (DFmode
, &r0
)
28004 && !REAL_VALUE_NEGATIVE (r0
))
28006 if (exact_real_truncate (DFmode
, &r0
))
28008 HOST_WIDE_INT value
= real_to_integer (&r0
);
28009 value
= value
& 0xffffffff;
28010 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28012 int ret
= exact_log2 (value
);
28013 gcc_assert (IN_RANGE (ret
, 0, 31));
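/* A worked example of the test above: for the constant 0.25 the exact real
   inverse is 4.0, which truncates exactly to the integer 4, a power of two,
   so the function returns exact_log2 (4) = 2, i.e. the number of fractional
   bits the operand encodes.  */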
28021 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28022 log2 is in [1, 32], return that log2. Otherwise return -1.
28023 This is used in the patterns for vcvt.s32.f32 floating-point to
28024 fixed-point conversions. */
28027 vfp3_const_double_for_bits (rtx x
)
28029 const REAL_VALUE_TYPE
*r
;
28031 if (!CONST_DOUBLE_P (x
))
28034 r
= CONST_DOUBLE_REAL_VALUE (x
);
28036 if (REAL_VALUE_NEGATIVE (*r
)
28037 || REAL_VALUE_ISNAN (*r
)
28038 || REAL_VALUE_ISINF (*r
)
28039 || !real_isinteger (r
, SFmode
))
28042 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
28044 /* The exact_log2 above will have returned -1 if this is
28045 not an exact log2. */
28046 if (!IN_RANGE (hwint
, 1, 32))
28053 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28056 arm_pre_atomic_barrier (enum memmodel model
)
28058 if (need_atomic_barrier_p (model
, true))
28059 emit_insn (gen_memory_barrier ());
28063 arm_post_atomic_barrier (enum memmodel model
)
28065 if (need_atomic_barrier_p (model
, false))
28066 emit_insn (gen_memory_barrier ());
28069 /* Emit the load-exclusive and store-exclusive instructions.
28070 Use acquire and release versions if necessary. */
28073 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28075 rtx (*gen
) (rtx
, rtx
);
28081 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28082 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28083 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28084 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28086 gcc_unreachable ();
28093 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28094 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
28095 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
28096 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
28098 gcc_unreachable ();
28102 emit_insn (gen (rval
, mem
));
28106 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28109 rtx (*gen
) (rtx
, rtx
, rtx
);
28115 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28116 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28117 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28118 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28120 gcc_unreachable ();
28127 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28128 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
28129 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
28130 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
28132 gcc_unreachable ();
28136 emit_insn (gen (bval
, rval
, mem
));
28139 /* Mark the previous jump instruction as unlikely. */
28142 emit_unlikely_jump (rtx insn
)
28144 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
28146 insn
= emit_jump_insn (insn
);
28147 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
28150 /* Expand a compare and swap pattern. */
28153 arm_expand_compare_and_swap (rtx operands
[])
28155 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28157 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28159 bval
= operands
[0];
28160 rval
= operands
[1];
28162 oldval
= operands
[3];
28163 newval
= operands
[4];
28164 is_weak
= operands
[5];
28165 mod_s
= operands
[6];
28166 mod_f
= operands
[7];
28167 mode
= GET_MODE (mem
);
28169 /* Normally the succ memory model must be stronger than fail, but in the
28170 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28171 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28173 if (TARGET_HAVE_LDACQ
28174 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28175 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28176 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28182 /* For narrow modes, we're going to perform the comparison in SImode,
28183 so do the zero-extension now. */
28184 rval
= gen_reg_rtx (SImode
);
28185 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28189 /* Force the value into a register if needed. We waited until after
28190 the zero-extension above to do this properly. */
28191 if (!arm_add_operand (oldval
, SImode
))
28192 oldval
= force_reg (SImode
, oldval
);
28196 if (!cmpdi_operand (oldval
, mode
))
28197 oldval
= force_reg (mode
, oldval
);
28201 gcc_unreachable ();
28206 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
28207 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
28208 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
28209 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
28211 gcc_unreachable ();
28214 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28216 if (mode
== QImode
|| mode
== HImode
)
28217 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28219 /* In all cases, we arrange for success to be signaled by Z set.
28220 This arrangement allows for the boolean result to be used directly
28221 in a subsequent branch, post optimization. */
28222 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28223 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
28224 emit_insn (gen_rtx_SET (bval
, x
));
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[5]));
  mod_f = memmodel_from_int (INTVAL (operands[6]));
  scratch = operands[7];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_release (mod_s));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_acquire (mod_s));

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_release (model));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_acquire (model));

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
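/* Illustration only, not part of the build: sketches of user code that ends
   up in arm_split_atomic_op, using the documented __atomic builtins.  Each
   becomes a load-exclusive / operate / store-exclusive loop as split above;
   the helper names are made up for the example.  */
#if 0
static int
example_fetch_add (int *ptr, int delta)
{
  return __atomic_fetch_add (ptr, delta, __ATOMIC_ACQ_REL);
}

static unsigned
example_fetch_nand (unsigned *ptr, unsigned mask)
{
  /* Lowered via the AND/NOT pair handled by the code == NOT case.  */
  return __atomic_fetch_nand (ptr, mask, __ATOMIC_RELAXED);
}
#endif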
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
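/* Illustration only, not part of the build: a variable permutation written
   with __builtin_shuffle, which is the kind of request arm_expand_vec_perm
   services.  The AND with nelt-1 (or 2*nelt-1) above supplies the modulo
   indexing that VTBL itself does not perform.  */
#if 0
typedef unsigned char v8qi __attribute__ ((vector_size (8)));

static v8qi
example_variable_perm (v8qi x, v8qi sel)
{
  /* sel is masked and then used as the VTBL index vector.  */
  return __builtin_shuffle (x, sel);
}
#endif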
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}

/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
	      && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V8HFmode:  gen = gen_neon_vuzpv8hf_internal;  break;
    case V4HFmode:  gen = gen_neon_vuzpv4hf_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
	  != elt)
	return false;
      elt =
	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
	  != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V8HFmode:  gen = gen_neon_vzipv8hf_internal;  break;
    case V4HFmode:  gen = gen_neon_vzipv4hf_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	case V8HFmode:  gen = gen_neon_vrev64v8hf;  break;
	case V4HFmode:  gen = gen_neon_vrev64v4hf;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V8HFmode:  gen = gen_neon_vtrnv8hf_internal;  break;
    case V4HFmode:  gen = gen_neon_vtrnv4hf_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	break;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode:  gen = gen_neon_vextv8qi;  break;
    case V4HImode:  gen = gen_neon_vextv4hi;  break;
    case V8HImode:  gen = gen_neon_vextv8hi;  break;
    case V2SImode:  gen = gen_neon_vextv2si;  break;
    case V4SImode:  gen = gen_neon_vextv4si;  break;
    case V4HFmode:  gen = gen_neon_vextv4hf;  break;
    case V8HFmode:  gen = gen_neon_vextv8hf;  break;
    case V2SFmode:  gen = gen_neon_vextv2sf;  break;
    case V4SFmode:  gen = gen_neon_vextv4sf;  break;
    case V2DImode:  gen = gen_neon_vextv2di;  break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
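/* Illustration only, not part of the build: constant permutations like the
   ones below are what the arm_evpc_neon_* recognizers above try to match
   (an interleave that VZIP can do, a reversal that VREV can do) before the
   generic VTBL fallback is used.  */
#if 0
typedef unsigned char v8qi __attribute__ ((vector_size (8)));

static v8qi
example_zip_lo (v8qi a, v8qi b)
{
  const v8qi sel = { 0, 8, 1, 9, 2, 10, 3, 11 };
  return __builtin_shuffle (a, b, sel);
}

static v8qi
example_reverse (v8qi a)
{
  const v8qi sel = { 7, 6, 5, 4, 3, 2, 1, 0 };
  return __builtin_shuffle (a, sel);
}
#endif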
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behavior", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  /* Clearing the out register in DImode first avoids lots
	     of spilling and results in less stack usage.
	     Later this redundant insn is completely removed.
	     Do that only if "in" and "out" are different registers.  */
	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 arithmetic-left:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 arithmetic-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up << amount;

	 logical-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up << amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
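/* Illustration only, not part of the build: the shift-by-register
   decomposition above, written as plain C on 32-bit halves for the
   logical-right case.  The guards avoid the out-of-range shifts that the
   real instruction sequence can rely on ARM register-shift semantics to
   zero out; the function name is made up for the example.  */
#if 0
#include <stdint.h>

static uint64_t
example_lshrdi (uint64_t in, unsigned amount)	/* 0 <= amount <= 63.  */
{
  uint32_t in_low = (uint32_t) in, in_high = (uint32_t) (in >> 32);
  uint32_t out_down, out_up;

  if (amount < 32)
    {
      out_down = in_low >> amount;
      if (amount != 0)
	out_down |= in_high << (32 - amount);
      out_up = in_high >> amount;
    }
  else
    {
      out_down = in_high >> (amount - 32);
      out_up = 0;
    }
  return ((uint64_t) out_up << 32) | out_down;
}
#endif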
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
/* Return true if COMPARISON is a valid comparison operation, and put the
   operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case HFmode:
      if (!TARGET_VFP_FP16INST)
	break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case SFmode:
    case DFmode:
      if (!vfp_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */

static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */

static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
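/* Illustration only, not part of the build: the word-store estimate above on
   plain host integers, showing how the leftover table turns the trailing
   0-3 bytes into extra strh/strb instructions.  */
#if 0
static int
example_non_vect_cost (int const_cost, unsigned length)
{
  static const int leftover[4] = {0, 1, 1, 2};
  return const_cost + (length >> 2) + leftover[length & 3];
}
#endif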
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */

static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num++;

  return (num <= arm_block_set_max_insns ());
}
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */

static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;
  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */

static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */

static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */

static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  emit_move_insn (mem, reg);
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */

static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
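/* Illustration only, not part of the build: a constant-size memset like the
   one below is what reaches arm_gen_setmem through the setmem expander;
   with NEON and a tuning that prefers it, the vectorized helpers above are
   used, otherwise the str/strh/strb fallbacks.  The struct is made up for
   the example.  */
#if 0
#include <string.h>

struct packet { unsigned char payload[45]; };

static void
example_clear (struct packet *p)
{
  memset (p->payload, 0, sizeof (p->payload));	/* 45 <= 64 bytes.  */
}
#endif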
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt) == (set (zero_extract (reg r0)
					(const_int 16)
					(const_int 16))
		     (const_int imm16_1))
     or
     prev (movw)  == (set (reg r1)
		      (high (symbol_ref ("SYM"))))
     curr (movt) == (set (reg r0)
		     (lo_sum (reg r1)
			     (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	   && REG_P (SET_DEST (curr_set))
	   && REG_P (SET_DEST (prev_set))
	   && GET_CODE (SET_SRC (prev_set)) == HIGH
	   && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  /* Fusion candidates must both be single sets.  */
  if (!prev_set || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
      && aarch_crypto_can_dual_issue (prev, curr))
    return true;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
30057 arm_fusion_enabled_p (tune_params::fuse_ops op
)
30059 return current_tune
->fusible_ops
& op
;
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
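/* Illustration only, not part of the build: with the 1 << 29 offset returned
   above, the AddressSanitizer shadow address follows the usual
   shadow = (addr >> 3) + offset mapping.  */
#if 0
#include <stdint.h>

static uintptr_t
example_asan_shadow (uintptr_t addr)
{
  return (addr >> 3) + ((uintptr_t) 1 << 29);
}
#endif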
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((TREE_CODE (decl_op1) == VAR_DECL
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (TREE_CODE (decl_op0) == VAR_DECL
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */

bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}
/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const struct arm_fpu_desc *fpu_desc = &all_fpus[ptr->x_arm_fpu_index];

  fprintf (file, "%*sselected arch %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_desc->name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
			  : target_option_default_node);

  struct cl_target_option *callee_opts
    = TREE_TARGET_OPTION (callee_tree ? callee_tree
			  : target_option_default_node);

  const struct arm_fpu_desc *caller_fpu
    = &all_fpus[caller_opts->x_arm_fpu_index];
  const struct arm_fpu_desc *callee_fpu
    = &all_fpus[callee_opts->x_arm_fpu_index];

  /* Callee's fpu features should be a subset of the caller's.  */
  if ((caller_fpu->features & callee_fpu->features) != callee_fpu->features)
    return false;

  /* Need same FPU regs.  */
  if (caller_fpu->regs != callee_fpu->regs)
    return false;

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g using asm,
     must be explicitly protected with noinline.  */
  return true;
}
30240 arm_relayout_function (tree fndecl
)
30242 if (DECL_USER_ALIGN (fndecl
))
30245 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30248 callee_tree
= target_option_default_node
;
30250 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30251 SET_DECL_ALIGN (fndecl
, FUNCTION_BOUNDARY_P (opts
->x_target_flags
));
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      while (ISSPACE (*q)) ++q;

      argstr = NULL;
      if (!strncmp (q, "thumb", 5))
	opts->x_target_flags |= MASK_THUMB;

      else if (!strncmp (q, "arm", 3))
	opts->x_target_flags &= ~MASK_THUMB;

      else if (!strncmp (q, "fpu=", 4))
	{
	  if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
				       &opts->x_arm_fpu_index, CL_TARGET))
	    {
	      error ("invalid fpu for attribute(target(\"%s\"))", q);
	      return false;
	    }
	}
      else
	{
	  error ("attribute(target(\"%s\")) is unknown", q);
	  return false;
	}

      arm_option_check_internal (opts);
    }

  return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  /* Do any overrides, such as global options arch=xxx.  */
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
/* Prepend a "target" attribute whose value is the string MODE to the
   attribute list *ATTRIBUTES.  */

static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
                                        build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
                           build_tree_list (NULL_TREE, value),
                           *attributes);
}
/* For testing.  Insert thumb or arm modes alternatively on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
      || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
                              tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
                           TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
                            TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
                                              &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  finalize_options_struct (&func_options);

  return ret;
}
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
          || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
              && cfun->is_thunk))
        fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
        fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
        fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  asm_fprintf (asm_out_file, "\t.fpu %s\n",
               TARGET_SOFT_FLOAT ? "softvfp" : TARGET_FPU_NAME);

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
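
/* For illustration only (not part of this file): for a Thumb-2 function
   compiled with hardware floating point, the directives emitted above
   typically look like

        .syntax unified
        .thumb
        .thumb_func
        .fpu vfpv4

   while an ARM-state function gets ".arm" in place of the two Thumb
   directives.  The .fpu name follows the selected FPU, or "softvfp"
   when soft-float is in effect.  */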
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
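
/* For illustration only (not part of this file): the two address shapes
   recognised above are

     (mem (reg r1))                       -> *base = r1, *offset = 0
     (mem (plus (reg r1) (const_int 8)))  -> *base = r1, *offset = 8

   anything else clears both outputs and makes the function return false.  */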
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support to fuse ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other instruction,
   FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kind
   instruction fusion can be supported by returning different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
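
/* For illustration only (not part of this file): given the two loads

     ldr r0, [r3, #4]
     ldr r1, [r3, #8]

   both receive the same FUSION_PRI (loads share one priority class), and
   the load with the smaller offset receives the larger PRI, so the
   scheduler tends to keep the pair adjacent and in ascending-offset order,
   which makes it easier for later passes to combine them.  */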
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian              Little-Endian

GCC              0   1   2   3           3   2   1   0
               | x | x | x | x |       | x | x | x | x |
Architecture     3   2   1   0           3   2   1   0

Low Mask:          { 2, 3 }                { 0, 1 }
High Mask:         { 0, 1 }                { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
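
/* For illustration only (not part of this file): for V4SImode and
   HIGH == true this yields (parallel [(const_int 2) (const_int 3)]) on a
   little-endian target and (parallel [(const_int 0) (const_int 1)]) on a
   big-endian one, matching the "High Mask" row of the diagram above.  */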
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
                                     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
          || INTVAL (elt_ideal) != INTVAL (elt_op))
        return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */

static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
                         const_tree)
{
  /* For now, we punt and not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
                          rtx label_ref)
{
  rtx x;

  x = gen_rtx_fmt_ee (code, VOIDmode,
                      gen_rtx_REG (cc_mode, CC_REGNUM),
                      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (VOIDmode, label_ref),
                            pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
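
/* For illustration only (not part of this file): with CODE == NE and
   CC_MODE == CCmode the emitted jump pattern has the shape

     (set (pc) (if_then_else (ne (reg:CC CC_REGNUM) (const_int 0))
                             (label_ref LABEL)
                             (pc)))

   and the branch is annotated as unlikely to be taken.  */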
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;		/* SHF_ARM_PURECODE.  */

      if (!(flags & SECTION_DEBUG))
        *num |= 0x2;			/* SHF_ALLOC.  */
      if (flags & SECTION_EXCLUDE)
        *num |= 0x80000000;		/* SHF_EXCLUDE.  */
      if (flags & SECTION_WRITE)
        *num |= 0x1;			/* SHF_WRITE.  */
      if (flags & SECTION_CODE)
        *num |= 0x4;			/* SHF_EXECINSTR.  */
      if (flags & SECTION_MERGE)
        *num |= 0x10;			/* SHF_MERGE.  */
      if (flags & SECTION_STRINGS)
        *num |= 0x20;			/* SHF_STRINGS.  */
      if (flags & SECTION_TLS)
        *num |= 0x400;			/* SHF_TLS.  */
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        *num |= 0x200;			/* SHF_GROUP.  */

      return true;
    }

  return false;
}
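
/* For illustration only (not part of this file): for an allocatable,
   executable pure-code section the value computed above is
   0x20000000 (SHF_ARM_PURECODE) | 0x2 (SHF_ALLOC) | 0x4 (SHF_EXECINSTR)
   = 0x20000006, which is then emitted as a numeric flags field in the
   .section directive instead of the usual letter string.  */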
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
                      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
                                                       exit);

      /* If default_sec is not null, then it must be a special section like for
         example .text.startup.  We set the pure-code attribute and return the
         same section to preserve existing behavior.  */
      if (default_sec)
        default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
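
/* For illustration only (hypothetical user code, not part of this file):
   when compiling with -mpure-code, a function placed in its own section,
   e.g.

     __attribute__ ((section (".ramfunc")))
     int handler (void) { return 0; }

   ends up in a .ramfunc section whose flags include SHF_ARM_PURECODE,
   marking it as containing only instructions that are never read as
   data (execute-only code).  */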
/* Implement the TARGET_SECTION_TYPE_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer
   may contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}

#include "gt-arm.h"