1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
26 #include "hash-table.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
40 #include "insn-attr.h"
51 #include "diagnostic-core.h"
54 #include "dominance.h"
60 #include "cfgcleanup.h"
61 #include "basic-block.h"
64 #include "plugin-api.h"
71 #include "sched-int.h"
72 #include "target-def.h"
74 #include "langhooks.h"
81 #include "gimple-expr.h"
83 #include "tm-constrs.h"
/* Forward definitions of types.  */
typedef struct minipool_node Mnode;
typedef struct minipool_fixup Mfix;
/* Language-dependent hook for emitting object attributes.  Tentative
   definition, so it is zero-initialized here; the assignment site is not
   visible in this part of the file.  */
void (*arm_lang_output_object_attributes_hook)(void);
96 /* Forward function declarations. */
97 static bool arm_const_not_ok_for_debug_p (rtx
);
98 static bool arm_lra_p (void);
99 static bool arm_needs_doubleword_align (machine_mode
, const_tree
);
100 static int arm_compute_static_chain_stack_bytes (void);
101 static arm_stack_offsets
*arm_get_frame_offsets (void);
102 static void arm_add_gc_roots (void);
103 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
104 HOST_WIDE_INT
, rtx
, rtx
, int, int);
105 static unsigned bit_count (unsigned long);
106 static int arm_address_register_rtx_p (rtx
, int);
107 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
108 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
109 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
110 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
111 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
112 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
113 inline static int thumb1_index_register_rtx_p (rtx
, int);
114 static int thumb_far_jump_used_p (void);
115 static bool thumb_force_lr_save (void);
116 static unsigned arm_size_return_regs (void);
117 static bool arm_assemble_integer (rtx
, unsigned int, int);
118 static void arm_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
);
119 static void arm_print_operand (FILE *, rtx
, int);
120 static void arm_print_operand_address (FILE *, rtx
);
121 static bool arm_print_operand_punct_valid_p (unsigned char code
);
122 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
123 static arm_cc
get_arm_condition_code (rtx
);
124 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
125 static const char *output_multi_immediate (rtx
*, const char *, const char *,
127 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
128 static struct machine_function
*arm_init_machine_status (void);
129 static void thumb_exit (FILE *, int);
130 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
131 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
132 static Mnode
*add_minipool_forward_ref (Mfix
*);
133 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
134 static Mnode
*add_minipool_backward_ref (Mfix
*);
135 static void assign_minipool_offsets (Mfix
*);
136 static void arm_print_value (FILE *, rtx
);
137 static void dump_minipool (rtx_insn
*);
138 static int arm_barrier_cost (rtx
);
139 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
140 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
141 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
143 static void arm_reorg (void);
144 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
145 static unsigned long arm_compute_save_reg0_reg12_mask (void);
146 static unsigned long arm_compute_save_reg_mask (void);
147 static unsigned long arm_isr_value (tree
);
148 static unsigned long arm_compute_func_type (void);
149 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
150 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
151 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
152 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
153 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
155 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
156 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
157 static int arm_comp_type_attributes (const_tree
, const_tree
);
158 static void arm_set_default_type_attributes (tree
);
159 static int arm_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code
,
162 unsigned HOST_WIDE_INT val
,
163 struct four_ints
*return_sequence
);
164 static int optimal_immediate_sequence_1 (enum rtx_code code
,
165 unsigned HOST_WIDE_INT val
,
166 struct four_ints
*return_sequence
,
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree
, tree
);
170 static machine_mode
arm_promote_function_mode (const_tree
,
173 static bool arm_return_in_memory (const_tree
, const_tree
);
174 static rtx
arm_function_value (const_tree
, const_tree
, bool);
175 static rtx
arm_libcall_value_1 (machine_mode
);
176 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
183 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
184 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
185 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
186 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
187 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
188 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
189 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
190 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
191 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
192 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
193 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
194 static void arm_init_builtins (void);
195 static void arm_init_iwmmxt_builtins (void);
196 static rtx
safe_vector_operand (rtx
, machine_mode
);
197 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
198 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
199 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, machine_mode
, int);
200 static tree
arm_builtin_decl (unsigned, bool);
201 static void emit_constant_insn (rtx cond
, rtx pattern
);
202 static rtx_insn
*emit_set_insn (rtx
, rtx
);
203 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
204 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
206 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
208 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
210 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
211 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
213 static rtx
aapcs_libcall_value (machine_mode
);
214 static int aapcs_select_return_coproc (const_tree
, const_tree
);
216 #ifdef OBJECT_FORMAT_ELF
217 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
218 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
221 static void arm_encode_section_info (tree
, rtx
, int);
224 static void arm_file_end (void);
225 static void arm_file_start (void);
227 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
229 static bool arm_pass_by_reference (cumulative_args_t
,
230 machine_mode
, const_tree
, bool);
231 static bool arm_promote_prototypes (const_tree
);
232 static bool arm_default_short_enums (void);
233 static bool arm_align_anon_bitfield (void);
234 static bool arm_return_in_msb (const_tree
);
235 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
236 static bool arm_return_in_memory (const_tree
, const_tree
);
238 static void arm_unwind_emit (FILE *, rtx_insn
*);
239 static bool arm_output_ttype (rtx
);
240 static void arm_asm_emit_except_personality (rtx
);
241 static void arm_asm_init_sections (void);
243 static rtx
arm_dwarf_register_span (rtx
);
245 static tree
arm_cxx_guard_type (void);
246 static bool arm_cxx_guard_mask_bit (void);
247 static tree
arm_get_cookie_size (tree
);
248 static bool arm_cookie_has_size (void);
249 static bool arm_cxx_cdtor_returns_this (void);
250 static bool arm_cxx_key_method_may_be_inline (void);
251 static void arm_cxx_determine_class_data_visibility (tree
);
252 static bool arm_cxx_class_data_always_comdat (void);
253 static bool arm_cxx_use_aeabi_atexit (void);
254 static void arm_init_libfuncs (void);
255 static tree
arm_build_builtin_va_list (void);
256 static void arm_expand_builtin_va_start (tree
, rtx
);
257 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
258 static void arm_option_override (void);
259 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
260 static bool arm_cannot_copy_insn_p (rtx_insn
*);
261 static int arm_issue_rate (void);
262 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
263 static bool arm_output_addr_const_extra (FILE *, rtx
);
264 static bool arm_allocate_stack_slots_for_args (void);
265 static bool arm_warn_func_return (tree
);
266 static const char *arm_invalid_parameter_type (const_tree t
);
267 static const char *arm_invalid_return_type (const_tree t
);
268 static tree
arm_promoted_type (const_tree t
);
269 static tree
arm_convert_to_type (tree type
, tree expr
);
270 static bool arm_scalar_mode_supported_p (machine_mode
);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx
, tree
, rtx
);
275 static rtx
arm_trampoline_adjust_address (rtx
);
276 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
280 static bool arm_array_mode_supported_p (machine_mode
,
281 unsigned HOST_WIDE_INT
);
282 static machine_mode
arm_preferred_simd_mode (machine_mode
);
283 static bool arm_class_likely_spilled_p (reg_class_t
);
284 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
285 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
290 static void arm_conditional_register_usage (void);
291 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
292 static unsigned int arm_autovectorize_vector_sizes (void);
293 static int arm_default_branch_cost (bool, bool);
294 static int arm_cortex_a5_branch_cost (bool, bool);
295 static int arm_cortex_m_branch_cost (bool, bool);
297 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
298 const unsigned char *sel
);
300 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
302 int misalign ATTRIBUTE_UNUSED
);
303 static unsigned arm_add_stmt_cost (void *data
, int count
,
304 enum vect_cost_for_stmt kind
,
305 struct _stmt_vec_info
*stmt_info
,
307 enum vect_cost_model_location where
);
309 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
310 bool op0_preserve_value
);
311 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
313 /* Table of machine attributes. */
314 static const struct attribute_spec arm_attribute_table
[] =
316 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
317 affects_type_identity } */
318 /* Function calls made to this symbol must be done indirectly, because
319 it may lie outside of the 26 bit addressing range of a normal function
321 { "long_call", 0, 0, false, true, true, NULL
, false },
322 /* Whereas these functions are always known to reside within the 26 bit
324 { "short_call", 0, 0, false, true, true, NULL
, false },
325 /* Specify the procedure call conventions for a function. */
326 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
328 /* Interrupt Service Routines have special prologue and epilogue requirements. */
329 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
331 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
333 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
336 /* ARM/PE has three new attributes:
338 dllexport - for exporting a function/variable that will live in a dll
339 dllimport - for importing a function/variable from a dll
341 Microsoft allows multiple declspecs in one __declspec, separating
342 them with spaces. We do NOT support this. Instead, use __declspec
345 { "dllimport", 0, 0, true, false, false, NULL
, false },
346 { "dllexport", 0, 0, true, false, false, NULL
, false },
347 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
349 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
350 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
351 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
352 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
355 { NULL
, 0, 0, false, false, false, NULL
, false }
358 /* Initialize the GCC target structure. */
359 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
360 #undef TARGET_MERGE_DECL_ATTRIBUTES
361 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
364 #undef TARGET_LEGITIMIZE_ADDRESS
365 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
368 #define TARGET_LRA_P arm_lra_p
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
373 #undef TARGET_ASM_FILE_START
374 #define TARGET_ASM_FILE_START arm_file_start
375 #undef TARGET_ASM_FILE_END
376 #define TARGET_ASM_FILE_END arm_file_end
378 #undef TARGET_ASM_ALIGNED_SI_OP
379 #define TARGET_ASM_ALIGNED_SI_OP NULL
380 #undef TARGET_ASM_INTEGER
381 #define TARGET_ASM_INTEGER arm_assemble_integer
383 #undef TARGET_PRINT_OPERAND
384 #define TARGET_PRINT_OPERAND arm_print_operand
385 #undef TARGET_PRINT_OPERAND_ADDRESS
386 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
387 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
388 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
390 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
391 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
393 #undef TARGET_ASM_FUNCTION_PROLOGUE
394 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
396 #undef TARGET_ASM_FUNCTION_EPILOGUE
397 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE arm_option_override
402 #undef TARGET_COMP_TYPE_ATTRIBUTES
403 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
405 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
406 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
408 #undef TARGET_SCHED_ADJUST_COST
409 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
411 #undef TARGET_SCHED_REORDER
412 #define TARGET_SCHED_REORDER arm_sched_reorder
414 #undef TARGET_REGISTER_MOVE_COST
415 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
417 #undef TARGET_MEMORY_MOVE_COST
418 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
420 #undef TARGET_ENCODE_SECTION_INFO
422 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
424 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
427 #undef TARGET_STRIP_NAME_ENCODING
428 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
430 #undef TARGET_ASM_INTERNAL_LABEL
431 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
433 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
434 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
436 #undef TARGET_FUNCTION_VALUE
437 #define TARGET_FUNCTION_VALUE arm_function_value
439 #undef TARGET_LIBCALL_VALUE
440 #define TARGET_LIBCALL_VALUE arm_libcall_value
442 #undef TARGET_FUNCTION_VALUE_REGNO_P
443 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
445 #undef TARGET_ASM_OUTPUT_MI_THUNK
446 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
447 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
448 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
450 #undef TARGET_RTX_COSTS
451 #define TARGET_RTX_COSTS arm_rtx_costs
452 #undef TARGET_ADDRESS_COST
453 #define TARGET_ADDRESS_COST arm_address_cost
455 #undef TARGET_SHIFT_TRUNCATION_MASK
456 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
457 #undef TARGET_VECTOR_MODE_SUPPORTED_P
458 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
459 #undef TARGET_ARRAY_MODE_SUPPORTED_P
460 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
461 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
462 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
463 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
464 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
465 arm_autovectorize_vector_sizes
467 #undef TARGET_MACHINE_DEPENDENT_REORG
468 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
470 #undef TARGET_INIT_BUILTINS
471 #define TARGET_INIT_BUILTINS arm_init_builtins
472 #undef TARGET_EXPAND_BUILTIN
473 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
474 #undef TARGET_BUILTIN_DECL
475 #define TARGET_BUILTIN_DECL arm_builtin_decl
477 #undef TARGET_INIT_LIBFUNCS
478 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
480 #undef TARGET_PROMOTE_FUNCTION_MODE
481 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
482 #undef TARGET_PROMOTE_PROTOTYPES
483 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
484 #undef TARGET_PASS_BY_REFERENCE
485 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
486 #undef TARGET_ARG_PARTIAL_BYTES
487 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
488 #undef TARGET_FUNCTION_ARG
489 #define TARGET_FUNCTION_ARG arm_function_arg
490 #undef TARGET_FUNCTION_ARG_ADVANCE
491 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
492 #undef TARGET_FUNCTION_ARG_BOUNDARY
493 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
495 #undef TARGET_SETUP_INCOMING_VARARGS
496 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
498 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
499 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
501 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
502 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
503 #undef TARGET_TRAMPOLINE_INIT
504 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
505 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
506 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
508 #undef TARGET_WARN_FUNC_RETURN
509 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
511 #undef TARGET_DEFAULT_SHORT_ENUMS
512 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
514 #undef TARGET_ALIGN_ANON_BITFIELD
515 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
517 #undef TARGET_NARROW_VOLATILE_BITFIELD
518 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
520 #undef TARGET_CXX_GUARD_TYPE
521 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
523 #undef TARGET_CXX_GUARD_MASK_BIT
524 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
526 #undef TARGET_CXX_GET_COOKIE_SIZE
527 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
529 #undef TARGET_CXX_COOKIE_HAS_SIZE
530 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
532 #undef TARGET_CXX_CDTOR_RETURNS_THIS
533 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
535 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
536 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
538 #undef TARGET_CXX_USE_AEABI_ATEXIT
539 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
541 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
542 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
543 arm_cxx_determine_class_data_visibility
545 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
546 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
548 #undef TARGET_RETURN_IN_MSB
549 #define TARGET_RETURN_IN_MSB arm_return_in_msb
551 #undef TARGET_RETURN_IN_MEMORY
552 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
554 #undef TARGET_MUST_PASS_IN_STACK
555 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
558 #undef TARGET_ASM_UNWIND_EMIT
559 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
561 /* EABI unwinding tables use a different format for the typeinfo tables. */
562 #undef TARGET_ASM_TTYPE
563 #define TARGET_ASM_TTYPE arm_output_ttype
565 #undef TARGET_ARM_EABI_UNWINDER
566 #define TARGET_ARM_EABI_UNWINDER true
568 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
569 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
571 #undef TARGET_ASM_INIT_SECTIONS
572 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
573 #endif /* ARM_UNWIND_INFO */
575 #undef TARGET_DWARF_REGISTER_SPAN
576 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
578 #undef TARGET_CANNOT_COPY_INSN_P
579 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
582 #undef TARGET_HAVE_TLS
583 #define TARGET_HAVE_TLS true
586 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
587 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
589 #undef TARGET_LEGITIMATE_CONSTANT_P
590 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
592 #undef TARGET_CANNOT_FORCE_CONST_MEM
593 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
595 #undef TARGET_MAX_ANCHOR_OFFSET
596 #define TARGET_MAX_ANCHOR_OFFSET 4095
598 /* The minimum is set such that the total size of the block
599 for a particular anchor is -4088 + 1 + 4095 bytes, which is
600 divisible by eight, ensuring natural spacing of anchors. */
601 #undef TARGET_MIN_ANCHOR_OFFSET
602 #define TARGET_MIN_ANCHOR_OFFSET -4088
604 #undef TARGET_SCHED_ISSUE_RATE
605 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
607 #undef TARGET_MANGLE_TYPE
608 #define TARGET_MANGLE_TYPE arm_mangle_type
610 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
611 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
613 #undef TARGET_BUILD_BUILTIN_VA_LIST
614 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
615 #undef TARGET_EXPAND_BUILTIN_VA_START
616 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
617 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
618 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
621 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
622 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
625 #undef TARGET_LEGITIMATE_ADDRESS_P
626 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
628 #undef TARGET_PREFERRED_RELOAD_CLASS
629 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
631 #undef TARGET_INVALID_PARAMETER_TYPE
632 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
634 #undef TARGET_INVALID_RETURN_TYPE
635 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
637 #undef TARGET_PROMOTED_TYPE
638 #define TARGET_PROMOTED_TYPE arm_promoted_type
640 #undef TARGET_CONVERT_TO_TYPE
641 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
643 #undef TARGET_SCALAR_MODE_SUPPORTED_P
644 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
646 #undef TARGET_FRAME_POINTER_REQUIRED
647 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
649 #undef TARGET_CAN_ELIMINATE
650 #define TARGET_CAN_ELIMINATE arm_can_eliminate
652 #undef TARGET_CONDITIONAL_REGISTER_USAGE
653 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
655 #undef TARGET_CLASS_LIKELY_SPILLED_P
656 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
658 #undef TARGET_VECTORIZE_BUILTINS
659 #define TARGET_VECTORIZE_BUILTINS
661 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
662 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
663 arm_builtin_vectorized_function
665 #undef TARGET_VECTOR_ALIGNMENT
666 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
668 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
669 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
670 arm_vector_alignment_reachable
672 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
673 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
674 arm_builtin_support_vector_misalignment
676 #undef TARGET_PREFERRED_RENAME_CLASS
677 #define TARGET_PREFERRED_RENAME_CLASS \
678 arm_preferred_rename_class
680 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
681 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
682 arm_vectorize_vec_perm_const_ok
684 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
685 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
686 arm_builtin_vectorization_cost
687 #undef TARGET_VECTORIZE_ADD_STMT_COST
688 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
690 #undef TARGET_CANONICALIZE_COMPARISON
691 #define TARGET_CANONICALIZE_COMPARISON \
692 arm_canonicalize_comparison
694 #undef TARGET_ASAN_SHADOW_OFFSET
695 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
697 #undef MAX_INSN_PER_IT_BLOCK
698 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
700 #undef TARGET_CAN_USE_DOLOOP_P
701 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
703 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
704 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
706 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
707 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
709 struct gcc_target targetm
= TARGET_INITIALIZER
;
711 /* Obstack for minipool constant handling. */
712 static struct obstack minipool_obstack
;
/* NOTE(review): per its name, presumably marks the first object allocated
   on minipool_obstack so the obstack can be reset — confirm against users.  */
static char *minipool_startobj;
/* The maximum number of insns skipped which will be conditionalised if
   possible.  */
static int max_insns_skipped = 5;
/* Assembly output stream; defined elsewhere in the compiler.  */
extern FILE *asm_out_file;
/* True if we are currently building a constant table.  */
int making_const_table;
724 /* The processor for which instructions should be scheduled. */
725 enum processor_type arm_tune
= arm_none
;
/* The current tuning set.  */
const struct tune_params *current_tune;
730 /* Which floating point hardware to schedule for. */
/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
736 /* Used for Thumb call_via trampolines. */
737 rtx thumb_call_via_label
[14];
/* NOTE(review): apparently flags that at least one call_via label must be
   emitted — confirm against the code that reads it.  */
static int thumb_call_reg_needed;
740 /* Bit values used to identify processor capabilities. */
741 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
742 #define FL_ARCH3M (1 << 1) /* Extended multiply */
743 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
744 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
745 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
746 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
747 #define FL_THUMB (1 << 6) /* Thumb aware */
748 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
749 #define FL_STRONG (1 << 8) /* StrongARM */
750 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
751 #define FL_XSCALE (1 << 10) /* XScale */
752 /* spare (1 << 11) */
753 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
754 media instructions. */
755 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
756 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
757 Note: ARM6 & 7 derivatives only. */
758 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
759 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
760 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
762 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
763 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
764 #define FL_NEON (1 << 20) /* Neon instructions. */
765 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
767 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
768 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
769 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
770 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
772 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
773 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
775 /* Flags that only effect tuning, not available instructions. */
776 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
779 #define FL_FOR_ARCH2 FL_NOTM
780 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
781 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
782 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
783 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
784 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
785 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
786 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
787 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
788 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
789 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
790 #define FL_FOR_ARCH6J FL_FOR_ARCH6
791 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
792 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
793 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
794 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
795 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
796 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
797 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
798 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
799 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
800 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
801 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
802 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
/* The bits in this mask specify which instructions we are allowed to
   generate.  */
static unsigned long insn_flags = 0;
/* The bits in this mask specify which instruction scheduling options
   should be used.  */
static unsigned long tune_flags = 0;
812 /* The highest ARM architecture version supported by the
814 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
/* NOTE(review): extraction dropped several declarations here -- the
   comments from original lines 819-846 (arch 3M/4/4t/5/5E/6/6K/6-M/7/
   7E-M/8, thumb_code, thumb1_code, arm_arch_thumb2) have no matching
   `int' definitions below.  Restore them from upstream arm.c before
   relying on this chunk.  */
816 /* The following are used in the arm.md file as equivalents to bits
817 in the above two flag variables. */
819 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
822 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
825 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
828 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
831 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
834 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
837 /* Nonzero if this chip supports the ARM 6K extensions. */
840 /* Nonzero if instructions present in ARMv6-M can be used. */
843 /* Nonzero if this chip supports the ARM 7 extensions. */
846 /* Nonzero if instructions not present in the 'M' profile can be used. */
847 int arm_arch_notm
= 0;
849 /* Nonzero if instructions present in ARMv7E-M can be used. */
852 /* Nonzero if instructions present in ARMv8 can be used. */
855 /* Nonzero if this chip can benefit from load scheduling. */
856 int arm_ld_sched
= 0;
858 /* Nonzero if this chip is a StrongARM. */
859 int arm_tune_strongarm
= 0;
861 /* Nonzero if this chip supports Intel Wireless MMX technology. */
862 int arm_arch_iwmmxt
= 0;
864 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
865 int arm_arch_iwmmxt2
= 0;
867 /* Nonzero if this chip is an XScale. */
868 int arm_arch_xscale
= 0;
870 /* Nonzero if tuning for XScale */
871 int arm_tune_xscale
= 0;
873 /* Nonzero if we want to tune for stores that access the write-buffer.
874 This typically means an ARM6 or ARM7 with MMU or MPU. */
875 int arm_tune_wbuf
= 0;
877 /* Nonzero if tuning for Cortex-A9. */
878 int arm_tune_cortex_a9
= 0;
880 /* Nonzero if generating Thumb instructions. */
883 /* Nonzero if generating Thumb-1 instructions. */
886 /* Nonzero if we should define __THUMB_INTERWORK__ in the
888 XXX This is a bit of a hack, it's intended to help work around
889 problems in GLD which doesn't understand that armv5t code is
890 interworking clean. */
891 int arm_cpp_interwork
= 0;
893 /* Nonzero if chip supports Thumb 2. */
896 /* Nonzero if chip supports integer division instruction. */
897 int arm_arch_arm_hwdiv
;
898 int arm_arch_thumb_hwdiv
;
900 /* Nonzero if we should use Neon to handle 64-bits operations rather
901 than core registers. */
902 int prefer_neon_for_64bits
= 0;
904 /* Nonzero if we shouldn't use literal pools. */
905 bool arm_disable_literal_pool
= false;
907 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
908 we must report the mode of the memory reference from
909 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
910 machine_mode output_memory_reference_mode
;
912 /* The register number to be used for the PIC offset register. */
913 unsigned arm_pic_register
= INVALID_REGNUM
;
915 enum arm_pcs arm_pcs_default
;
917 /* For an explanation of these variables, see final_prescan_insn below. */
919 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
920 enum arm_cond_code arm_current_cc
;
/* Label for the current conditional-execution target, set by
   final_prescan_insn.  */
923 int arm_target_label
;
924 /* The number of conditionally executed insns, including the current insn. */
925 int arm_condexec_count
= 0;
926 /* A bitmask specifying the patterns for the IT block.
927 Zero means do not output an IT block before this insn. */
928 int arm_condexec_mask
= 0;
929 /* The number of bits used in arm_condexec_mask. */
930 int arm_condexec_masklen
= 0;
932 /* Nonzero if chip supports the ARMv8 CRC instructions. */
933 int arm_arch_crc
= 0;
/* NOTE(review): the brace lines of both array initializers below
   (original lines 937/940 and 944/946) were lost in extraction --
   restore "{" / "};" from upstream before compiling.  */
935 /* The condition codes of the ARM, and the inverse function. */
936 static const char * const arm_condition_codes
[] =
938 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
939 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
942 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
943 int arm_regs_in_sequence
[] =
945 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
/* Left-shift mnemonic: unified assembly syntax spells it "lsl", the
   legacy divided syntax spells it "asl".  */
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")

/* True iff the two NUL-terminated strings compare equal.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Mask of the low registers r0-r7 minus the Thumb frame pointer, SP, PC
   and the PIC register -- the registers available as Thumb-2 work
   registers (per the macro's name and use).  */
#define THUMB2_WORK_REGS	(0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
			 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
			 | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* NOTE(review): extraction dropped (a) the "struct processors {" header
   and closing "};" around the name/core/base_arch/flags/tune fields
   below, (b) the continuation line of ARM_PREFETCH_BENEFICIAL (original
   line 970), and (c) the braces of arm_default_vec_cost's initializer.
   Restore them from upstream arm.c.  */
955 /* Initialization code. */
959 const char *const name
;
960 enum processor_type core
;
962 enum base_architecture base_arch
;
963 const unsigned long flags
;
964 const struct tune_params
*const tune
;
968 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
969 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
974 /* arm generic vectorizer costs. */
976 struct cpu_vec_costs arm_default_vec_cost
= {
977 1, /* scalar_stmt_cost. */
978 1, /* scalar load_cost. */
979 1, /* scalar_store_cost. */
980 1, /* vec_stmt_cost. */
981 1, /* vec_to_scalar_cost. */
982 1, /* scalar_to_vec_cost. */
983 1, /* vec_align_load_cost. */
984 1, /* vec_unalign_load_cost. */
985 1, /* vec_unalign_store_cost. */
986 1, /* vec_store_cost. */
987 3, /* cond_taken_branch_cost. */
988 1, /* cond_not_taken_branch_cost. */
991 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
992 #include "aarch-cost-tables.h"
/* Per-operation RTX cost table for Cortex-A9 tuning.
   NOTE(review): extraction dropped this table's brace structure and
   several field lines (see the gaps in the embedded original numbering,
   e.g. 1002, 1006, 1012-1014); restore from upstream arm.c.  */
996 const struct cpu_cost_table cortexa9_extra_costs
=
1003 COSTS_N_INSNS (1), /* shift_reg. */
1004 COSTS_N_INSNS (1), /* arith_shift. */
1005 COSTS_N_INSNS (2), /* arith_shift_reg. */
1007 COSTS_N_INSNS (1), /* log_shift_reg. */
1008 COSTS_N_INSNS (1), /* extend. */
1009 COSTS_N_INSNS (2), /* extend_arith. */
1010 COSTS_N_INSNS (1), /* bfi. */
1011 COSTS_N_INSNS (1), /* bfx. */
1015 true /* non_exec_costs_exec. */
1020 COSTS_N_INSNS (3), /* simple. */
1021 COSTS_N_INSNS (3), /* flag_setting. */
1022 COSTS_N_INSNS (2), /* extend. */
1023 COSTS_N_INSNS (3), /* add. */
1024 COSTS_N_INSNS (2), /* extend_add. */
1025 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1029 0, /* simple (N/A). */
1030 0, /* flag_setting (N/A). */
1031 COSTS_N_INSNS (4), /* extend. */
1033 COSTS_N_INSNS (4), /* extend_add. */
1039 COSTS_N_INSNS (2), /* load. */
1040 COSTS_N_INSNS (2), /* load_sign_extend. */
1041 COSTS_N_INSNS (2), /* ldrd. */
1042 COSTS_N_INSNS (2), /* ldm_1st. */
1043 1, /* ldm_regs_per_insn_1st. */
1044 2, /* ldm_regs_per_insn_subsequent. */
1045 COSTS_N_INSNS (5), /* loadf. */
1046 COSTS_N_INSNS (5), /* loadd. */
1047 COSTS_N_INSNS (1), /* load_unaligned. */
1048 COSTS_N_INSNS (2), /* store. */
1049 COSTS_N_INSNS (2), /* strd. */
1050 COSTS_N_INSNS (2), /* stm_1st. */
1051 1, /* stm_regs_per_insn_1st. */
1052 2, /* stm_regs_per_insn_subsequent. */
1053 COSTS_N_INSNS (1), /* storef. */
1054 COSTS_N_INSNS (1), /* stored. */
1055 COSTS_N_INSNS (1) /* store_unaligned. */
1060 COSTS_N_INSNS (14), /* div. */
1061 COSTS_N_INSNS (4), /* mult. */
1062 COSTS_N_INSNS (7), /* mult_addsub. */
1063 COSTS_N_INSNS (30), /* fma. */
1064 COSTS_N_INSNS (3), /* addsub. */
1065 COSTS_N_INSNS (1), /* fpconst. */
1066 COSTS_N_INSNS (1), /* neg. */
1067 COSTS_N_INSNS (3), /* compare. */
1068 COSTS_N_INSNS (3), /* widen. */
1069 COSTS_N_INSNS (3), /* narrow. */
1070 COSTS_N_INSNS (3), /* toint. */
1071 COSTS_N_INSNS (3), /* fromint. */
1072 COSTS_N_INSNS (3) /* roundint. */
1076 COSTS_N_INSNS (24), /* div. */
1077 COSTS_N_INSNS (5), /* mult. */
1078 COSTS_N_INSNS (8), /* mult_addsub. */
1079 COSTS_N_INSNS (30), /* fma. */
1080 COSTS_N_INSNS (3), /* addsub. */
1081 COSTS_N_INSNS (1), /* fpconst. */
1082 COSTS_N_INSNS (1), /* neg. */
1083 COSTS_N_INSNS (3), /* compare. */
1084 COSTS_N_INSNS (3), /* widen. */
1085 COSTS_N_INSNS (3), /* narrow. */
1086 COSTS_N_INSNS (3), /* toint. */
1087 COSTS_N_INSNS (3), /* fromint. */
1088 COSTS_N_INSNS (3) /* roundint. */
1093 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for Cortex-A8 tuning.
   NOTE(review): braces and several field lines are missing (gaps in the
   embedded original numbering); restore from upstream arm.c.  */
1097 const struct cpu_cost_table cortexa8_extra_costs
=
1103 COSTS_N_INSNS (1), /* shift. */
1105 COSTS_N_INSNS (1), /* arith_shift. */
1106 0, /* arith_shift_reg. */
1107 COSTS_N_INSNS (1), /* log_shift. */
1108 0, /* log_shift_reg. */
1110 0, /* extend_arith. */
1116 true /* non_exec_costs_exec. */
1121 COSTS_N_INSNS (1), /* simple. */
1122 COSTS_N_INSNS (1), /* flag_setting. */
1123 COSTS_N_INSNS (1), /* extend. */
1124 COSTS_N_INSNS (1), /* add. */
1125 COSTS_N_INSNS (1), /* extend_add. */
1126 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1130 0, /* simple (N/A). */
1131 0, /* flag_setting (N/A). */
1132 COSTS_N_INSNS (2), /* extend. */
1134 COSTS_N_INSNS (2), /* extend_add. */
1140 COSTS_N_INSNS (1), /* load. */
1141 COSTS_N_INSNS (1), /* load_sign_extend. */
1142 COSTS_N_INSNS (1), /* ldrd. */
1143 COSTS_N_INSNS (1), /* ldm_1st. */
1144 1, /* ldm_regs_per_insn_1st. */
1145 2, /* ldm_regs_per_insn_subsequent. */
1146 COSTS_N_INSNS (1), /* loadf. */
1147 COSTS_N_INSNS (1), /* loadd. */
1148 COSTS_N_INSNS (1), /* load_unaligned. */
1149 COSTS_N_INSNS (1), /* store. */
1150 COSTS_N_INSNS (1), /* strd. */
1151 COSTS_N_INSNS (1), /* stm_1st. */
1152 1, /* stm_regs_per_insn_1st. */
1153 2, /* stm_regs_per_insn_subsequent. */
1154 COSTS_N_INSNS (1), /* storef. */
1155 COSTS_N_INSNS (1), /* stored. */
1156 COSTS_N_INSNS (1) /* store_unaligned. */
1161 COSTS_N_INSNS (36), /* div. */
1162 COSTS_N_INSNS (11), /* mult. */
1163 COSTS_N_INSNS (20), /* mult_addsub. */
1164 COSTS_N_INSNS (30), /* fma. */
1165 COSTS_N_INSNS (9), /* addsub. */
1166 COSTS_N_INSNS (3), /* fpconst. */
1167 COSTS_N_INSNS (3), /* neg. */
1168 COSTS_N_INSNS (6), /* compare. */
1169 COSTS_N_INSNS (4), /* widen. */
1170 COSTS_N_INSNS (4), /* narrow. */
1171 COSTS_N_INSNS (8), /* toint. */
1172 COSTS_N_INSNS (8), /* fromint. */
1173 COSTS_N_INSNS (8) /* roundint. */
1177 COSTS_N_INSNS (64), /* div. */
1178 COSTS_N_INSNS (16), /* mult. */
1179 COSTS_N_INSNS (25), /* mult_addsub. */
1180 COSTS_N_INSNS (30), /* fma. */
1181 COSTS_N_INSNS (9), /* addsub. */
1182 COSTS_N_INSNS (3), /* fpconst. */
1183 COSTS_N_INSNS (3), /* neg. */
1184 COSTS_N_INSNS (6), /* compare. */
1185 COSTS_N_INSNS (6), /* widen. */
1186 COSTS_N_INSNS (6), /* narrow. */
1187 COSTS_N_INSNS (8), /* toint. */
1188 COSTS_N_INSNS (8), /* fromint. */
1189 COSTS_N_INSNS (8) /* roundint. */
1194 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for Cortex-A5 tuning.
   NOTE(review): braces and some field lines are missing (gaps in the
   embedded original numbering); restore from upstream arm.c.  */
1198 const struct cpu_cost_table cortexa5_extra_costs
=
1204 COSTS_N_INSNS (1), /* shift. */
1205 COSTS_N_INSNS (1), /* shift_reg. */
1206 COSTS_N_INSNS (1), /* arith_shift. */
1207 COSTS_N_INSNS (1), /* arith_shift_reg. */
1208 COSTS_N_INSNS (1), /* log_shift. */
1209 COSTS_N_INSNS (1), /* log_shift_reg. */
1210 COSTS_N_INSNS (1), /* extend. */
1211 COSTS_N_INSNS (1), /* extend_arith. */
1212 COSTS_N_INSNS (1), /* bfi. */
1213 COSTS_N_INSNS (1), /* bfx. */
1214 COSTS_N_INSNS (1), /* clz. */
1215 COSTS_N_INSNS (1), /* rev. */
1217 true /* non_exec_costs_exec. */
1224 COSTS_N_INSNS (1), /* flag_setting. */
1225 COSTS_N_INSNS (1), /* extend. */
1226 COSTS_N_INSNS (1), /* add. */
1227 COSTS_N_INSNS (1), /* extend_add. */
1228 COSTS_N_INSNS (7) /* idiv. */
1232 0, /* simple (N/A). */
1233 0, /* flag_setting (N/A). */
1234 COSTS_N_INSNS (1), /* extend. */
1236 COSTS_N_INSNS (2), /* extend_add. */
1242 COSTS_N_INSNS (1), /* load. */
1243 COSTS_N_INSNS (1), /* load_sign_extend. */
1244 COSTS_N_INSNS (6), /* ldrd. */
1245 COSTS_N_INSNS (1), /* ldm_1st. */
1246 1, /* ldm_regs_per_insn_1st. */
1247 2, /* ldm_regs_per_insn_subsequent. */
1248 COSTS_N_INSNS (2), /* loadf. */
1249 COSTS_N_INSNS (4), /* loadd. */
1250 COSTS_N_INSNS (1), /* load_unaligned. */
1251 COSTS_N_INSNS (1), /* store. */
1252 COSTS_N_INSNS (3), /* strd. */
1253 COSTS_N_INSNS (1), /* stm_1st. */
1254 1, /* stm_regs_per_insn_1st. */
1255 2, /* stm_regs_per_insn_subsequent. */
1256 COSTS_N_INSNS (2), /* storef. */
1257 COSTS_N_INSNS (2), /* stored. */
1258 COSTS_N_INSNS (1) /* store_unaligned. */
1263 COSTS_N_INSNS (15), /* div. */
1264 COSTS_N_INSNS (3), /* mult. */
1265 COSTS_N_INSNS (7), /* mult_addsub. */
1266 COSTS_N_INSNS (7), /* fma. */
1267 COSTS_N_INSNS (3), /* addsub. */
1268 COSTS_N_INSNS (3), /* fpconst. */
1269 COSTS_N_INSNS (3), /* neg. */
1270 COSTS_N_INSNS (3), /* compare. */
1271 COSTS_N_INSNS (3), /* widen. */
1272 COSTS_N_INSNS (3), /* narrow. */
1273 COSTS_N_INSNS (3), /* toint. */
1274 COSTS_N_INSNS (3), /* fromint. */
1275 COSTS_N_INSNS (3) /* roundint. */
1279 COSTS_N_INSNS (30), /* div. */
1280 COSTS_N_INSNS (6), /* mult. */
1281 COSTS_N_INSNS (10), /* mult_addsub. */
1282 COSTS_N_INSNS (7), /* fma. */
1283 COSTS_N_INSNS (3), /* addsub. */
1284 COSTS_N_INSNS (3), /* fpconst. */
1285 COSTS_N_INSNS (3), /* neg. */
1286 COSTS_N_INSNS (3), /* compare. */
1287 COSTS_N_INSNS (3), /* widen. */
1288 COSTS_N_INSNS (3), /* narrow. */
1289 COSTS_N_INSNS (3), /* toint. */
1290 COSTS_N_INSNS (3), /* fromint. */
1291 COSTS_N_INSNS (3) /* roundint. */
1296 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for Cortex-A7 tuning.
   NOTE(review): braces and some field lines are missing (gaps in the
   embedded original numbering); restore from upstream arm.c.  */
1301 const struct cpu_cost_table cortexa7_extra_costs
=
1307 COSTS_N_INSNS (1), /* shift. */
1308 COSTS_N_INSNS (1), /* shift_reg. */
1309 COSTS_N_INSNS (1), /* arith_shift. */
1310 COSTS_N_INSNS (1), /* arith_shift_reg. */
1311 COSTS_N_INSNS (1), /* log_shift. */
1312 COSTS_N_INSNS (1), /* log_shift_reg. */
1313 COSTS_N_INSNS (1), /* extend. */
1314 COSTS_N_INSNS (1), /* extend_arith. */
1315 COSTS_N_INSNS (1), /* bfi. */
1316 COSTS_N_INSNS (1), /* bfx. */
1317 COSTS_N_INSNS (1), /* clz. */
1318 COSTS_N_INSNS (1), /* rev. */
1320 true /* non_exec_costs_exec. */
1327 COSTS_N_INSNS (1), /* flag_setting. */
1328 COSTS_N_INSNS (1), /* extend. */
1329 COSTS_N_INSNS (1), /* add. */
1330 COSTS_N_INSNS (1), /* extend_add. */
1331 COSTS_N_INSNS (7) /* idiv. */
1335 0, /* simple (N/A). */
1336 0, /* flag_setting (N/A). */
1337 COSTS_N_INSNS (1), /* extend. */
1339 COSTS_N_INSNS (2), /* extend_add. */
1345 COSTS_N_INSNS (1), /* load. */
1346 COSTS_N_INSNS (1), /* load_sign_extend. */
1347 COSTS_N_INSNS (3), /* ldrd. */
1348 COSTS_N_INSNS (1), /* ldm_1st. */
1349 1, /* ldm_regs_per_insn_1st. */
1350 2, /* ldm_regs_per_insn_subsequent. */
1351 COSTS_N_INSNS (2), /* loadf. */
1352 COSTS_N_INSNS (2), /* loadd. */
1353 COSTS_N_INSNS (1), /* load_unaligned. */
1354 COSTS_N_INSNS (1), /* store. */
1355 COSTS_N_INSNS (3), /* strd. */
1356 COSTS_N_INSNS (1), /* stm_1st. */
1357 1, /* stm_regs_per_insn_1st. */
1358 2, /* stm_regs_per_insn_subsequent. */
1359 COSTS_N_INSNS (2), /* storef. */
1360 COSTS_N_INSNS (2), /* stored. */
1361 COSTS_N_INSNS (1) /* store_unaligned. */
1366 COSTS_N_INSNS (15), /* div. */
1367 COSTS_N_INSNS (3), /* mult. */
1368 COSTS_N_INSNS (7), /* mult_addsub. */
1369 COSTS_N_INSNS (7), /* fma. */
1370 COSTS_N_INSNS (3), /* addsub. */
1371 COSTS_N_INSNS (3), /* fpconst. */
1372 COSTS_N_INSNS (3), /* neg. */
1373 COSTS_N_INSNS (3), /* compare. */
1374 COSTS_N_INSNS (3), /* widen. */
1375 COSTS_N_INSNS (3), /* narrow. */
1376 COSTS_N_INSNS (3), /* toint. */
1377 COSTS_N_INSNS (3), /* fromint. */
1378 COSTS_N_INSNS (3) /* roundint. */
1382 COSTS_N_INSNS (30), /* div. */
1383 COSTS_N_INSNS (6), /* mult. */
1384 COSTS_N_INSNS (10), /* mult_addsub. */
1385 COSTS_N_INSNS (7), /* fma. */
1386 COSTS_N_INSNS (3), /* addsub. */
1387 COSTS_N_INSNS (3), /* fpconst. */
1388 COSTS_N_INSNS (3), /* neg. */
1389 COSTS_N_INSNS (3), /* compare. */
1390 COSTS_N_INSNS (3), /* widen. */
1391 COSTS_N_INSNS (3), /* narrow. */
1392 COSTS_N_INSNS (3), /* toint. */
1393 COSTS_N_INSNS (3), /* fromint. */
1394 COSTS_N_INSNS (3) /* roundint. */
1399 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for Cortex-A12 tuning.
   NOTE(review): braces and several field lines are missing (gaps in the
   embedded original numbering, e.g. 1409, 1415, 1417, 1455-1457);
   restore from upstream arm.c.  */
1403 const struct cpu_cost_table cortexa12_extra_costs
=
1410 COSTS_N_INSNS (1), /* shift_reg. */
1411 COSTS_N_INSNS (1), /* arith_shift. */
1412 COSTS_N_INSNS (1), /* arith_shift_reg. */
1413 COSTS_N_INSNS (1), /* log_shift. */
1414 COSTS_N_INSNS (1), /* log_shift_reg. */
1416 COSTS_N_INSNS (1), /* extend_arith. */
1418 COSTS_N_INSNS (1), /* bfx. */
1419 COSTS_N_INSNS (1), /* clz. */
1420 COSTS_N_INSNS (1), /* rev. */
1422 true /* non_exec_costs_exec. */
1427 COSTS_N_INSNS (2), /* simple. */
1428 COSTS_N_INSNS (3), /* flag_setting. */
1429 COSTS_N_INSNS (2), /* extend. */
1430 COSTS_N_INSNS (3), /* add. */
1431 COSTS_N_INSNS (2), /* extend_add. */
1432 COSTS_N_INSNS (18) /* idiv. */
1436 0, /* simple (N/A). */
1437 0, /* flag_setting (N/A). */
1438 COSTS_N_INSNS (3), /* extend. */
1440 COSTS_N_INSNS (3), /* extend_add. */
1446 COSTS_N_INSNS (3), /* load. */
1447 COSTS_N_INSNS (3), /* load_sign_extend. */
1448 COSTS_N_INSNS (3), /* ldrd. */
1449 COSTS_N_INSNS (3), /* ldm_1st. */
1450 1, /* ldm_regs_per_insn_1st. */
1451 2, /* ldm_regs_per_insn_subsequent. */
1452 COSTS_N_INSNS (3), /* loadf. */
1453 COSTS_N_INSNS (3), /* loadd. */
1454 0, /* load_unaligned. */
1458 1, /* stm_regs_per_insn_1st. */
1459 2, /* stm_regs_per_insn_subsequent. */
1460 COSTS_N_INSNS (2), /* storef. */
1461 COSTS_N_INSNS (2), /* stored. */
1462 0 /* store_unaligned. */
1467 COSTS_N_INSNS (17), /* div. */
1468 COSTS_N_INSNS (4), /* mult. */
1469 COSTS_N_INSNS (8), /* mult_addsub. */
1470 COSTS_N_INSNS (8), /* fma. */
1471 COSTS_N_INSNS (4), /* addsub. */
1472 COSTS_N_INSNS (2), /* fpconst. */
1473 COSTS_N_INSNS (2), /* neg. */
1474 COSTS_N_INSNS (2), /* compare. */
1475 COSTS_N_INSNS (4), /* widen. */
1476 COSTS_N_INSNS (4), /* narrow. */
1477 COSTS_N_INSNS (4), /* toint. */
1478 COSTS_N_INSNS (4), /* fromint. */
1479 COSTS_N_INSNS (4) /* roundint. */
1483 COSTS_N_INSNS (31), /* div. */
1484 COSTS_N_INSNS (4), /* mult. */
1485 COSTS_N_INSNS (8), /* mult_addsub. */
1486 COSTS_N_INSNS (8), /* fma. */
1487 COSTS_N_INSNS (4), /* addsub. */
1488 COSTS_N_INSNS (2), /* fpconst. */
1489 COSTS_N_INSNS (2), /* neg. */
1490 COSTS_N_INSNS (2), /* compare. */
1491 COSTS_N_INSNS (4), /* widen. */
1492 COSTS_N_INSNS (4), /* narrow. */
1493 COSTS_N_INSNS (4), /* toint. */
1494 COSTS_N_INSNS (4), /* fromint. */
1495 COSTS_N_INSNS (4) /* roundint. */
1500 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for Cortex-A15 tuning.
   NOTE(review): braces and several field lines are missing (gaps in the
   embedded original numbering, e.g. 1511, 1516, 1519-1522, 1556-1557,
   1561-1562); restore from upstream arm.c.  */
1504 const struct cpu_cost_table cortexa15_extra_costs
=
1512 COSTS_N_INSNS (1), /* arith_shift. */
1513 COSTS_N_INSNS (1), /* arith_shift_reg. */
1514 COSTS_N_INSNS (1), /* log_shift. */
1515 COSTS_N_INSNS (1), /* log_shift_reg. */
1517 COSTS_N_INSNS (1), /* extend_arith. */
1518 COSTS_N_INSNS (1), /* bfi. */
1523 true /* non_exec_costs_exec. */
1528 COSTS_N_INSNS (2), /* simple. */
1529 COSTS_N_INSNS (3), /* flag_setting. */
1530 COSTS_N_INSNS (2), /* extend. */
1531 COSTS_N_INSNS (2), /* add. */
1532 COSTS_N_INSNS (2), /* extend_add. */
1533 COSTS_N_INSNS (18) /* idiv. */
1537 0, /* simple (N/A). */
1538 0, /* flag_setting (N/A). */
1539 COSTS_N_INSNS (3), /* extend. */
1541 COSTS_N_INSNS (3), /* extend_add. */
1547 COSTS_N_INSNS (3), /* load. */
1548 COSTS_N_INSNS (3), /* load_sign_extend. */
1549 COSTS_N_INSNS (3), /* ldrd. */
1550 COSTS_N_INSNS (4), /* ldm_1st. */
1551 1, /* ldm_regs_per_insn_1st. */
1552 2, /* ldm_regs_per_insn_subsequent. */
1553 COSTS_N_INSNS (4), /* loadf. */
1554 COSTS_N_INSNS (4), /* loadd. */
1555 0, /* load_unaligned. */
1558 COSTS_N_INSNS (1), /* stm_1st. */
1559 1, /* stm_regs_per_insn_1st. */
1560 2, /* stm_regs_per_insn_subsequent. */
1563 0 /* store_unaligned. */
1568 COSTS_N_INSNS (17), /* div. */
1569 COSTS_N_INSNS (4), /* mult. */
1570 COSTS_N_INSNS (8), /* mult_addsub. */
1571 COSTS_N_INSNS (8), /* fma. */
1572 COSTS_N_INSNS (4), /* addsub. */
1573 COSTS_N_INSNS (2), /* fpconst. */
1574 COSTS_N_INSNS (2), /* neg. */
1575 COSTS_N_INSNS (5), /* compare. */
1576 COSTS_N_INSNS (4), /* widen. */
1577 COSTS_N_INSNS (4), /* narrow. */
1578 COSTS_N_INSNS (4), /* toint. */
1579 COSTS_N_INSNS (4), /* fromint. */
1580 COSTS_N_INSNS (4) /* roundint. */
1584 COSTS_N_INSNS (31), /* div. */
1585 COSTS_N_INSNS (4), /* mult. */
1586 COSTS_N_INSNS (8), /* mult_addsub. */
1587 COSTS_N_INSNS (8), /* fma. */
1588 COSTS_N_INSNS (4), /* addsub. */
1589 COSTS_N_INSNS (2), /* fpconst. */
1590 COSTS_N_INSNS (2), /* neg. */
1591 COSTS_N_INSNS (2), /* compare. */
1592 COSTS_N_INSNS (4), /* widen. */
1593 COSTS_N_INSNS (4), /* narrow. */
1594 COSTS_N_INSNS (4), /* toint. */
1595 COSTS_N_INSNS (4), /* fromint. */
1596 COSTS_N_INSNS (4) /* roundint. */
1601 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for ARMv7-M (Cortex-M class) tuning.
   NOTE(review): braces and many field lines are missing (gaps in the
   embedded original numbering, e.g. 1612, 1615, 1617, 1619-1622,
   1674-1684, 1690-1701); restore from upstream arm.c.  */
1605 const struct cpu_cost_table v7m_extra_costs
=
1613 0, /* arith_shift. */
1614 COSTS_N_INSNS (1), /* arith_shift_reg. */
1616 COSTS_N_INSNS (1), /* log_shift_reg. */
1618 COSTS_N_INSNS (1), /* extend_arith. */
1623 COSTS_N_INSNS (1), /* non_exec. */
1624 false /* non_exec_costs_exec. */
1629 COSTS_N_INSNS (1), /* simple. */
1630 COSTS_N_INSNS (1), /* flag_setting. */
1631 COSTS_N_INSNS (2), /* extend. */
1632 COSTS_N_INSNS (1), /* add. */
1633 COSTS_N_INSNS (3), /* extend_add. */
1634 COSTS_N_INSNS (8) /* idiv. */
1638 0, /* simple (N/A). */
1639 0, /* flag_setting (N/A). */
1640 COSTS_N_INSNS (2), /* extend. */
1642 COSTS_N_INSNS (3), /* extend_add. */
1648 COSTS_N_INSNS (2), /* load. */
1649 0, /* load_sign_extend. */
1650 COSTS_N_INSNS (3), /* ldrd. */
1651 COSTS_N_INSNS (2), /* ldm_1st. */
1652 1, /* ldm_regs_per_insn_1st. */
1653 1, /* ldm_regs_per_insn_subsequent. */
1654 COSTS_N_INSNS (2), /* loadf. */
1655 COSTS_N_INSNS (3), /* loadd. */
1656 COSTS_N_INSNS (1), /* load_unaligned. */
1657 COSTS_N_INSNS (2), /* store. */
1658 COSTS_N_INSNS (3), /* strd. */
1659 COSTS_N_INSNS (2), /* stm_1st. */
1660 1, /* stm_regs_per_insn_1st. */
1661 1, /* stm_regs_per_insn_subsequent. */
1662 COSTS_N_INSNS (2), /* storef. */
1663 COSTS_N_INSNS (3), /* stored. */
1664 COSTS_N_INSNS (1) /* store_unaligned. */
1669 COSTS_N_INSNS (7), /* div. */
1670 COSTS_N_INSNS (2), /* mult. */
1671 COSTS_N_INSNS (5), /* mult_addsub. */
1672 COSTS_N_INSNS (3), /* fma. */
1673 COSTS_N_INSNS (1), /* addsub. */
1685 COSTS_N_INSNS (15), /* div. */
1686 COSTS_N_INSNS (5), /* mult. */
1687 COSTS_N_INSNS (7), /* mult_addsub. */
1688 COSTS_N_INSNS (7), /* fma. */
1689 COSTS_N_INSNS (3), /* addsub. */
1702 COSTS_N_INSNS (1) /* alu. */
/* Tuning parameters for slow-multiply cores, and for fast-multiply
   cores below.  NOTE(review): the initializer braces and at least one
   field line per table (gap at original 1707/1709 and 1726/1728) were
   lost in extraction; restore from upstream arm.c.  */
1706 const struct tune_params arm_slowmul_tune
=
1708 arm_slowmul_rtx_costs
,
1710 NULL
, /* Sched adj cost. */
1711 3, /* Constant limit. */
1712 5, /* Max cond insns. */
1713 ARM_PREFETCH_NOT_BENEFICIAL
,
1714 true, /* Prefer constant pool. */
1715 arm_default_branch_cost
,
1716 false, /* Prefer LDRD/STRD. */
1717 {true, true}, /* Prefer non short circuit. */
1718 &arm_default_vec_cost
, /* Vectorizer costs. */
1719 false, /* Prefer Neon for 64-bits bitops. */
1720 false, false, /* Prefer 32-bit encodings. */
1721 false, /* Prefer Neon for stringops. */
1722 8 /* Maximum insns to inline memset. */
1725 const struct tune_params arm_fastmul_tune
=
1727 arm_fastmul_rtx_costs
,
1729 NULL
, /* Sched adj cost. */
1730 1, /* Constant limit. */
1731 5, /* Max cond insns. */
1732 ARM_PREFETCH_NOT_BENEFICIAL
,
1733 true, /* Prefer constant pool. */
1734 arm_default_branch_cost
,
1735 false, /* Prefer LDRD/STRD. */
1736 {true, true}, /* Prefer non short circuit. */
1737 &arm_default_vec_cost
, /* Vectorizer costs. */
1738 false, /* Prefer Neon for 64-bits bitops. */
1739 false, false, /* Prefer 32-bit encodings. */
1740 false, /* Prefer Neon for stringops. */
1741 8 /* Maximum insns to inline memset. */
/* NOTE(review): initializer braces and some field lines are missing
   from both tables below (gaps in the embedded original numbering);
   restore from upstream arm.c.  */
1744 /* StrongARM has early execution of branches, so a sequence that is worth
1745 skipping is shorter. Set max_insns_skipped to a lower value. */
1747 const struct tune_params arm_strongarm_tune
=
1749 arm_fastmul_rtx_costs
,
1751 NULL
, /* Sched adj cost. */
1752 1, /* Constant limit. */
1753 3, /* Max cond insns. */
1754 ARM_PREFETCH_NOT_BENEFICIAL
,
1755 true, /* Prefer constant pool. */
1756 arm_default_branch_cost
,
1757 false, /* Prefer LDRD/STRD. */
1758 {true, true}, /* Prefer non short circuit. */
1759 &arm_default_vec_cost
, /* Vectorizer costs. */
1760 false, /* Prefer Neon for 64-bits bitops. */
1761 false, false, /* Prefer 32-bit encodings. */
1762 false, /* Prefer Neon for stringops. */
1763 8 /* Maximum insns to inline memset. */
1766 const struct tune_params arm_xscale_tune
=
1768 arm_xscale_rtx_costs
,
1770 xscale_sched_adjust_cost
,
1771 2, /* Constant limit. */
1772 3, /* Max cond insns. */
1773 ARM_PREFETCH_NOT_BENEFICIAL
,
1774 true, /* Prefer constant pool. */
1775 arm_default_branch_cost
,
1776 false, /* Prefer LDRD/STRD. */
1777 {true, true}, /* Prefer non short circuit. */
1778 &arm_default_vec_cost
, /* Vectorizer costs. */
1779 false, /* Prefer Neon for 64-bits bitops. */
1780 false, false, /* Prefer 32-bit encodings. */
1781 false, /* Prefer Neon for stringops. */
1782 8 /* Maximum insns to inline memset. */
/* NOTE(review): both tables below are missing their opening lines
   (original 1786-1788 and 1805-1807: the brace and the rtx_costs /
   insn extra-cost fields) plus the closing braces; restore from
   upstream arm.c.  */
1785 const struct tune_params arm_9e_tune
=
1789 NULL
, /* Sched adj cost. */
1790 1, /* Constant limit. */
1791 5, /* Max cond insns. */
1792 ARM_PREFETCH_NOT_BENEFICIAL
,
1793 true, /* Prefer constant pool. */
1794 arm_default_branch_cost
,
1795 false, /* Prefer LDRD/STRD. */
1796 {true, true}, /* Prefer non short circuit. */
1797 &arm_default_vec_cost
, /* Vectorizer costs. */
1798 false, /* Prefer Neon for 64-bits bitops. */
1799 false, false, /* Prefer 32-bit encodings. */
1800 false, /* Prefer Neon for stringops. */
1801 8 /* Maximum insns to inline memset. */
1804 const struct tune_params arm_v6t2_tune
=
1808 NULL
, /* Sched adj cost. */
1809 1, /* Constant limit. */
1810 5, /* Max cond insns. */
1811 ARM_PREFETCH_NOT_BENEFICIAL
,
1812 false, /* Prefer constant pool. */
1813 arm_default_branch_cost
,
1814 false, /* Prefer LDRD/STRD. */
1815 {true, true}, /* Prefer non short circuit. */
1816 &arm_default_vec_cost
, /* Vectorizer costs. */
1817 false, /* Prefer Neon for 64-bits bitops. */
1818 false, false, /* Prefer 32-bit encodings. */
1819 false, /* Prefer Neon for stringops. */
1820 8 /* Maximum insns to inline memset. */
/* NOTE(review): initializer braces and the leading rtx_costs field line
   of each table are missing (gaps in the embedded original numbering);
   restore from upstream arm.c.  */
1823 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1824 const struct tune_params arm_cortex_tune
=
1827 &generic_extra_costs
,
1828 NULL
, /* Sched adj cost. */
1829 1, /* Constant limit. */
1830 5, /* Max cond insns. */
1831 ARM_PREFETCH_NOT_BENEFICIAL
,
1832 false, /* Prefer constant pool. */
1833 arm_default_branch_cost
,
1834 false, /* Prefer LDRD/STRD. */
1835 {true, true}, /* Prefer non short circuit. */
1836 &arm_default_vec_cost
, /* Vectorizer costs. */
1837 false, /* Prefer Neon for 64-bits bitops. */
1838 false, false, /* Prefer 32-bit encodings. */
1839 false, /* Prefer Neon for stringops. */
1840 8 /* Maximum insns to inline memset. */
1843 const struct tune_params arm_cortex_a8_tune
=
1846 &cortexa8_extra_costs
,
1847 NULL
, /* Sched adj cost. */
1848 1, /* Constant limit. */
1849 5, /* Max cond insns. */
1850 ARM_PREFETCH_NOT_BENEFICIAL
,
1851 false, /* Prefer constant pool. */
1852 arm_default_branch_cost
,
1853 false, /* Prefer LDRD/STRD. */
1854 {true, true}, /* Prefer non short circuit. */
1855 &arm_default_vec_cost
, /* Vectorizer costs. */
1856 false, /* Prefer Neon for 64-bits bitops. */
1857 false, false, /* Prefer 32-bit encodings. */
1858 true, /* Prefer Neon for stringops. */
1859 8 /* Maximum insns to inline memset. */
/* NOTE(review): initializer braces and some field lines are missing --
   in particular arm_cortex_a7_tune has lost its sched-adjust-cost entry
   (original line 1866) between the extra-costs pointer and the constant
   limit; restore from upstream arm.c.  */
1862 const struct tune_params arm_cortex_a7_tune
=
1865 &cortexa7_extra_costs
,
1867 1, /* Constant limit. */
1868 5, /* Max cond insns. */
1869 ARM_PREFETCH_NOT_BENEFICIAL
,
1870 false, /* Prefer constant pool. */
1871 arm_default_branch_cost
,
1872 false, /* Prefer LDRD/STRD. */
1873 {true, true}, /* Prefer non short circuit. */
1874 &arm_default_vec_cost
, /* Vectorizer costs. */
1875 false, /* Prefer Neon for 64-bits bitops. */
1876 false, false, /* Prefer 32-bit encodings. */
1877 true, /* Prefer Neon for stringops. */
1878 8 /* Maximum insns to inline memset. */
1881 const struct tune_params arm_cortex_a15_tune
=
1884 &cortexa15_extra_costs
,
1885 NULL
, /* Sched adj cost. */
1886 1, /* Constant limit. */
1887 2, /* Max cond insns. */
1888 ARM_PREFETCH_NOT_BENEFICIAL
,
1889 false, /* Prefer constant pool. */
1890 arm_default_branch_cost
,
1891 true, /* Prefer LDRD/STRD. */
1892 {true, true}, /* Prefer non short circuit. */
1893 &arm_default_vec_cost
, /* Vectorizer costs. */
1894 false, /* Prefer Neon for 64-bits bitops. */
1895 true, true, /* Prefer 32-bit encodings. */
1896 true, /* Prefer Neon for stringops. */
1897 8 /* Maximum insns to inline memset. */
/* NOTE(review): initializer braces and the leading rtx_costs field line
   of each table are missing (gaps in the embedded original numbering);
   restore from upstream arm.c.  */
1900 const struct tune_params arm_cortex_a53_tune
=
1903 &cortexa53_extra_costs
,
1904 NULL
, /* Scheduler cost adjustment. */
1905 1, /* Constant limit. */
1906 5, /* Max cond insns. */
1907 ARM_PREFETCH_NOT_BENEFICIAL
,
1908 false, /* Prefer constant pool. */
1909 arm_default_branch_cost
,
1910 false, /* Prefer LDRD/STRD. */
1911 {true, true}, /* Prefer non short circuit. */
1912 &arm_default_vec_cost
, /* Vectorizer costs. */
1913 false, /* Prefer Neon for 64-bits bitops. */
1914 false, false, /* Prefer 32-bit encodings. */
1915 false, /* Prefer Neon for stringops. */
1916 8 /* Maximum insns to inline memset. */
1919 const struct tune_params arm_cortex_a57_tune
=
1922 &cortexa57_extra_costs
,
1923 NULL
, /* Scheduler cost adjustment. */
1924 1, /* Constant limit. */
1925 2, /* Max cond insns. */
1926 ARM_PREFETCH_NOT_BENEFICIAL
,
1927 false, /* Prefer constant pool. */
1928 arm_default_branch_cost
,
1929 true, /* Prefer LDRD/STRD. */
1930 {true, true}, /* Prefer non short circuit. */
1931 &arm_default_vec_cost
, /* Vectorizer costs. */
1932 false, /* Prefer Neon for 64-bits bitops. */
1933 true, true, /* Prefer 32-bit encodings. */
1934 false, /* Prefer Neon for stringops. */
1935 8 /* Maximum insns to inline memset. */
/* NOTE(review): initializer braces and the leading rtx_costs field line
   of each table are missing (gaps in the embedded original numbering);
   restore from upstream arm.c.  */
1938 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1939 less appealing. Set max_insns_skipped to a low value. */
1941 const struct tune_params arm_cortex_a5_tune
=
1944 &cortexa5_extra_costs
,
1945 NULL
, /* Sched adj cost. */
1946 1, /* Constant limit. */
1947 1, /* Max cond insns. */
1948 ARM_PREFETCH_NOT_BENEFICIAL
,
1949 false, /* Prefer constant pool. */
1950 arm_cortex_a5_branch_cost
,
1951 false, /* Prefer LDRD/STRD. */
1952 {false, false}, /* Prefer non short circuit. */
1953 &arm_default_vec_cost
, /* Vectorizer costs. */
1954 false, /* Prefer Neon for 64-bits bitops. */
1955 false, false, /* Prefer 32-bit encodings. */
1956 true, /* Prefer Neon for stringops. */
1957 8 /* Maximum insns to inline memset. */
1960 const struct tune_params arm_cortex_a9_tune
=
1963 &cortexa9_extra_costs
,
1964 cortex_a9_sched_adjust_cost
,
1965 1, /* Constant limit. */
1966 5, /* Max cond insns. */
1967 ARM_PREFETCH_BENEFICIAL(4,32,32),
1968 false, /* Prefer constant pool. */
1969 arm_default_branch_cost
,
1970 false, /* Prefer LDRD/STRD. */
1971 {true, true}, /* Prefer non short circuit. */
1972 &arm_default_vec_cost
, /* Vectorizer costs. */
1973 false, /* Prefer Neon for 64-bits bitops. */
1974 false, false, /* Prefer 32-bit encodings. */
1975 false, /* Prefer Neon for stringops. */
1976 8 /* Maximum insns to inline memset. */
/* NOTE(review): initializer braces and some field lines are missing --
   arm_cortex_a12_tune has lost its sched-adjust-cost entry (original
   line 1983), and arm_v7m_tune its opening lines (2006-2008); restore
   from upstream arm.c.  */
1979 const struct tune_params arm_cortex_a12_tune
=
1982 &cortexa12_extra_costs
,
1984 1, /* Constant limit. */
1985 5, /* Max cond insns. */
1986 ARM_PREFETCH_BENEFICIAL(4,32,32),
1987 false, /* Prefer constant pool. */
1988 arm_default_branch_cost
,
1989 true, /* Prefer LDRD/STRD. */
1990 {true, true}, /* Prefer non short circuit. */
1991 &arm_default_vec_cost
, /* Vectorizer costs. */
1992 false, /* Prefer Neon for 64-bits bitops. */
1993 false, false, /* Prefer 32-bit encodings. */
1994 true, /* Prefer Neon for stringops. */
1995 8 /* Maximum insns to inline memset. */
1998 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1999 cycle to execute each. An LDR from the constant pool also takes two cycles
2000 to execute, but mildly increases pipelining opportunity (consecutive
2001 loads/stores can be pipelined together, saving one cycle), and may also
2002 improve icache utilisation. Hence we prefer the constant pool for such
2005 const struct tune_params arm_v7m_tune
=
2009 NULL
, /* Sched adj cost. */
2010 1, /* Constant limit. */
2011 2, /* Max cond insns. */
2012 ARM_PREFETCH_NOT_BENEFICIAL
,
2013 true, /* Prefer constant pool. */
2014 arm_cortex_m_branch_cost
,
2015 false, /* Prefer LDRD/STRD. */
2016 {false, false}, /* Prefer non short circuit. */
2017 &arm_default_vec_cost
, /* Vectorizer costs. */
2018 false, /* Prefer Neon for 64-bits bitops. */
2019 false, false, /* Prefer 32-bit encodings. */
2020 false, /* Prefer Neon for stringops. */
2021 8 /* Maximum insns to inline memset. */
/* NOTE(review): initializer braces and each table's opening field lines
   (original 2027-2029 and 2046-2048) are missing; restore from upstream
   arm.c.  */
2024 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2025 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2026 const struct tune_params arm_v6m_tune
=
2030 NULL
, /* Sched adj cost. */
2031 1, /* Constant limit. */
2032 5, /* Max cond insns. */
2033 ARM_PREFETCH_NOT_BENEFICIAL
,
2034 false, /* Prefer constant pool. */
2035 arm_default_branch_cost
,
2036 false, /* Prefer LDRD/STRD. */
2037 {false, false}, /* Prefer non short circuit. */
2038 &arm_default_vec_cost
, /* Vectorizer costs. */
2039 false, /* Prefer Neon for 64-bits bitops. */
2040 false, false, /* Prefer 32-bit encodings. */
2041 false, /* Prefer Neon for stringops. */
2042 8 /* Maximum insns to inline memset. */
2045 const struct tune_params arm_fa726te_tune
=
2049 fa726te_sched_adjust_cost
,
2050 1, /* Constant limit. */
2051 5, /* Max cond insns. */
2052 ARM_PREFETCH_NOT_BENEFICIAL
,
2053 true, /* Prefer constant pool. */
2054 arm_default_branch_cost
,
2055 false, /* Prefer LDRD/STRD. */
2056 {true, true}, /* Prefer non short circuit. */
2057 &arm_default_vec_cost
, /* Vectorizer costs. */
2058 false, /* Prefer Neon for 64-bits bitops. */
2059 false, false, /* Prefer 32-bit encodings. */
2060 false, /* Prefer Neon for stringops. */
2061 8 /* Maximum insns to inline memset. */
2065 /* Not all of these give usefully different compilation alternatives,
2066 but there is no simple way of generalizing them. */
2067 static const struct processors all_cores
[] =
2070 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2071 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2072 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2073 #include "arm-cores.def"
2075 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
2078 static const struct processors all_architectures
[] =
2080 /* ARM Architectures */
2081 /* We don't specify tuning costs here as it will be figured out
2084 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2085 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2086 #include "arm-arches.def"
2088 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.
   Patched in place by arm_option_override once the architecture is known;
   the "0UNK" default is only a placeholder.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2102 /* Available values for -mfpu=. */
2104 static const struct arm_fpu_desc all_fpus
[] =
2106 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2107 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2108 #include "arm-fpus.def"
2113 /* Supported TLS relocations. */
2121 TLS_DESCSEQ
/* GNU scheme */
2124 /* The maximum number of insns to be used when loading a constant. */
2126 arm_constant_limit (bool size_p
)
2128 return size_p
? 1 : current_tune
->constant_limit
;
2131 /* Emit an insn that's a simple single-set. Both the operands must be known
2133 inline static rtx_insn
*
2134 emit_set_insn (rtx x
, rtx y
)
2136 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned count;

  /* Kernighan's method: each iteration clears the least-significant
     set bit, so the loop runs once per set bit.  */
  for (count = 0; value != 0; count++)
    value &= value - 1;

  return count;
}
2158 } arm_fixed_mode_set
;
2160 /* A small helper for setting fixed-point library libfuncs. */
2163 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2164 const char *funcname
, const char *modename
,
2169 if (num_suffix
== 0)
2170 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2172 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2174 set_optab_libfunc (optable
, mode
, buffer
);
2178 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2179 machine_mode from
, const char *funcname
,
2180 const char *toname
, const char *fromname
)
2183 const char *maybe_suffix_2
= "";
2185 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2186 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2187 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2188 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2189 maybe_suffix_2
= "2";
2191 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2194 set_conv_libfunc (optable
, to
, from
, buffer
);
2197 /* Set up library functions unique to ARM. */
2200 arm_init_libfuncs (void)
2202 /* For Linux, we have access to kernel support for atomic operations. */
2203 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2204 init_sync_libfuncs (2 * UNITS_PER_WORD
);
2206 /* There are no special library functions unless we are using the
2211 /* The functions below are described in Section 4 of the "Run-Time
2212 ABI for the ARM architecture", Version 1.0. */
2214 /* Double-precision floating-point arithmetic. Table 2. */
2215 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2216 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2217 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2218 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2219 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2221 /* Double-precision comparisons. Table 3. */
2222 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2223 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2224 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2225 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2226 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2227 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2228 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2230 /* Single-precision floating-point arithmetic. Table 4. */
2231 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2232 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2233 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2234 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2235 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2237 /* Single-precision comparisons. Table 5. */
2238 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2239 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2240 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2241 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2242 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2243 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2244 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2246 /* Floating-point to integer conversions. Table 6. */
2247 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2248 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2249 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2250 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2251 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2252 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2253 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2254 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2256 /* Conversions between floating types. Table 7. */
2257 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2258 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2260 /* Integer to floating-point conversions. Table 8. */
2261 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2262 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2263 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2264 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2265 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2266 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2267 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2268 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2270 /* Long long. Table 9. */
2271 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2272 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2273 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2274 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2275 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2276 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2277 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2278 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2280 /* Integer (32/32->32) division. \S 4.3.1. */
2281 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2282 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2284 /* The divmod functions are designed so that they can be used for
2285 plain division, even though they return both the quotient and the
2286 remainder. The quotient is returned in the usual location (i.e.,
2287 r0 for SImode, {r0, r1} for DImode), just as would be expected
2288 for an ordinary division routine. Because the AAPCS calling
2289 conventions specify that all of { r0, r1, r2, r3 } are
2290 callee-saved registers, there is no need to tell the compiler
2291 explicitly that those registers are clobbered by these
2293 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2294 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2296 /* For SImode division the ABI provides div-without-mod routines,
2297 which are faster. */
2298 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2299 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2301 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2302 divmod libcalls instead. */
2303 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2304 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2305 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2306 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2308 /* Half-precision float operations. The compiler handles all operations
2309 with NULL libfuncs by converting the SFmode. */
2310 switch (arm_fp16_format
)
2312 case ARM_FP16_FORMAT_IEEE
:
2313 case ARM_FP16_FORMAT_ALTERNATIVE
:
2316 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2317 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2319 : "__gnu_f2h_alternative"));
2320 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2321 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2323 : "__gnu_h2f_alternative"));
2326 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2327 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2328 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2329 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2330 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2333 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2334 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2335 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2336 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2337 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2338 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2339 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2346 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2348 const arm_fixed_mode_set fixed_arith_modes
[] =
2369 const arm_fixed_mode_set fixed_conv_modes
[] =
2399 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2401 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2402 "add", fixed_arith_modes
[i
].name
, 3);
2403 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2404 "ssadd", fixed_arith_modes
[i
].name
, 3);
2405 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2406 "usadd", fixed_arith_modes
[i
].name
, 3);
2407 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2408 "sub", fixed_arith_modes
[i
].name
, 3);
2409 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2410 "sssub", fixed_arith_modes
[i
].name
, 3);
2411 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2412 "ussub", fixed_arith_modes
[i
].name
, 3);
2413 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2414 "mul", fixed_arith_modes
[i
].name
, 3);
2415 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2416 "ssmul", fixed_arith_modes
[i
].name
, 3);
2417 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2418 "usmul", fixed_arith_modes
[i
].name
, 3);
2419 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2420 "div", fixed_arith_modes
[i
].name
, 3);
2421 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2422 "udiv", fixed_arith_modes
[i
].name
, 3);
2423 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2424 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2425 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2426 "usdiv", fixed_arith_modes
[i
].name
, 3);
2427 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2428 "neg", fixed_arith_modes
[i
].name
, 2);
2429 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2430 "ssneg", fixed_arith_modes
[i
].name
, 2);
2431 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2432 "usneg", fixed_arith_modes
[i
].name
, 2);
2433 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2434 "ashl", fixed_arith_modes
[i
].name
, 3);
2435 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2436 "ashr", fixed_arith_modes
[i
].name
, 3);
2437 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2438 "lshr", fixed_arith_modes
[i
].name
, 3);
2439 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2440 "ssashl", fixed_arith_modes
[i
].name
, 3);
2441 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2442 "usashl", fixed_arith_modes
[i
].name
, 3);
2443 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2444 "cmp", fixed_arith_modes
[i
].name
, 2);
2447 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2448 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2451 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2452 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2455 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2456 fixed_conv_modes
[j
].mode
, "fract",
2457 fixed_conv_modes
[i
].name
,
2458 fixed_conv_modes
[j
].name
);
2459 arm_set_fixed_conv_libfunc (satfract_optab
,
2460 fixed_conv_modes
[i
].mode
,
2461 fixed_conv_modes
[j
].mode
, "satfract",
2462 fixed_conv_modes
[i
].name
,
2463 fixed_conv_modes
[j
].name
);
2464 arm_set_fixed_conv_libfunc (fractuns_optab
,
2465 fixed_conv_modes
[i
].mode
,
2466 fixed_conv_modes
[j
].mode
, "fractuns",
2467 fixed_conv_modes
[i
].name
,
2468 fixed_conv_modes
[j
].name
);
2469 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2470 fixed_conv_modes
[i
].mode
,
2471 fixed_conv_modes
[j
].mode
, "satfractuns",
2472 fixed_conv_modes
[i
].name
,
2473 fixed_conv_modes
[j
].name
);
2477 if (TARGET_AAPCS_BASED
)
2478 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2481 /* On AAPCS systems, this is the "struct __va_list". */
2482 static GTY(()) tree va_list_type
;
2484 /* Return the type to use as __builtin_va_list. */
2486 arm_build_builtin_va_list (void)
2491 if (!TARGET_AAPCS_BASED
)
2492 return std_build_builtin_va_list ();
2494 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2502 The C Library ABI further reinforces this definition in \S
2505 We must follow this definition exactly. The structure tag
2506 name is visible in C++ mangled names, and thus forms a part
2507 of the ABI. The field name may be used by people who
2508 #include <stdarg.h>. */
2509 /* Create the type. */
2510 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2511 /* Give it the required name. */
2512 va_list_name
= build_decl (BUILTINS_LOCATION
,
2514 get_identifier ("__va_list"),
2516 DECL_ARTIFICIAL (va_list_name
) = 1;
2517 TYPE_NAME (va_list_type
) = va_list_name
;
2518 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2519 /* Create the __ap field. */
2520 ap_field
= build_decl (BUILTINS_LOCATION
,
2522 get_identifier ("__ap"),
2524 DECL_ARTIFICIAL (ap_field
) = 1;
2525 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2526 TYPE_FIELDS (va_list_type
) = ap_field
;
2527 /* Compute its layout. */
2528 layout_type (va_list_type
);
2530 return va_list_type
;
2533 /* Return an expression of type "void *" pointing to the next
2534 available argument in a variable-argument list. VALIST is the
2535 user-level va_list object, of type __builtin_va_list. */
2537 arm_extract_valist_ptr (tree valist
)
2539 if (TREE_TYPE (valist
) == error_mark_node
)
2540 return error_mark_node
;
2542 /* On an AAPCS target, the pointer is stored within "struct
2544 if (TARGET_AAPCS_BASED
)
2546 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2547 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2548 valist
, ap_field
, NULL_TREE
);
2554 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2556 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2558 valist
= arm_extract_valist_ptr (valist
);
2559 std_expand_builtin_va_start (valist
, nextarg
);
2562 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2564 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2567 valist
= arm_extract_valist_ptr (valist
);
2568 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2571 /* Fix up any incompatible options that the user has specified. */
2573 arm_option_override (void)
2575 if (global_options_set
.x_arm_arch_option
)
2576 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2578 if (global_options_set
.x_arm_cpu_option
)
2580 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2581 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2584 if (global_options_set
.x_arm_tune_option
)
2585 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2587 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2588 SUBTARGET_OVERRIDE_OPTIONS
;
2591 if (arm_selected_arch
)
2593 if (arm_selected_cpu
)
2595 /* Check for conflict between mcpu and march. */
2596 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
2598 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2599 arm_selected_cpu
->name
, arm_selected_arch
->name
);
2600 /* -march wins for code generation.
2601 -mcpu wins for default tuning. */
2602 if (!arm_selected_tune
)
2603 arm_selected_tune
= arm_selected_cpu
;
2605 arm_selected_cpu
= arm_selected_arch
;
2609 arm_selected_arch
= NULL
;
2612 /* Pick a CPU based on the architecture. */
2613 arm_selected_cpu
= arm_selected_arch
;
2616 /* If the user did not specify a processor, choose one for them. */
2617 if (!arm_selected_cpu
)
2619 const struct processors
* sel
;
2620 unsigned int sought
;
2622 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
2623 if (!arm_selected_cpu
->name
)
2625 #ifdef SUBTARGET_CPU_DEFAULT
2626 /* Use the subtarget default CPU if none was specified by
2628 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
2630 /* Default to ARM6. */
2631 if (!arm_selected_cpu
->name
)
2632 arm_selected_cpu
= &all_cores
[arm6
];
2635 sel
= arm_selected_cpu
;
2636 insn_flags
= sel
->flags
;
2638 /* Now check to see if the user has specified some command line
2639 switch that require certain abilities from the cpu. */
2642 if (TARGET_INTERWORK
|| TARGET_THUMB
)
2644 sought
|= (FL_THUMB
| FL_MODE32
);
2646 /* There are no ARM processors that support both APCS-26 and
2647 interworking. Therefore we force FL_MODE26 to be removed
2648 from insn_flags here (if it was set), so that the search
2649 below will always be able to find a compatible processor. */
2650 insn_flags
&= ~FL_MODE26
;
2653 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
2655 /* Try to locate a CPU type that supports all of the abilities
2656 of the default CPU, plus the extra abilities requested by
2658 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2659 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
2662 if (sel
->name
== NULL
)
2664 unsigned current_bit_count
= 0;
2665 const struct processors
* best_fit
= NULL
;
2667 /* Ideally we would like to issue an error message here
2668 saying that it was not possible to find a CPU compatible
2669 with the default CPU, but which also supports the command
2670 line options specified by the programmer, and so they
2671 ought to use the -mcpu=<name> command line option to
2672 override the default CPU type.
2674 If we cannot find a cpu that has both the
2675 characteristics of the default cpu and the given
2676 command line options we scan the array again looking
2677 for a best match. */
2678 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2679 if ((sel
->flags
& sought
) == sought
)
2683 count
= bit_count (sel
->flags
& insn_flags
);
2685 if (count
>= current_bit_count
)
2688 current_bit_count
= count
;
2692 gcc_assert (best_fit
);
2696 arm_selected_cpu
= sel
;
2700 gcc_assert (arm_selected_cpu
);
2701 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2702 if (!arm_selected_tune
)
2703 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
2705 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
2706 insn_flags
= arm_selected_cpu
->flags
;
2707 arm_base_arch
= arm_selected_cpu
->base_arch
;
2709 arm_tune
= arm_selected_tune
->core
;
2710 tune_flags
= arm_selected_tune
->flags
;
2711 current_tune
= arm_selected_tune
->tune
;
2713 /* Make sure that the processor choice does not conflict with any of the
2714 other command line choices. */
2715 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
2716 error ("target CPU does not support ARM mode");
2718 /* BPABI targets use linker tricks to allow interworking on cores
2719 without thumb support. */
2720 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
2722 warning (0, "target CPU does not support interworking" );
2723 target_flags
&= ~MASK_INTERWORK
;
2726 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
2728 warning (0, "target CPU does not support THUMB instructions");
2729 target_flags
&= ~MASK_THUMB
;
2732 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
2734 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2735 target_flags
&= ~MASK_APCS_FRAME
;
2738 /* Callee super interworking implies thumb interworking. Adding
2739 this to the flags here simplifies the logic elsewhere. */
2740 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
2741 target_flags
|= MASK_INTERWORK
;
2743 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2744 from here where no function is being compiled currently. */
2745 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
2746 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2748 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
2749 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2751 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
2753 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2754 target_flags
|= MASK_APCS_FRAME
;
2757 if (TARGET_POKE_FUNCTION_NAME
)
2758 target_flags
|= MASK_APCS_FRAME
;
2760 if (TARGET_APCS_REENT
&& flag_pic
)
2761 error ("-fpic and -mapcs-reent are incompatible");
2763 if (TARGET_APCS_REENT
)
2764 warning (0, "APCS reentrant code not supported. Ignored");
2766 /* If this target is normally configured to use APCS frames, warn if they
2767 are turned off and debugging is turned on. */
2769 && write_symbols
!= NO_DEBUG
2770 && !TARGET_APCS_FRAME
2771 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2772 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2774 if (TARGET_APCS_FLOAT
)
2775 warning (0, "passing floating point arguments in fp regs not yet supported");
2777 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2778 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
2779 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
2780 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
2781 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
2782 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
2783 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
2784 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
2785 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
2786 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
2787 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
2788 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
2789 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
2790 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
2791 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
2793 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
2794 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
2795 thumb_code
= TARGET_ARM
== 0;
2796 thumb1_code
= TARGET_THUMB1
!= 0;
2797 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
2798 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
2799 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
2800 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
2801 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
2802 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
2803 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
2804 arm_arch_crc
= (insn_flags
& FL_CRC32
) != 0;
2805 if (arm_restrict_it
== 2)
2806 arm_restrict_it
= arm_arch8
&& TARGET_THUMB2
;
2809 arm_restrict_it
= 0;
2811 /* If we are not using the default (ARM mode) section anchor offset
2812 ranges, then set the correct ranges now. */
2815 /* Thumb-1 LDR instructions cannot have negative offsets.
2816 Permissible positive offset ranges are 5-bit (for byte loads),
2817 6-bit (for halfword loads), or 7-bit (for word loads).
2818 Empirical results suggest a 7-bit anchor range gives the best
2819 overall code size. */
2820 targetm
.min_anchor_offset
= 0;
2821 targetm
.max_anchor_offset
= 127;
2823 else if (TARGET_THUMB2
)
2825 /* The minimum is set such that the total size of the block
2826 for a particular anchor is 248 + 1 + 4095 bytes, which is
2827 divisible by eight, ensuring natural spacing of anchors. */
2828 targetm
.min_anchor_offset
= -248;
2829 targetm
.max_anchor_offset
= 4095;
2832 /* V5 code we generate is completely interworking capable, so we turn off
2833 TARGET_INTERWORK here to avoid many tests later on. */
2835 /* XXX However, we must pass the right pre-processor defines to CPP
2836 or GLD can get confused. This is a hack. */
2837 if (TARGET_INTERWORK
)
2838 arm_cpp_interwork
= 1;
2841 target_flags
&= ~MASK_INTERWORK
;
2843 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
2844 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2846 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
2847 error ("iwmmxt abi requires an iwmmxt capable cpu");
2849 if (!global_options_set
.x_arm_fpu_index
)
2851 const char *target_fpu_name
;
2854 #ifdef FPUTYPE_DEFAULT
2855 target_fpu_name
= FPUTYPE_DEFAULT
;
2857 target_fpu_name
= "vfp";
2860 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
2865 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
2867 if (TARGET_NEON
&& !arm_arch7
)
2868 error ("target CPU does not support NEON");
2870 switch (arm_fpu_desc
->model
)
2872 case ARM_FP_MODEL_VFP
:
2873 arm_fpu_attr
= FPU_VFP
;
2880 if (TARGET_AAPCS_BASED
)
2882 if (TARGET_CALLER_INTERWORKING
)
2883 error ("AAPCS does not support -mcaller-super-interworking");
2885 if (TARGET_CALLEE_INTERWORKING
)
2886 error ("AAPCS does not support -mcallee-super-interworking");
2889 /* iWMMXt and NEON are incompatible. */
2890 if (TARGET_IWMMXT
&& TARGET_NEON
)
2891 error ("iWMMXt and NEON are incompatible");
2893 /* iWMMXt unsupported under Thumb mode. */
2894 if (TARGET_THUMB
&& TARGET_IWMMXT
)
2895 error ("iWMMXt unsupported under Thumb mode");
2897 /* __fp16 support currently assumes the core has ldrh. */
2898 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
2899 sorry ("__fp16 and no ldrh");
2901 /* If soft-float is specified then don't use FPU. */
2902 if (TARGET_SOFT_FLOAT
)
2903 arm_fpu_attr
= FPU_NONE
;
2905 if (TARGET_AAPCS_BASED
)
2907 if (arm_abi
== ARM_ABI_IWMMXT
)
2908 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
2909 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
2910 && TARGET_HARD_FLOAT
2912 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
2914 arm_pcs_default
= ARM_PCS_AAPCS
;
2918 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
2919 sorry ("-mfloat-abi=hard and VFP");
2921 if (arm_abi
== ARM_ABI_APCS
)
2922 arm_pcs_default
= ARM_PCS_APCS
;
2924 arm_pcs_default
= ARM_PCS_ATPCS
;
2927 /* For arm2/3 there is no need to do any scheduling if we are doing
2928 software floating-point. */
2929 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
2930 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
2932 /* Use the cp15 method if it is available. */
2933 if (target_thread_pointer
== TP_AUTO
)
2935 if (arm_arch6k
&& !TARGET_THUMB1
)
2936 target_thread_pointer
= TP_CP15
;
2938 target_thread_pointer
= TP_SOFT
;
2941 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
2942 error ("can not use -mtp=cp15 with 16-bit Thumb");
2944 /* Override the default structure alignment for AAPCS ABI. */
2945 if (!global_options_set
.x_arm_structure_size_boundary
)
2947 if (TARGET_AAPCS_BASED
)
2948 arm_structure_size_boundary
= 8;
2952 if (arm_structure_size_boundary
!= 8
2953 && arm_structure_size_boundary
!= 32
2954 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
2956 if (ARM_DOUBLEWORD_ALIGN
)
2958 "structure size boundary can only be set to 8, 32 or 64");
2960 warning (0, "structure size boundary can only be set to 8 or 32");
2961 arm_structure_size_boundary
2962 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
2966 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
2968 error ("RTP PIC is incompatible with Thumb");
2972 /* If stack checking is disabled, we can use r10 as the PIC register,
2973 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2974 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
2976 if (TARGET_VXWORKS_RTP
)
2977 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2978 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
2981 if (flag_pic
&& TARGET_VXWORKS_RTP
)
2982 arm_pic_register
= 9;
2984 if (arm_pic_register_string
!= NULL
)
2986 int pic_register
= decode_reg_name (arm_pic_register_string
);
2989 warning (0, "-mpic-register= is useless without -fpic");
2991 /* Prevent the user from choosing an obviously stupid PIC register. */
2992 else if (pic_register
< 0 || call_used_regs
[pic_register
]
2993 || pic_register
== HARD_FRAME_POINTER_REGNUM
2994 || pic_register
== STACK_POINTER_REGNUM
2995 || pic_register
>= PC_REGNUM
2996 || (TARGET_VXWORKS_RTP
2997 && (unsigned int) pic_register
!= arm_pic_register
))
2998 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3000 arm_pic_register
= pic_register
;
3003 if (TARGET_VXWORKS_RTP
3004 && !global_options_set
.x_arm_pic_data_is_text_relative
)
3005 arm_pic_data_is_text_relative
= 0;
3007 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3008 if (fix_cm3_ldrd
== 2)
3010 if (arm_selected_cpu
->core
== cortexm3
)
3016 /* Enable -munaligned-access by default for
3017 - all ARMv6 architecture-based processors
3018 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3019 - ARMv8 architecture-base processors.
3021 Disable -munaligned-access by default for
3022 - all pre-ARMv6 architecture-based processors
3023 - ARMv6-M architecture-based processors. */
3025 if (unaligned_access
== 2)
3027 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
3028 unaligned_access
= 1;
3030 unaligned_access
= 0;
3032 else if (unaligned_access
== 1
3033 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
3035 warning (0, "target CPU does not support unaligned accesses");
3036 unaligned_access
= 0;
3039 if (TARGET_THUMB1
&& flag_schedule_insns
)
3041 /* Don't warn since it's on by default in -O2. */
3042 flag_schedule_insns
= 0;
3047 /* If optimizing for size, bump the number of instructions that we
3048 are prepared to conditionally execute (even on a StrongARM). */
3049 max_insns_skipped
= 6;
3051 /* For THUMB2, we limit the conditional sequence to one IT block. */
3053 max_insns_skipped
= MAX_INSN_PER_IT_BLOCK
;
3056 max_insns_skipped
= current_tune
->max_insns_skipped
;
3058 /* Hot/Cold partitioning is not currently supported, since we can't
3059 handle literal pool placement in that case. */
3060 if (flag_reorder_blocks_and_partition
)
3062 inform (input_location
,
3063 "-freorder-blocks-and-partition not supported on this architecture");
3064 flag_reorder_blocks_and_partition
= 0;
3065 flag_reorder_blocks
= 1;
3069 /* Hoisting PIC address calculations more aggressively provides a small,
3070 but measurable, size reduction for PIC code. Therefore, we decrease
3071 the bar for unrestricted expression hoisting to the cost of PIC address
3072 calculation, which is 2 instructions. */
3073 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3074 global_options
.x_param_values
,
3075 global_options_set
.x_param_values
);
3077 /* ARM EABI defaults to strict volatile bitfields. */
3078 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3079 && abi_version_at_least(2))
3080 flag_strict_volatile_bitfields
= 1;
3082 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
3083 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
3084 if (flag_prefetch_loop_arrays
< 0
3087 && current_tune
->num_prefetch_slots
> 0)
3088 flag_prefetch_loop_arrays
= 1;
3090 /* Set up parameters to be used in prefetching algorithm. Do not override the
3091 defaults unless we are tuning for a core we have researched values for. */
3092 if (current_tune
->num_prefetch_slots
> 0)
3093 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3094 current_tune
->num_prefetch_slots
,
3095 global_options
.x_param_values
,
3096 global_options_set
.x_param_values
);
3097 if (current_tune
->l1_cache_line_size
>= 0)
3098 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3099 current_tune
->l1_cache_line_size
,
3100 global_options
.x_param_values
,
3101 global_options_set
.x_param_values
);
3102 if (current_tune
->l1_cache_size
>= 0)
3103 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3104 current_tune
->l1_cache_size
,
3105 global_options
.x_param_values
,
3106 global_options_set
.x_param_values
);
3108 /* Use Neon to perform 64-bits operations rather than core
3110 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3111 if (use_neon_for_64bits
== 1)
3112 prefer_neon_for_64bits
= true;
3114 /* Use the alternative scheduling-pressure algorithm by default. */
3115 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3116 global_options
.x_param_values
,
3117 global_options_set
.x_param_values
);
3119 /* Disable shrink-wrap when optimizing function for size, since it tends to
3120 generate additional returns. */
3121 if (optimize_function_for_size_p (cfun
) && TARGET_THUMB2
)
3122 flag_shrink_wrap
= false;
3123 /* TBD: Dwarf info for apcs frame is not handled yet. */
3124 if (TARGET_APCS_FRAME
)
3125 flag_shrink_wrap
= false;
3127 /* We only support -mslow-flash-data on armv7-m targets. */
3128 if (target_slow_flash_data
3129 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
3130 || (TARGET_THUMB1
|| flag_pic
|| TARGET_NEON
)))
3131 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3133 /* Currently, for slow flash data, we just disable literal pools. */
3134 if (target_slow_flash_data
)
3135 arm_disable_literal_pool
= true;
3137 /* Register global variables with the garbage collector. */
3138 arm_add_gc_roots ();
3142 arm_add_gc_roots (void)
3144 gcc_obstack_init(&minipool_obstack
);
3145 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3148 /* A table of known ARM exception types.
3149 For use with the interrupt function attribute. */
3153 const char *const arg
;
3154 const unsigned long return_value
;
3158 static const isr_attribute_arg isr_attribute_args
[] =
3160 { "IRQ", ARM_FT_ISR
},
3161 { "irq", ARM_FT_ISR
},
3162 { "FIQ", ARM_FT_FIQ
},
3163 { "fiq", ARM_FT_FIQ
},
3164 { "ABORT", ARM_FT_ISR
},
3165 { "abort", ARM_FT_ISR
},
3166 { "ABORT", ARM_FT_ISR
},
3167 { "abort", ARM_FT_ISR
},
3168 { "UNDEF", ARM_FT_EXCEPTION
},
3169 { "undef", ARM_FT_EXCEPTION
},
3170 { "SWI", ARM_FT_EXCEPTION
},
3171 { "swi", ARM_FT_EXCEPTION
},
3172 { NULL
, ARM_FT_NORMAL
}
3175 /* Returns the (interrupt) function type of the current
3176 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3178 static unsigned long
3179 arm_isr_value (tree argument
)
3181 const isr_attribute_arg
* ptr
;
3185 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3187 /* No argument - default to IRQ. */
3188 if (argument
== NULL_TREE
)
3191 /* Get the value of the argument. */
3192 if (TREE_VALUE (argument
) == NULL_TREE
3193 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3194 return ARM_FT_UNKNOWN
;
3196 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3198 /* Check it against the list of known arguments. */
3199 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3200 if (streq (arg
, ptr
->arg
))
3201 return ptr
->return_value
;
3203 /* An unrecognized interrupt type. */
3204 return ARM_FT_UNKNOWN
;
3207 /* Computes the type of the current function. */
3209 static unsigned long
3210 arm_compute_func_type (void)
3212 unsigned long type
= ARM_FT_UNKNOWN
;
3216 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3218 /* Decide if the current function is volatile. Such functions
3219 never return, and many memory cycles can be saved by not storing
3220 register values that will never be needed again. This optimization
3221 was added to speed up context switching in a kernel application. */
3223 && (TREE_NOTHROW (current_function_decl
)
3224 || !(flag_unwind_tables
3226 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3227 && TREE_THIS_VOLATILE (current_function_decl
))
3228 type
|= ARM_FT_VOLATILE
;
3230 if (cfun
->static_chain_decl
!= NULL
)
3231 type
|= ARM_FT_NESTED
;
3233 attr
= DECL_ATTRIBUTES (current_function_decl
);
3235 a
= lookup_attribute ("naked", attr
);
3237 type
|= ARM_FT_NAKED
;
3239 a
= lookup_attribute ("isr", attr
);
3241 a
= lookup_attribute ("interrupt", attr
);
3244 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3246 type
|= arm_isr_value (TREE_VALUE (a
));
3251 /* Returns the type of the current function. */
3254 arm_current_func_type (void)
3256 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3257 cfun
->machine
->func_type
= arm_compute_func_type ();
3259 return cfun
->machine
->func_type
;
3263 arm_allocate_stack_slots_for_args (void)
3265 /* Naked functions should not allocate stack slots for arguments. */
3266 return !IS_NAKED (arm_current_func_type ());
3270 arm_warn_func_return (tree decl
)
3272 /* Naked functions are implemented entirely in assembly, including the
3273 return sequence, so suppress warnings about this. */
3274 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3278 /* Output assembler code for a block containing the constant parts
3279 of a trampoline, leaving space for the variable parts.
3281 On the ARM, (if r8 is the static chain regnum, and remembering that
3282 referencing pc adds an offset of 8) the trampoline looks like:
3285 .word static chain value
3286 .word function's address
3287 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3290 arm_asm_trampoline_template (FILE *f
)
3294 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3295 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3297 else if (TARGET_THUMB2
)
3299 /* The Thumb-2 trampoline is similar to the arm implementation.
3300 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3301 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3302 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3303 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3307 ASM_OUTPUT_ALIGN (f
, 2);
3308 fprintf (f
, "\t.code\t16\n");
3309 fprintf (f
, ".Ltrampoline_start:\n");
3310 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3311 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3312 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3313 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3314 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3315 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3317 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3318 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3321 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3324 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3326 rtx fnaddr
, mem
, a_tramp
;
3328 emit_block_move (m_tramp
, assemble_trampoline_template (),
3329 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3331 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3332 emit_move_insn (mem
, chain_value
);
3334 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3335 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3336 emit_move_insn (mem
, fnaddr
);
3338 a_tramp
= XEXP (m_tramp
, 0);
3339 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3340 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3341 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3344 /* Thumb trampolines should be entered in thumb mode, so set
3345 the bottom bit of the address. */
3348 arm_trampoline_adjust_address (rtx addr
)
3351 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3352 NULL
, 0, OPTAB_LIB_WIDEN
);
3356 /* Return 1 if it is possible to return using a single instruction.
3357 If SIBLING is non-null, this is a test for a return before a sibling
3358 call. SIBLING is the call insn, so we can examine its register usage. */
3361 use_return_insn (int iscond
, rtx sibling
)
3364 unsigned int func_type
;
3365 unsigned long saved_int_regs
;
3366 unsigned HOST_WIDE_INT stack_adjust
;
3367 arm_stack_offsets
*offsets
;
3369 /* Never use a return instruction before reload has run. */
3370 if (!reload_completed
)
3373 func_type
= arm_current_func_type ();
3375 /* Naked, volatile and stack alignment functions need special
3377 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3380 /* So do interrupt functions that use the frame pointer and Thumb
3381 interrupt functions. */
3382 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3385 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3386 && !optimize_function_for_size_p (cfun
))
3389 offsets
= arm_get_frame_offsets ();
3390 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3392 /* As do variadic functions. */
3393 if (crtl
->args
.pretend_args_size
3394 || cfun
->machine
->uses_anonymous_args
3395 /* Or if the function calls __builtin_eh_return () */
3396 || crtl
->calls_eh_return
3397 /* Or if the function calls alloca */
3398 || cfun
->calls_alloca
3399 /* Or if there is a stack adjustment. However, if the stack pointer
3400 is saved on the stack, we can use a pre-incrementing stack load. */
3401 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3402 && stack_adjust
== 4)))
3405 saved_int_regs
= offsets
->saved_regs_mask
;
3407 /* Unfortunately, the insn
3409 ldmib sp, {..., sp, ...}
3411 triggers a bug on most SA-110 based devices, such that the stack
3412 pointer won't be correctly restored if the instruction takes a
3413 page fault. We work around this problem by popping r3 along with
3414 the other registers, since that is never slower than executing
3415 another instruction.
3417 We test for !arm_arch5 here, because code for any architecture
3418 less than this could potentially be run on one of the buggy
3420 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3422 /* Validate that r3 is a call-clobbered register (always true in
3423 the default abi) ... */
3424 if (!call_used_regs
[3])
3427 /* ... that it isn't being used for a return value ... */
3428 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3431 /* ... or for a tail-call argument ... */
3434 gcc_assert (CALL_P (sibling
));
3436 if (find_regno_fusage (sibling
, USE
, 3))
3440 /* ... and that there are no call-saved registers in r0-r2
3441 (always true in the default ABI). */
3442 if (saved_int_regs
& 0x7)
3446 /* Can't be done if interworking with Thumb, and any registers have been
3448 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3451 /* On StrongARM, conditional returns are expensive if they aren't
3452 taken and multiple registers have been stacked. */
3453 if (iscond
&& arm_tune_strongarm
)
3455 /* Conditional return when just the LR is stored is a simple
3456 conditional-load instruction, that's not expensive. */
3457 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3461 && arm_pic_register
!= INVALID_REGNUM
3462 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3466 /* If there are saved registers but the LR isn't saved, then we need
3467 two instructions for the return. */
3468 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3471 /* Can't be done if any of the VFP regs are pushed,
3472 since this also requires an insn. */
3473 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3474 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3475 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3478 if (TARGET_REALLY_IWMMXT
)
3479 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3480 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3486 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3487 shrink-wrapping if possible. This is the case if we need to emit a
3488 prologue, which we can test by looking at the offsets. */
3490 use_simple_return_p (void)
3492 arm_stack_offsets
*offsets
;
3494 offsets
= arm_get_frame_offsets ();
3495 return offsets
->outgoing_args
!= 0;
3498 /* Return TRUE if int I is a valid immediate ARM constant. */
3501 const_ok_for_arm (HOST_WIDE_INT i
)
3505 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3506 be all zero, or all one. */
3507 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3508 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3509 != ((~(unsigned HOST_WIDE_INT
) 0)
3510 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3513 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3515 /* Fast return for 0 and small values. We must do this for zero, since
3516 the code below can't handle that one case. */
3517 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3520 /* Get the number of trailing zeros. */
3521 lowbit
= ffs((int) i
) - 1;
3523 /* Only even shifts are allowed in ARM mode so round down to the
3524 nearest even number. */
3528 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3533 /* Allow rotated constants in ARM mode. */
3535 && ((i
& ~0xc000003f) == 0
3536 || (i
& ~0xf000000f) == 0
3537 || (i
& ~0xfc000003) == 0))
3544 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3547 if (i
== v
|| i
== (v
| (v
<< 8)))
3550 /* Allow repeated pattern 0xXY00XY00. */
3560 /* Return true if I is a valid constant for the operation CODE. */
3562 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3564 if (const_ok_for_arm (i
))
3570 /* See if we can use movw. */
3571 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3574 /* Otherwise, try mvn. */
3575 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3578 /* See if we can use addw or subw. */
3580 && ((i
& 0xfffff000) == 0
3581 || ((-i
) & 0xfffff000) == 0))
3583 /* else fall through. */
3603 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3605 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3611 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3615 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3622 /* Return true if I is a valid di mode constant for the operation CODE. */
3624 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3626 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3627 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3628 rtx hi
= GEN_INT (hi_val
);
3629 rtx lo
= GEN_INT (lo_val
);
3639 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3640 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
3642 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3649 /* Emit a sequence of insns to handle a large constant.
3650 CODE is the code of the operation required, it can be any of SET, PLUS,
3651 IOR, AND, XOR, MINUS;
3652 MODE is the mode in which the operation is being performed;
3653 VAL is the integer to operate on;
3654 SOURCE is the other operand (a register, or a null-pointer for SET);
3655 SUBTARGETS means it is safe to create scratch registers if that will
3656 either produce a simpler sequence, or we will want to cse the values.
3657 Return value is the number of insns emitted. */
3659 /* ??? Tweak this for thumb2. */
3661 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
3662 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
3666 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3667 cond
= COND_EXEC_TEST (PATTERN (insn
));
3671 if (subtargets
|| code
== SET
3672 || (REG_P (target
) && REG_P (source
)
3673 && REGNO (target
) != REGNO (source
)))
3675 /* After arm_reorg has been called, we can't fix up expensive
3676 constants by pushing them into memory so we must synthesize
3677 them in-line, regardless of the cost. This is only likely to
3678 be more costly on chips that have load delay slots and we are
3679 compiling without running the scheduler (so no splitting
3680 occurred before the final instruction emission).
3682 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3684 if (!cfun
->machine
->after_arm_reorg
3686 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
3688 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
3693 /* Currently SET is the only monadic value for CODE, all
3694 the rest are diadic. */
3695 if (TARGET_USE_MOVT
)
3696 arm_emit_movpair (target
, GEN_INT (val
));
3698 emit_set_insn (target
, GEN_INT (val
));
3704 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
3706 if (TARGET_USE_MOVT
)
3707 arm_emit_movpair (temp
, GEN_INT (val
));
3709 emit_set_insn (temp
, GEN_INT (val
));
3711 /* For MINUS, the value is subtracted from, since we never
3712 have subtraction of a constant. */
3714 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
3716 emit_set_insn (target
,
3717 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
3723 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
3727 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3728 ARM/THUMB2 immediates, and add up to VAL.
3729 Thr function return value gives the number of insns required. */
3731 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3732 struct four_ints
*return_sequence
)
3734 int best_consecutive_zeros
= 0;
3738 struct four_ints tmp_sequence
;
3740 /* If we aren't targeting ARM, the best place to start is always at
3741 the bottom, otherwise look more closely. */
3744 for (i
= 0; i
< 32; i
+= 2)
3746 int consecutive_zeros
= 0;
3748 if (!(val
& (3 << i
)))
3750 while ((i
< 32) && !(val
& (3 << i
)))
3752 consecutive_zeros
+= 2;
3755 if (consecutive_zeros
> best_consecutive_zeros
)
3757 best_consecutive_zeros
= consecutive_zeros
;
3758 best_start
= i
- consecutive_zeros
;
3765 /* So long as it won't require any more insns to do so, it's
3766 desirable to emit a small constant (in bits 0...9) in the last
3767 insn. This way there is more chance that it can be combined with
3768 a later addressing insn to form a pre-indexed load or store
3769 operation. Consider:
3771 *((volatile int *)0xe0000100) = 1;
3772 *((volatile int *)0xe0000110) = 2;
3774 We want this to wind up as:
3778 str rB, [rA, #0x100]
3780 str rB, [rA, #0x110]
3782 rather than having to synthesize both large constants from scratch.
3784 Therefore, we calculate how many insns would be required to emit
3785 the constant starting from `best_start', and also starting from
3786 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3787 yield a shorter sequence, we may as well use zero. */
3788 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
3790 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
3792 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
3793 if (insns2
<= insns1
)
3795 *return_sequence
= tmp_sequence
;
3803 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3805 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3806 struct four_ints
*return_sequence
, int i
)
3808 int remainder
= val
& 0xffffffff;
3811 /* Try and find a way of doing the job in either two or three
3814 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3815 location. We start at position I. This may be the MSB, or
3816 optimial_immediate_sequence may have positioned it at the largest block
3817 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3818 wrapping around to the top of the word when we drop off the bottom.
3819 In the worst case this code should produce no more than four insns.
3821 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3822 constants, shifted to any arbitrary location. We should always start
3827 unsigned int b1
, b2
, b3
, b4
;
3828 unsigned HOST_WIDE_INT result
;
3831 gcc_assert (insns
< 4);
3836 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3837 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3840 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3841 /* We can use addw/subw for the last 12 bits. */
3845 /* Use an 8-bit shifted/rotated immediate. */
3849 result
= remainder
& ((0x0ff << end
)
3850 | ((i
< end
) ? (0xff >> (32 - end
))
3857 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3858 arbitrary shifts. */
3859 i
-= TARGET_ARM
? 2 : 1;
3863 /* Next, see if we can do a better job with a thumb2 replicated
3866 We do it this way around to catch the cases like 0x01F001E0 where
3867 two 8-bit immediates would work, but a replicated constant would
3870 TODO: 16-bit constants that don't clear all the bits, but still win.
3871 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3874 b1
= (remainder
& 0xff000000) >> 24;
3875 b2
= (remainder
& 0x00ff0000) >> 16;
3876 b3
= (remainder
& 0x0000ff00) >> 8;
3877 b4
= remainder
& 0xff;
3881 /* The 8-bit immediate already found clears b1 (and maybe b2),
3882 but must leave b3 and b4 alone. */
3884 /* First try to find a 32-bit replicated constant that clears
3885 almost everything. We can assume that we can't do it in one,
3886 or else we wouldn't be here. */
3887 unsigned int tmp
= b1
& b2
& b3
& b4
;
3888 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3890 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3891 + (tmp
== b3
) + (tmp
== b4
);
3893 && (matching_bytes
>= 3
3894 || (matching_bytes
== 2
3895 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3897 /* At least 3 of the bytes match, and the fourth has at
3898 least as many bits set, or two of the bytes match
3899 and it will only require one more insn to finish. */
3907 /* Second, try to find a 16-bit replicated constant that can
3908 leave three of the bytes clear. If b2 or b4 is already
3909 zero, then we can. If the 8-bit from above would not
3910 clear b2 anyway, then we still win. */
3911 else if (b1
== b3
&& (!b2
|| !b4
3912 || (remainder
& 0x00ff0000 & ~result
)))
3914 result
= remainder
& 0xff00ff00;
3920 /* The 8-bit immediate already found clears b2 (and maybe b3)
3921 and we don't get here unless b1 is alredy clear, but it will
3922 leave b4 unchanged. */
3924 /* If we can clear b2 and b4 at once, then we win, since the
3925 8-bits couldn't possibly reach that far. */
3928 result
= remainder
& 0x00ff00ff;
3934 return_sequence
->i
[insns
++] = result
;
3935 remainder
&= ~result
;
3937 if (code
== SET
|| code
== MINUS
)
3945 /* Emit an instruction with the indicated PATTERN. If COND is
3946 non-NULL, conditionalize the execution of the instruction on COND
3950 emit_constant_insn (rtx cond
, rtx pattern
)
3953 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
3954 emit_insn (pattern
);
3957 /* As above, but extra parameter GENERATE which, if clear, suppresses
3961 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
3962 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
3967 int final_invert
= 0;
3969 int set_sign_bit_copies
= 0;
3970 int clear_sign_bit_copies
= 0;
3971 int clear_zero_bit_copies
= 0;
3972 int set_zero_bit_copies
= 0;
3973 int insns
= 0, neg_insns
, inv_insns
;
3974 unsigned HOST_WIDE_INT temp1
, temp2
;
3975 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3976 struct four_ints
*immediates
;
3977 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
3979 /* Find out which operations are safe for a given CODE. Also do a quick
3980 check for degenerate cases; these can occur when DImode operations
3993 if (remainder
== 0xffffffff)
3996 emit_constant_insn (cond
,
3997 gen_rtx_SET (VOIDmode
, target
,
3998 GEN_INT (ARM_SIGN_EXTEND (val
))));
4004 if (reload_completed
&& rtx_equal_p (target
, source
))
4008 emit_constant_insn (cond
,
4009 gen_rtx_SET (VOIDmode
, target
, source
));
4018 emit_constant_insn (cond
,
4019 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
4022 if (remainder
== 0xffffffff)
4024 if (reload_completed
&& rtx_equal_p (target
, source
))
4027 emit_constant_insn (cond
,
4028 gen_rtx_SET (VOIDmode
, target
, source
));
4037 if (reload_completed
&& rtx_equal_p (target
, source
))
4040 emit_constant_insn (cond
,
4041 gen_rtx_SET (VOIDmode
, target
, source
));
4045 if (remainder
== 0xffffffff)
4048 emit_constant_insn (cond
,
4049 gen_rtx_SET (VOIDmode
, target
,
4050 gen_rtx_NOT (mode
, source
)));
4057 /* We treat MINUS as (val - source), since (source - val) is always
4058 passed as (source + (-val)). */
4062 emit_constant_insn (cond
,
4063 gen_rtx_SET (VOIDmode
, target
,
4064 gen_rtx_NEG (mode
, source
)));
4067 if (const_ok_for_arm (val
))
4070 emit_constant_insn (cond
,
4071 gen_rtx_SET (VOIDmode
, target
,
4072 gen_rtx_MINUS (mode
, GEN_INT (val
),
4083 /* If we can do it in one insn get out quickly. */
4084 if (const_ok_for_op (val
, code
))
4087 emit_constant_insn (cond
,
4088 gen_rtx_SET (VOIDmode
, target
,
4090 ? gen_rtx_fmt_ee (code
, mode
, source
,
4096 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4098 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4099 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4103 if (mode
== SImode
&& i
== 16)
4104 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4106 emit_constant_insn (cond
,
4107 gen_zero_extendhisi2
4108 (target
, gen_lowpart (HImode
, source
)));
4110 /* Extz only supports SImode, but we can coerce the operands
4112 emit_constant_insn (cond
,
4113 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4114 gen_lowpart (SImode
, source
),
4115 GEN_INT (i
), const0_rtx
));
4121 /* Calculate a few attributes that may be useful for specific
4123 /* Count number of leading zeros. */
4124 for (i
= 31; i
>= 0; i
--)
4126 if ((remainder
& (1 << i
)) == 0)
4127 clear_sign_bit_copies
++;
4132 /* Count number of leading 1's. */
4133 for (i
= 31; i
>= 0; i
--)
4135 if ((remainder
& (1 << i
)) != 0)
4136 set_sign_bit_copies
++;
4141 /* Count number of trailing zero's. */
4142 for (i
= 0; i
<= 31; i
++)
4144 if ((remainder
& (1 << i
)) == 0)
4145 clear_zero_bit_copies
++;
4150 /* Count number of trailing 1's. */
4151 for (i
= 0; i
<= 31; i
++)
4153 if ((remainder
& (1 << i
)) != 0)
4154 set_zero_bit_copies
++;
4162 /* See if we can do this by sign_extending a constant that is known
4163 to be negative. This is a good, way of doing it, since the shift
4164 may well merge into a subsequent insn. */
4165 if (set_sign_bit_copies
> 1)
4167 if (const_ok_for_arm
4168 (temp1
= ARM_SIGN_EXTEND (remainder
4169 << (set_sign_bit_copies
- 1))))
4173 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4174 emit_constant_insn (cond
,
4175 gen_rtx_SET (VOIDmode
, new_src
,
4177 emit_constant_insn (cond
,
4178 gen_ashrsi3 (target
, new_src
,
4179 GEN_INT (set_sign_bit_copies
- 1)));
4183 /* For an inverted constant, we will need to set the low bits,
4184 these will be shifted out of harm's way. */
4185 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4186 if (const_ok_for_arm (~temp1
))
4190 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4191 emit_constant_insn (cond
,
4192 gen_rtx_SET (VOIDmode
, new_src
,
4194 emit_constant_insn (cond
,
4195 gen_ashrsi3 (target
, new_src
,
4196 GEN_INT (set_sign_bit_copies
- 1)));
4202 /* See if we can calculate the value as the difference between two
4203 valid immediates. */
4204 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4206 int topshift
= clear_sign_bit_copies
& ~1;
4208 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4209 & (0xff000000 >> topshift
));
4211 /* If temp1 is zero, then that means the 9 most significant
4212 bits of remainder were 1 and we've caused it to overflow.
4213 When topshift is 0 we don't need to do anything since we
4214 can borrow from 'bit 32'. */
4215 if (temp1
== 0 && topshift
!= 0)
4216 temp1
= 0x80000000 >> (topshift
- 1);
4218 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4220 if (const_ok_for_arm (temp2
))
4224 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4225 emit_constant_insn (cond
,
4226 gen_rtx_SET (VOIDmode
, new_src
,
4228 emit_constant_insn (cond
,
4229 gen_addsi3 (target
, new_src
,
4237 /* See if we can generate this by setting the bottom (or the top)
4238 16 bits, and then shifting these into the other half of the
4239 word. We only look for the simplest cases, to do more would cost
4240 too much. Be careful, however, not to generate this when the
4241 alternative would take fewer insns. */
4242 if (val
& 0xffff0000)
4244 temp1
= remainder
& 0xffff0000;
4245 temp2
= remainder
& 0x0000ffff;
4247 /* Overlaps outside this range are best done using other methods. */
4248 for (i
= 9; i
< 24; i
++)
4250 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4251 && !const_ok_for_arm (temp2
))
4253 rtx new_src
= (subtargets
4254 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4256 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4257 source
, subtargets
, generate
);
4265 gen_rtx_ASHIFT (mode
, source
,
4272 /* Don't duplicate cases already considered. */
4273 for (i
= 17; i
< 24; i
++)
4275 if (((temp1
| (temp1
>> i
)) == remainder
)
4276 && !const_ok_for_arm (temp1
))
4278 rtx new_src
= (subtargets
4279 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4281 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4282 source
, subtargets
, generate
);
4287 gen_rtx_SET (VOIDmode
, target
,
4290 gen_rtx_LSHIFTRT (mode
, source
,
4301 /* If we have IOR or XOR, and the constant can be loaded in a
4302 single instruction, and we can find a temporary to put it in,
4303 then this can be done in two instructions instead of 3-4. */
4305 /* TARGET can't be NULL if SUBTARGETS is 0 */
4306 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4308 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4312 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4314 emit_constant_insn (cond
,
4315 gen_rtx_SET (VOIDmode
, sub
,
4317 emit_constant_insn (cond
,
4318 gen_rtx_SET (VOIDmode
, target
,
4319 gen_rtx_fmt_ee (code
, mode
,
4330 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4331 and the remainder 0s for e.g. 0xfff00000)
4332 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4334 This can be done in 2 instructions by using shifts with mov or mvn.
4339 mvn r0, r0, lsr #12 */
4340 if (set_sign_bit_copies
> 8
4341 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
4345 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4346 rtx shift
= GEN_INT (set_sign_bit_copies
);
4350 gen_rtx_SET (VOIDmode
, sub
,
4352 gen_rtx_ASHIFT (mode
,
4357 gen_rtx_SET (VOIDmode
, target
,
4359 gen_rtx_LSHIFTRT (mode
, sub
,
4366 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4368 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4370 For eg. r0 = r0 | 0xfff
4375 if (set_zero_bit_copies
> 8
4376 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4380 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4381 rtx shift
= GEN_INT (set_zero_bit_copies
);
4385 gen_rtx_SET (VOIDmode
, sub
,
4387 gen_rtx_LSHIFTRT (mode
,
4392 gen_rtx_SET (VOIDmode
, target
,
4394 gen_rtx_ASHIFT (mode
, sub
,
4400 /* This will never be reached for Thumb2 because orn is a valid
4401 instruction. This is for Thumb1 and the ARM 32 bit cases.
4403 x = y | constant (such that ~constant is a valid constant)
4405 x = ~(~y & ~constant).
4407 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4411 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4412 emit_constant_insn (cond
,
4413 gen_rtx_SET (VOIDmode
, sub
,
4414 gen_rtx_NOT (mode
, source
)));
4417 sub
= gen_reg_rtx (mode
);
4418 emit_constant_insn (cond
,
4419 gen_rtx_SET (VOIDmode
, sub
,
4420 gen_rtx_AND (mode
, source
,
4422 emit_constant_insn (cond
,
4423 gen_rtx_SET (VOIDmode
, target
,
4424 gen_rtx_NOT (mode
, sub
)));
4431 /* See if two shifts will do 2 or more insn's worth of work. */
4432 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4434 HOST_WIDE_INT shift_mask
= ((0xffffffff
4435 << (32 - clear_sign_bit_copies
))
4438 if ((remainder
| shift_mask
) != 0xffffffff)
4442 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4443 insns
= arm_gen_constant (AND
, mode
, cond
,
4444 remainder
| shift_mask
,
4445 new_src
, source
, subtargets
, 1);
4450 rtx targ
= subtargets
? NULL_RTX
: target
;
4451 insns
= arm_gen_constant (AND
, mode
, cond
,
4452 remainder
| shift_mask
,
4453 targ
, source
, subtargets
, 0);
4459 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4460 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4462 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4463 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4469 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4471 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4473 if ((remainder
| shift_mask
) != 0xffffffff)
4477 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4479 insns
= arm_gen_constant (AND
, mode
, cond
,
4480 remainder
| shift_mask
,
4481 new_src
, source
, subtargets
, 1);
4486 rtx targ
= subtargets
? NULL_RTX
: target
;
4488 insns
= arm_gen_constant (AND
, mode
, cond
,
4489 remainder
| shift_mask
,
4490 targ
, source
, subtargets
, 0);
4496 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4497 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4499 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4500 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4512 /* Calculate what the instruction sequences would be if we generated it
4513 normally, negated, or inverted. */
4515 /* AND cannot be split into multiple insns, so invert and use BIC. */
4518 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4521 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4526 if (can_invert
|| final_invert
)
4527 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4532 immediates
= &pos_immediates
;
4534 /* Is the negated immediate sequence more efficient? */
4535 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4538 immediates
= &neg_immediates
;
4543 /* Is the inverted immediate sequence more efficient?
4544 We must allow for an extra NOT instruction for XOR operations, although
4545 there is some chance that the final 'mvn' will get optimized later. */
4546 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4549 immediates
= &inv_immediates
;
4557 /* Now output the chosen sequence as instructions. */
4560 for (i
= 0; i
< insns
; i
++)
4562 rtx new_src
, temp1_rtx
;
4564 temp1
= immediates
->i
[i
];
4566 if (code
== SET
|| code
== MINUS
)
4567 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4568 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4569 new_src
= gen_reg_rtx (mode
);
4575 else if (can_negate
)
4578 temp1
= trunc_int_for_mode (temp1
, mode
);
4579 temp1_rtx
= GEN_INT (temp1
);
4583 else if (code
== MINUS
)
4584 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4586 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4588 emit_constant_insn (cond
,
4589 gen_rtx_SET (VOIDmode
, new_src
,
4595 can_negate
= can_invert
;
4599 else if (code
== MINUS
)
4607 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
4608 gen_rtx_NOT (mode
, source
)));
4615 /* Canonicalize a comparison so that we are more likely to recognize it.
4616 This can be done for a few constant compares, where we can make the
4617 immediate value easier to load. */
4620 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4621 bool op0_preserve_value
)
4624 unsigned HOST_WIDE_INT i
, maxval
;
4626 mode
= GET_MODE (*op0
);
4627 if (mode
== VOIDmode
)
4628 mode
= GET_MODE (*op1
);
4630 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4632 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4633 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4634 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4635 for GTU/LEU in Thumb mode. */
4640 if (*code
== GT
|| *code
== LE
4641 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4643 /* Missing comparison. First try to use an available
4645 if (CONST_INT_P (*op1
))
4653 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4655 *op1
= GEN_INT (i
+ 1);
4656 *code
= *code
== GT
? GE
: LT
;
4662 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4663 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4665 *op1
= GEN_INT (i
+ 1);
4666 *code
= *code
== GTU
? GEU
: LTU
;
4675 /* If that did not work, reverse the condition. */
4676 if (!op0_preserve_value
)
4681 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4687 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4688 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4689 to facilitate possible combining with a cmp into 'ands'. */
4691 && GET_CODE (*op0
) == ZERO_EXTEND
4692 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4693 && GET_MODE (XEXP (*op0
, 0)) == QImode
4694 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4695 && subreg_lowpart_p (XEXP (*op0
, 0))
4696 && *op1
== const0_rtx
)
4697 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4700 /* Comparisons smaller than DImode. Only adjust comparisons against
4701 an out-of-range constant. */
4702 if (!CONST_INT_P (*op1
)
4703 || const_ok_for_arm (INTVAL (*op1
))
4704 || const_ok_for_arm (- INTVAL (*op1
)))
4718 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4720 *op1
= GEN_INT (i
+ 1);
4721 *code
= *code
== GT
? GE
: LT
;
4729 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4731 *op1
= GEN_INT (i
- 1);
4732 *code
= *code
== GE
? GT
: LE
;
4739 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4740 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4742 *op1
= GEN_INT (i
+ 1);
4743 *code
= *code
== GTU
? GEU
: LTU
;
4751 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4753 *op1
= GEN_INT (i
- 1);
4754 *code
= *code
== GEU
? GTU
: LEU
;
4765 /* Define how to find the value returned by a function. */
4768 arm_function_value(const_tree type
, const_tree func
,
4769 bool outgoing ATTRIBUTE_UNUSED
)
4772 int unsignedp ATTRIBUTE_UNUSED
;
4773 rtx r ATTRIBUTE_UNUSED
;
4775 mode
= TYPE_MODE (type
);
4777 if (TARGET_AAPCS_BASED
)
4778 return aapcs_allocate_return_reg (mode
, type
, func
);
4780 /* Promote integer types. */
4781 if (INTEGRAL_TYPE_P (type
))
4782 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
4784 /* Promotes small structs returned in a register to full-word size
4785 for big-endian AAPCS. */
4786 if (arm_return_in_msb (type
))
4788 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4789 if (size
% UNITS_PER_WORD
!= 0)
4791 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4792 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4796 return arm_libcall_value_1 (mode
);
4799 /* libcall hashtable helpers. */
4801 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
4803 typedef rtx_def value_type
;
4804 typedef rtx_def compare_type
;
4805 static inline hashval_t
hash (const value_type
*);
4806 static inline bool equal (const value_type
*, const compare_type
*);
4807 static inline void remove (value_type
*);
4811 libcall_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
4813 return rtx_equal_p (p1
, p2
);
4817 libcall_hasher::hash (const value_type
*p1
)
4819 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
4822 typedef hash_table
<libcall_hasher
> libcall_table_type
;
4825 add_libcall (libcall_table_type
*htab
, rtx libcall
)
4827 *htab
->find_slot (libcall
, INSERT
) = libcall
;
4831 arm_libcall_uses_aapcs_base (const_rtx libcall
)
4833 static bool init_done
= false;
4834 static libcall_table_type
*libcall_htab
= NULL
;
4840 libcall_htab
= new libcall_table_type (31);
4841 add_libcall (libcall_htab
,
4842 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
4843 add_libcall (libcall_htab
,
4844 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
4845 add_libcall (libcall_htab
,
4846 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
4847 add_libcall (libcall_htab
,
4848 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
4850 add_libcall (libcall_htab
,
4851 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
4852 add_libcall (libcall_htab
,
4853 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
4854 add_libcall (libcall_htab
,
4855 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
4856 add_libcall (libcall_htab
,
4857 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
4859 add_libcall (libcall_htab
,
4860 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
4861 add_libcall (libcall_htab
,
4862 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
4863 add_libcall (libcall_htab
,
4864 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
4865 add_libcall (libcall_htab
,
4866 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
4867 add_libcall (libcall_htab
,
4868 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
4869 add_libcall (libcall_htab
,
4870 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
4871 add_libcall (libcall_htab
,
4872 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
4873 add_libcall (libcall_htab
,
4874 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
4876 /* Values from double-precision helper functions are returned in core
4877 registers if the selected core only supports single-precision
4878 arithmetic, even if we are using the hard-float ABI. The same is
4879 true for single-precision helpers, but we will never be using the
4880 hard-float ABI on a CPU which doesn't support single-precision
4881 operations in hardware. */
4882 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
4883 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
4884 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
4885 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
4886 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
4887 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
4888 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
4889 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
4890 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
4891 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
4892 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
4893 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
4895 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
4899 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
4903 arm_libcall_value_1 (machine_mode mode
)
4905 if (TARGET_AAPCS_BASED
)
4906 return aapcs_libcall_value (mode
);
4907 else if (TARGET_IWMMXT_ABI
4908 && arm_vector_mode_supported_p (mode
))
4909 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
4911 return gen_rtx_REG (mode
, ARG_REGISTER (1));
4914 /* Define how to find the value returned by a library function
4915 assuming the value has mode MODE. */
4918 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
4920 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
4921 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4923 /* The following libcalls return their result in integer registers,
4924 even though they return a floating point value. */
4925 if (arm_libcall_uses_aapcs_base (libcall
))
4926 return gen_rtx_REG (mode
, ARG_REGISTER(1));
4930 return arm_libcall_value_1 (mode
);
4933 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4936 arm_function_value_regno_p (const unsigned int regno
)
4938 if (regno
== ARG_REGISTER (1)
4940 && TARGET_AAPCS_BASED
4942 && TARGET_HARD_FLOAT
4943 && regno
== FIRST_VFP_REGNUM
)
4944 || (TARGET_IWMMXT_ABI
4945 && regno
== FIRST_IWMMXT_REGNUM
))
4951 /* Determine the amount of memory needed to store the possible return
4952 registers of an untyped call. */
4954 arm_apply_result_size (void)
4960 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
4962 if (TARGET_IWMMXT_ABI
)
4969 /* Decide whether TYPE should be returned in memory (true)
4970 or in a register (false). FNTYPE is the type of the function making
4973 arm_return_in_memory (const_tree type
, const_tree fntype
)
4977 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
4979 if (TARGET_AAPCS_BASED
)
4981 /* Simple, non-aggregate types (ie not including vectors and
4982 complex) are always returned in a register (or registers).
4983 We don't care about which register here, so we can short-cut
4984 some of the detail. */
4985 if (!AGGREGATE_TYPE_P (type
)
4986 && TREE_CODE (type
) != VECTOR_TYPE
4987 && TREE_CODE (type
) != COMPLEX_TYPE
)
4990 /* Any return value that is no larger than one word can be
4992 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
4995 /* Check any available co-processors to see if they accept the
4996 type as a register candidate (VFP, for example, can return
4997 some aggregates in consecutive registers). These aren't
4998 available if the call is variadic. */
4999 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5002 /* Vector values should be returned using ARM registers, not
5003 memory (unless they're over 16 bytes, which will break since
5004 we only have four call-clobbered registers to play with). */
5005 if (TREE_CODE (type
) == VECTOR_TYPE
)
5006 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5008 /* The rest go in memory. */
5012 if (TREE_CODE (type
) == VECTOR_TYPE
)
5013 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5015 if (!AGGREGATE_TYPE_P (type
) &&
5016 (TREE_CODE (type
) != VECTOR_TYPE
))
5017 /* All simple types are returned in registers. */
5020 if (arm_abi
!= ARM_ABI_APCS
)
5022 /* ATPCS and later return aggregate types in memory only if they are
5023 larger than a word (or are variable size). */
5024 return (size
< 0 || size
> UNITS_PER_WORD
);
5027 /* For the arm-wince targets we choose to be compatible with Microsoft's
5028 ARM and Thumb compilers, which always return aggregates in memory. */
5030 /* All structures/unions bigger than one word are returned in memory.
5031 Also catch the case where int_size_in_bytes returns -1. In this case
5032 the aggregate is either huge or of variable size, and in either case
5033 we will want to return it via memory and not in a register. */
5034 if (size
< 0 || size
> UNITS_PER_WORD
)
5037 if (TREE_CODE (type
) == RECORD_TYPE
)
5041 /* For a struct the APCS says that we only return in a register
5042 if the type is 'integer like' and every addressable element
5043 has an offset of zero. For practical purposes this means
5044 that the structure can have at most one non bit-field element
5045 and that this element must be the first one in the structure. */
5047 /* Find the first field, ignoring non FIELD_DECL things which will
5048 have been created by C++. */
5049 for (field
= TYPE_FIELDS (type
);
5050 field
&& TREE_CODE (field
) != FIELD_DECL
;
5051 field
= DECL_CHAIN (field
))
5055 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5057 /* Check that the first field is valid for returning in a register. */
5059 /* ... Floats are not allowed */
5060 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5063 /* ... Aggregates that are not themselves valid for returning in
5064 a register are not allowed. */
5065 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5068 /* Now check the remaining fields, if any. Only bitfields are allowed,
5069 since they are not addressable. */
5070 for (field
= DECL_CHAIN (field
);
5072 field
= DECL_CHAIN (field
))
5074 if (TREE_CODE (field
) != FIELD_DECL
)
5077 if (!DECL_BIT_FIELD_TYPE (field
))
5084 if (TREE_CODE (type
) == UNION_TYPE
)
5088 /* Unions can be returned in registers if every element is
5089 integral, or can be returned in an integer register. */
5090 for (field
= TYPE_FIELDS (type
);
5092 field
= DECL_CHAIN (field
))
5094 if (TREE_CODE (field
) != FIELD_DECL
)
5097 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5100 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5106 #endif /* not ARM_WINCE */
5108 /* Return all other types in memory. */
5112 const struct pcs_attribute_arg
5116 } pcs_attribute_args
[] =
5118 {"aapcs", ARM_PCS_AAPCS
},
5119 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5121 /* We could recognize these, but changes would be needed elsewhere
5122 * to implement them. */
5123 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5124 {"atpcs", ARM_PCS_ATPCS
},
5125 {"apcs", ARM_PCS_APCS
},
5127 {NULL
, ARM_PCS_UNKNOWN
}
5131 arm_pcs_from_attribute (tree attr
)
5133 const struct pcs_attribute_arg
*ptr
;
5136 /* Get the value of the argument. */
5137 if (TREE_VALUE (attr
) == NULL_TREE
5138 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5139 return ARM_PCS_UNKNOWN
;
5141 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5143 /* Check it against the list of known arguments. */
5144 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5145 if (streq (arg
, ptr
->arg
))
5148 /* An unrecognized interrupt type. */
5149 return ARM_PCS_UNKNOWN
;
5152 /* Get the PCS variant to use for this call. TYPE is the function's type
5153 specification, DECL is the specific declartion. DECL may be null if
5154 the call could be indirect or if this is a library call. */
5156 arm_get_pcs_model (const_tree type
, const_tree decl
)
5158 bool user_convention
= false;
5159 enum arm_pcs user_pcs
= arm_pcs_default
;
5164 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5167 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5168 user_convention
= true;
5171 if (TARGET_AAPCS_BASED
)
5173 /* Detect varargs functions. These always use the base rules
5174 (no argument is ever a candidate for a co-processor
5176 bool base_rules
= stdarg_p (type
);
5178 if (user_convention
)
5180 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5181 sorry ("non-AAPCS derived PCS variant");
5182 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5183 error ("variadic functions must use the base AAPCS variant");
5187 return ARM_PCS_AAPCS
;
5188 else if (user_convention
)
5190 else if (decl
&& flag_unit_at_a_time
)
5192 /* Local functions never leak outside this compilation unit,
5193 so we are free to use whatever conventions are
5195 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5196 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5198 return ARM_PCS_AAPCS_LOCAL
;
5201 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5202 sorry ("PCS variant");
5204 /* For everything else we use the target's default. */
5205 return arm_pcs_default
;
5210 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5211 const_tree fntype ATTRIBUTE_UNUSED
,
5212 rtx libcall ATTRIBUTE_UNUSED
,
5213 const_tree fndecl ATTRIBUTE_UNUSED
)
5215 /* Record the unallocated VFP registers. */
5216 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5217 pcum
->aapcs_vfp_reg_alloc
= 0;
5220 /* Walk down the type tree of TYPE counting consecutive base elements.
5221 If *MODEP is VOIDmode, then set it to the first valid floating point
5222 type. If a non-floating point type is found, or if a floating point
5223 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5224 otherwise return the count in the sub-tree. */
5226 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5231 switch (TREE_CODE (type
))
5234 mode
= TYPE_MODE (type
);
5235 if (mode
!= DFmode
&& mode
!= SFmode
)
5238 if (*modep
== VOIDmode
)
5247 mode
= TYPE_MODE (TREE_TYPE (type
));
5248 if (mode
!= DFmode
&& mode
!= SFmode
)
5251 if (*modep
== VOIDmode
)
5260 /* Use V2SImode and V4SImode as representatives of all 64-bit
5261 and 128-bit vector types, whether or not those modes are
5262 supported with the present options. */
5263 size
= int_size_in_bytes (type
);
5276 if (*modep
== VOIDmode
)
5279 /* Vector modes are considered to be opaque: two vectors are
5280 equivalent for the purposes of being homogeneous aggregates
5281 if they are the same size. */
5290 tree index
= TYPE_DOMAIN (type
);
5292 /* Can't handle incomplete types nor sizes that are not
5294 if (!COMPLETE_TYPE_P (type
)
5295 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5298 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5301 || !TYPE_MAX_VALUE (index
)
5302 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5303 || !TYPE_MIN_VALUE (index
)
5304 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5308 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5309 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5311 /* There must be no padding. */
5312 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5324 /* Can't handle incomplete types nor sizes that are not
5326 if (!COMPLETE_TYPE_P (type
)
5327 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5330 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5332 if (TREE_CODE (field
) != FIELD_DECL
)
5335 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5341 /* There must be no padding. */
5342 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5349 case QUAL_UNION_TYPE
:
5351 /* These aren't very interesting except in a degenerate case. */
5356 /* Can't handle incomplete types nor sizes that are not
5358 if (!COMPLETE_TYPE_P (type
)
5359 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5362 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5364 if (TREE_CODE (field
) != FIELD_DECL
)
5367 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5370 count
= count
> sub_count
? count
: sub_count
;
5373 /* There must be no padding. */
5374 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5387 /* Return true if PCS_VARIANT should use VFP registers. */
5389 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5391 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5393 static bool seen_thumb1_vfp
= false;
5395 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5397 sorry ("Thumb-1 hard-float VFP ABI");
5398 /* sorry() is not immediately fatal, so only display this once. */
5399 seen_thumb1_vfp
= true;
5405 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5408 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5409 (TARGET_VFP_DOUBLE
|| !is_double
));
5412 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5413 suitable for passing or returning in VFP registers for the PCS
5414 variant selected. If it is, then *BASE_MODE is updated to contain
5415 a machine mode describing each element of the argument's type and
5416 *COUNT to hold the number of such elements. */
5418 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5419 machine_mode mode
, const_tree type
,
5420 machine_mode
*base_mode
, int *count
)
5422 machine_mode new_mode
= VOIDmode
;
5424 /* If we have the type information, prefer that to working things
5425 out from the mode. */
5428 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5430 if (ag_count
> 0 && ag_count
<= 4)
5435 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5436 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5437 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5442 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5445 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5451 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5454 *base_mode
= new_mode
;
5459 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5460 machine_mode mode
, const_tree type
)
5462 int count ATTRIBUTE_UNUSED
;
5463 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5465 if (!use_vfp_abi (pcs_variant
, false))
5467 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5472 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5475 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5478 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5479 &pcum
->aapcs_vfp_rmode
,
5480 &pcum
->aapcs_vfp_rcount
);
5484 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5485 const_tree type ATTRIBUTE_UNUSED
)
5487 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5488 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5491 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5492 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5494 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5496 || (mode
== TImode
&& ! TARGET_NEON
)
5497 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5500 int rcount
= pcum
->aapcs_vfp_rcount
;
5502 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5506 /* Avoid using unsupported vector modes. */
5507 if (rmode
== V2SImode
)
5509 else if (rmode
== V4SImode
)
5516 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5517 for (i
= 0; i
< rcount
; i
++)
5519 rtx tmp
= gen_rtx_REG (rmode
,
5520 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5521 tmp
= gen_rtx_EXPR_LIST
5523 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5524 XVECEXP (par
, 0, i
) = tmp
;
5527 pcum
->aapcs_reg
= par
;
5530 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5537 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5539 const_tree type ATTRIBUTE_UNUSED
)
5541 if (!use_vfp_abi (pcs_variant
, false))
5544 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5547 machine_mode ag_mode
;
5552 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5557 if (ag_mode
== V2SImode
)
5559 else if (ag_mode
== V4SImode
)
5565 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5566 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5567 for (i
= 0; i
< count
; i
++)
5569 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5570 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5571 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5572 XVECEXP (par
, 0, i
) = tmp
;
5578 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5582 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5583 machine_mode mode ATTRIBUTE_UNUSED
,
5584 const_tree type ATTRIBUTE_UNUSED
)
5586 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5587 pcum
->aapcs_vfp_reg_alloc
= 0;
5591 #define AAPCS_CP(X) \
5593 aapcs_ ## X ## _cum_init, \
5594 aapcs_ ## X ## _is_call_candidate, \
5595 aapcs_ ## X ## _allocate, \
5596 aapcs_ ## X ## _is_return_candidate, \
5597 aapcs_ ## X ## _allocate_return_reg, \
5598 aapcs_ ## X ## _advance \
5601 /* Table of co-processors that can be used to pass arguments in
5602 registers. Idealy no arugment should be a candidate for more than
5603 one co-processor table entry, but the table is processed in order
5604 and stops after the first match. If that entry then fails to put
5605 the argument into a co-processor register, the argument will go on
5609 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5610 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5612 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5613 BLKmode) is a candidate for this co-processor's registers; this
5614 function should ignore any position-dependent state in
5615 CUMULATIVE_ARGS and only use call-type dependent information. */
5616 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5618 /* Return true if the argument does get a co-processor register; it
5619 should set aapcs_reg to an RTX of the register allocated as is
5620 required for a return from FUNCTION_ARG. */
5621 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5623 /* Return true if a result of mode MODE (or type TYPE if MODE is
5624 BLKmode) is can be returned in this co-processor's registers. */
5625 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
5627 /* Allocate and return an RTX element to hold the return type of a
5628 call, this routine must not fail and will only be called if
5629 is_return_candidate returned true with the same parameters. */
5630 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
5632 /* Finish processing this argument and prepare to start processing
5634 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5635 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5643 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5648 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5649 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5656 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5658 /* We aren't passed a decl, so we can't check that a call is local.
5659 However, it isn't clear that that would be a win anyway, since it
5660 might limit some tail-calling opportunities. */
5661 enum arm_pcs pcs_variant
;
5665 const_tree fndecl
= NULL_TREE
;
5667 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5670 fntype
= TREE_TYPE (fntype
);
5673 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5676 pcs_variant
= arm_pcs_default
;
5678 if (pcs_variant
!= ARM_PCS_AAPCS
)
5682 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5683 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
5692 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
5695 /* We aren't passed a decl, so we can't check that a call is local.
5696 However, it isn't clear that that would be a win anyway, since it
5697 might limit some tail-calling opportunities. */
5698 enum arm_pcs pcs_variant
;
5699 int unsignedp ATTRIBUTE_UNUSED
;
5703 const_tree fndecl
= NULL_TREE
;
5705 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5708 fntype
= TREE_TYPE (fntype
);
5711 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5714 pcs_variant
= arm_pcs_default
;
5716 /* Promote integer types. */
5717 if (type
&& INTEGRAL_TYPE_P (type
))
5718 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5720 if (pcs_variant
!= ARM_PCS_AAPCS
)
5724 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5725 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5727 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5731 /* Promotes small structs returned in a register to full-word size
5732 for big-endian AAPCS. */
5733 if (type
&& arm_return_in_msb (type
))
5735 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5736 if (size
% UNITS_PER_WORD
!= 0)
5738 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5739 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5743 return gen_rtx_REG (mode
, R0_REGNUM
);
5747 aapcs_libcall_value (machine_mode mode
)
5749 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5750 && GET_MODE_SIZE (mode
) <= 4)
5753 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
5756 /* Lay out a function argument using the AAPCS rules. The rule
5757 numbers referred to here are those in the AAPCS. */
5759 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5760 const_tree type
, bool named
)
5765 /* We only need to do this once per argument. */
5766 if (pcum
->aapcs_arg_processed
)
5769 pcum
->aapcs_arg_processed
= true;
5771 /* Special case: if named is false then we are handling an incoming
5772 anonymous argument which is on the stack. */
5776 /* Is this a potential co-processor register candidate? */
5777 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5779 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
5780 pcum
->aapcs_cprc_slot
= slot
;
5782 /* We don't have to apply any of the rules from part B of the
5783 preparation phase, these are handled elsewhere in the
5788 /* A Co-processor register candidate goes either in its own
5789 class of registers or on the stack. */
5790 if (!pcum
->aapcs_cprc_failed
[slot
])
5792 /* C1.cp - Try to allocate the argument to co-processor
5794 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
5797 /* C2.cp - Put the argument on the stack and note that we
5798 can't assign any more candidates in this slot. We also
5799 need to note that we have allocated stack space, so that
5800 we won't later try to split a non-cprc candidate between
5801 core registers and the stack. */
5802 pcum
->aapcs_cprc_failed
[slot
] = true;
5803 pcum
->can_split
= false;
5806 /* We didn't get a register, so this argument goes on the
5808 gcc_assert (pcum
->can_split
== false);
5813 /* C3 - For double-word aligned arguments, round the NCRN up to the
5814 next even number. */
5815 ncrn
= pcum
->aapcs_ncrn
;
5816 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
5819 nregs
= ARM_NUM_REGS2(mode
, type
);
5821 /* Sigh, this test should really assert that nregs > 0, but a GCC
5822 extension allows empty structs and then gives them empty size; it
5823 then allows such a structure to be passed by value. For some of
5824 the code below we have to pretend that such an argument has
5825 non-zero size so that we 'locate' it correctly either in
5826 registers or on the stack. */
5827 gcc_assert (nregs
>= 0);
5829 nregs2
= nregs
? nregs
: 1;
5831 /* C4 - Argument fits entirely in core registers. */
5832 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
5834 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5835 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
5839 /* C5 - Some core registers left and there are no arguments already
5840 on the stack: split this argument between the remaining core
5841 registers and the stack. */
5842 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
5844 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5845 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5846 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
5850 /* C6 - NCRN is set to 4. */
5851 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5853 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
5857 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5858 for a call to a function whose data type is FNTYPE.
5859 For a library call, FNTYPE is NULL. */
5861 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
5863 tree fndecl ATTRIBUTE_UNUSED
)
5865 /* Long call handling. */
5867 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5869 pcum
->pcs_variant
= arm_pcs_default
;
5871 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5873 if (arm_libcall_uses_aapcs_base (libname
))
5874 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
5876 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
5877 pcum
->aapcs_reg
= NULL_RTX
;
5878 pcum
->aapcs_partial
= 0;
5879 pcum
->aapcs_arg_processed
= false;
5880 pcum
->aapcs_cprc_slot
= -1;
5881 pcum
->can_split
= true;
5883 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5887 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5889 pcum
->aapcs_cprc_failed
[i
] = false;
5890 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
5898 /* On the ARM, the offset starts at 0. */
5900 pcum
->iwmmxt_nregs
= 0;
5901 pcum
->can_split
= true;
5903 /* Varargs vectors are treated the same as long long.
5904 named_count avoids having to change the way arm handles 'named' */
5905 pcum
->named_count
= 0;
5908 if (TARGET_REALLY_IWMMXT
&& fntype
)
5912 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
5914 fn_arg
= TREE_CHAIN (fn_arg
))
5915 pcum
->named_count
+= 1;
5917 if (! pcum
->named_count
)
5918 pcum
->named_count
= INT_MAX
;
5922 /* Return true if we use LRA instead of reload pass. */
5926 return arm_lra_flag
;
5929 /* Return true if mode/type need doubleword alignment. */
5931 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
5933 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
5934 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
5938 /* Determine where to put an argument to a function.
5939 Value is zero to push the argument on the stack,
5940 or a hard register in which to store the argument.
5942 MODE is the argument's machine mode.
5943 TYPE is the data type of the argument (as a tree).
5944 This is null for libcalls where that information may
5946 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5947 the preceding args and about the function being called.
5948 NAMED is nonzero if this argument is a named parameter
5949 (otherwise it is an extra parameter matching an ellipsis).
5951 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5952 other arguments are passed on the stack. If (NAMED == 0) (which happens
5953 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5954 defined), say it is passed in the stack (function_prologue will
5955 indeed make it pass in the stack if necessary). */
5958 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
5959 const_tree type
, bool named
)
5961 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5964 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5965 a call insn (op3 of a call_value insn). */
5966 if (mode
== VOIDmode
)
5969 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5971 aapcs_layout_arg (pcum
, mode
, type
, named
);
5972 return pcum
->aapcs_reg
;
5975 /* Varargs vectors are treated the same as long long.
5976 named_count avoids having to change the way arm handles 'named' */
5977 if (TARGET_IWMMXT_ABI
5978 && arm_vector_mode_supported_p (mode
)
5979 && pcum
->named_count
> pcum
->nargs
+ 1)
5981 if (pcum
->iwmmxt_nregs
<= 9)
5982 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
5985 pcum
->can_split
= false;
5990 /* Put doubleword aligned quantities in even register pairs. */
5992 && ARM_DOUBLEWORD_ALIGN
5993 && arm_needs_doubleword_align (mode
, type
))
5996 /* Only allow splitting an arg between regs and memory if all preceding
5997 args were allocated to regs. For args passed by reference we only count
5998 the reference pointer. */
5999 if (pcum
->can_split
)
6002 nregs
= ARM_NUM_REGS2 (mode
, type
);
6004 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6007 return gen_rtx_REG (mode
, pcum
->nregs
);
6011 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6013 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
6014 ? DOUBLEWORD_ALIGNMENT
6019 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6020 tree type
, bool named
)
6022 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6023 int nregs
= pcum
->nregs
;
6025 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6027 aapcs_layout_arg (pcum
, mode
, type
, named
);
6028 return pcum
->aapcs_partial
;
6031 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6034 if (NUM_ARG_REGS
> nregs
6035 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6037 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6042 /* Update the data in PCUM to advance over an argument
6043 of mode MODE and data type TYPE.
6044 (TYPE is null for libcalls where that information may not be available.) */
6047 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6048 const_tree type
, bool named
)
6050 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6052 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6054 aapcs_layout_arg (pcum
, mode
, type
, named
);
6056 if (pcum
->aapcs_cprc_slot
>= 0)
6058 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6060 pcum
->aapcs_cprc_slot
= -1;
6063 /* Generic stuff. */
6064 pcum
->aapcs_arg_processed
= false;
6065 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6066 pcum
->aapcs_reg
= NULL_RTX
;
6067 pcum
->aapcs_partial
= 0;
6072 if (arm_vector_mode_supported_p (mode
)
6073 && pcum
->named_count
> pcum
->nargs
6074 && TARGET_IWMMXT_ABI
)
6075 pcum
->iwmmxt_nregs
+= 1;
6077 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6081 /* Variable sized types are passed by reference. This is a GCC
6082 extension to the ARM ABI. */
6085 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6086 machine_mode mode ATTRIBUTE_UNUSED
,
6087 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6089 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6092 /* Encode the current state of the #pragma [no_]long_calls. */
6095 OFF
, /* No #pragma [no_]long_calls is in effect. */
6096 LONG
, /* #pragma long_calls is in effect. */
6097 SHORT
/* #pragma no_long_calls is in effect. */
6100 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6103 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6105 arm_pragma_long_calls
= LONG
;
6109 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6111 arm_pragma_long_calls
= SHORT
;
6115 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6117 arm_pragma_long_calls
= OFF
;
6120 /* Handle an attribute requiring a FUNCTION_DECL;
6121 arguments as in struct attribute_spec.handler. */
6123 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6124 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6126 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6128 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6130 *no_add_attrs
= true;
6136 /* Handle an "interrupt" or "isr" attribute;
6137 arguments as in struct attribute_spec.handler. */
6139 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6144 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6146 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6148 *no_add_attrs
= true;
6150 /* FIXME: the argument if any is checked for type attributes;
6151 should it be checked for decl ones? */
6155 if (TREE_CODE (*node
) == FUNCTION_TYPE
6156 || TREE_CODE (*node
) == METHOD_TYPE
)
6158 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6160 warning (OPT_Wattributes
, "%qE attribute ignored",
6162 *no_add_attrs
= true;
6165 else if (TREE_CODE (*node
) == POINTER_TYPE
6166 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6167 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6168 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6170 *node
= build_variant_type_copy (*node
);
6171 TREE_TYPE (*node
) = build_type_attribute_variant
6173 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6174 *no_add_attrs
= true;
6178 /* Possibly pass this attribute on from the type to a decl. */
6179 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6180 | (int) ATTR_FLAG_FUNCTION_NEXT
6181 | (int) ATTR_FLAG_ARRAY_NEXT
))
6183 *no_add_attrs
= true;
6184 return tree_cons (name
, args
, NULL_TREE
);
6188 warning (OPT_Wattributes
, "%qE attribute ignored",
6197 /* Handle a "pcs" attribute; arguments as in struct
6198 attribute_spec.handler. */
6200 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6201 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6203 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6205 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6206 *no_add_attrs
= true;
6211 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6212 /* Handle the "notshared" attribute. This attribute is another way of
6213 requesting hidden visibility. ARM's compiler supports
6214 "__declspec(notshared)"; we support the same thing via an
6218 arm_handle_notshared_attribute (tree
*node
,
6219 tree name ATTRIBUTE_UNUSED
,
6220 tree args ATTRIBUTE_UNUSED
,
6221 int flags ATTRIBUTE_UNUSED
,
6224 tree decl
= TYPE_NAME (*node
);
6228 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6229 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6230 *no_add_attrs
= false;
6236 /* Return 0 if the attributes for two types are incompatible, 1 if they
6237 are compatible, and 2 if they are nearly compatible (which causes a
6238 warning to be generated). */
6240 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6244 /* Check for mismatch of non-default calling convention. */
6245 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6248 /* Check for mismatched call attributes. */
6249 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6250 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6251 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6252 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6254 /* Only bother to check if an attribute is defined. */
6255 if (l1
| l2
| s1
| s2
)
6257 /* If one type has an attribute, the other must have the same attribute. */
6258 if ((l1
!= l2
) || (s1
!= s2
))
6261 /* Disallow mixed attributes. */
6262 if ((l1
& s2
) || (l2
& s1
))
6266 /* Check for mismatched ISR attribute. */
6267 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6269 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6270 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6272 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6279 /* Assigns default attributes to newly defined type. This is used to
6280 set short_call/long_call attributes for function types of
6281 functions defined inside corresponding #pragma scopes. */
6283 arm_set_default_type_attributes (tree type
)
6285 /* Add __attribute__ ((long_call)) to all functions, when
6286 inside #pragma long_calls or __attribute__ ((short_call)),
6287 when inside #pragma no_long_calls. */
6288 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6290 tree type_attr_list
, attr_name
;
6291 type_attr_list
= TYPE_ATTRIBUTES (type
);
6293 if (arm_pragma_long_calls
== LONG
)
6294 attr_name
= get_identifier ("long_call");
6295 else if (arm_pragma_long_calls
== SHORT
)
6296 attr_name
= get_identifier ("short_call");
6300 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6301 TYPE_ATTRIBUTES (type
) = type_attr_list
;
6305 /* Return true if DECL is known to be linked into section SECTION. */
6308 arm_function_in_section_p (tree decl
, section
*section
)
6310 /* We can only be certain about functions defined in the same
6311 compilation unit. */
6312 if (!TREE_STATIC (decl
))
6315 /* Make sure that SYMBOL always binds to the definition in this
6316 compilation unit. */
6317 if (!targetm
.binds_local_p (decl
))
6320 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6321 if (!DECL_SECTION_NAME (decl
))
6323 /* Make sure that we will not create a unique section for DECL. */
6324 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
6328 return function_section (decl
) == section
;
6331 /* Return nonzero if a 32-bit "long_call" should be generated for
6332 a call from the current function to DECL. We generate a long_call
6335 a. has an __attribute__((long call))
6336 or b. is within the scope of a #pragma long_calls
6337 or c. the -mlong-calls command line switch has been specified
6339 However we do not generate a long call if the function:
6341 d. has an __attribute__ ((short_call))
6342 or e. is inside the scope of a #pragma no_long_calls
6343 or f. is defined in the same section as the current function. */
6346 arm_is_long_call_p (tree decl
)
6351 return TARGET_LONG_CALLS
;
6353 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6354 if (lookup_attribute ("short_call", attrs
))
6357 /* For "f", be conservative, and only cater for cases in which the
6358 whole of the current function is placed in the same section. */
6359 if (!flag_reorder_blocks_and_partition
6360 && TREE_CODE (decl
) == FUNCTION_DECL
6361 && arm_function_in_section_p (decl
, current_function_section ()))
6364 if (lookup_attribute ("long_call", attrs
))
6367 return TARGET_LONG_CALLS
;
6370 /* Return nonzero if it is ok to make a tail-call to DECL. */
6372 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6374 unsigned long func_type
;
6376 if (cfun
->machine
->sibcall_blocked
)
6379 /* Never tailcall something if we are generating code for Thumb-1. */
6383 /* The PIC register is live on entry to VxWorks PLT entries, so we
6384 must make the call before restoring the PIC register. */
6385 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6388 /* If we are interworking and the function is not declared static
6389 then we can't tail-call it unless we know that it exists in this
6390 compilation unit (since it might be a Thumb routine). */
6391 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6392 && !TREE_ASM_WRITTEN (decl
))
6395 func_type
= arm_current_func_type ();
6396 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6397 if (IS_INTERRUPT (func_type
))
6400 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6402 /* Check that the return value locations are the same. For
6403 example that we aren't returning a value from the sibling in
6404 a VFP register but then need to transfer it to a core
6408 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
6409 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6411 if (!rtx_equal_p (a
, b
))
6415 /* Never tailcall if function may be called with a misaligned SP. */
6416 if (IS_STACKALIGN (func_type
))
6419 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6420 references should become a NOP. Don't convert such calls into
6422 if (TARGET_AAPCS_BASED
6423 && arm_abi
== ARM_ABI_AAPCS
6425 && DECL_WEAK (decl
))
6428 /* Everything else is ok. */
6433 /* Addressing mode support functions. */
6435 /* Return nonzero if X is a legitimate immediate operand when compiling
6436 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6438 legitimate_pic_operand_p (rtx x
)
6440 if (GET_CODE (x
) == SYMBOL_REF
6441 || (GET_CODE (x
) == CONST
6442 && GET_CODE (XEXP (x
, 0)) == PLUS
6443 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6449 /* Record that the current function needs a PIC register. Initialize
6450 cfun->machine->pic_reg if we have not already done so. */
6453 require_pic_register (void)
6455 /* A lot of the logic here is made obscure by the fact that this
6456 routine gets called as part of the rtx cost estimation process.
6457 We don't want those calls to affect any assumptions about the real
6458 function; and further, we can't call entry_of_function() until we
6459 start the real expansion process. */
6460 if (!crtl
->uses_pic_offset_table
)
6462 gcc_assert (can_create_pseudo_p ());
6463 if (arm_pic_register
!= INVALID_REGNUM
6464 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6466 if (!cfun
->machine
->pic_reg
)
6467 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6469 /* Play games to avoid marking the function as needing pic
6470 if we are being called as part of the cost-estimation
6472 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6473 crtl
->uses_pic_offset_table
= 1;
6477 rtx_insn
*seq
, *insn
;
6479 if (!cfun
->machine
->pic_reg
)
6480 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6482 /* Play games to avoid marking the function as needing pic
6483 if we are being called as part of the cost-estimation
6485 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6487 crtl
->uses_pic_offset_table
= 1;
6490 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6491 && arm_pic_register
> LAST_LO_REGNUM
)
6492 emit_move_insn (cfun
->machine
->pic_reg
,
6493 gen_rtx_REG (Pmode
, arm_pic_register
));
6495 arm_load_pic_register (0UL);
6500 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6502 INSN_LOCATION (insn
) = prologue_location
;
6504 /* We can be called during expansion of PHI nodes, where
6505 we can't yet emit instructions directly in the final
6506 insn stream. Queue the insns on the entry edge, they will
6507 be committed after everything else is expanded. */
6508 insert_insn_on_edge (seq
,
6509 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6516 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
6518 if (GET_CODE (orig
) == SYMBOL_REF
6519 || GET_CODE (orig
) == LABEL_REF
)
6525 gcc_assert (can_create_pseudo_p ());
6526 reg
= gen_reg_rtx (Pmode
);
6529 /* VxWorks does not impose a fixed gap between segments; the run-time
6530 gap can be different from the object-file gap. We therefore can't
6531 use GOTOFF unless we are absolutely sure that the symbol is in the
6532 same segment as the GOT. Unfortunately, the flexibility of linker
6533 scripts means that we can't be sure of that in general, so assume
6534 that GOTOFF is never valid on VxWorks. */
6535 if ((GET_CODE (orig
) == LABEL_REF
6536 || (GET_CODE (orig
) == SYMBOL_REF
&&
6537 SYMBOL_REF_LOCAL_P (orig
)))
6539 && arm_pic_data_is_text_relative
)
6540 insn
= arm_pic_static_addr (orig
, reg
);
6546 /* If this function doesn't have a pic register, create one now. */
6547 require_pic_register ();
6549 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6551 /* Make the MEM as close to a constant as possible. */
6552 mem
= SET_SRC (pat
);
6553 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6554 MEM_READONLY_P (mem
) = 1;
6555 MEM_NOTRAP_P (mem
) = 1;
6557 insn
= emit_insn (pat
);
6560 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6562 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6566 else if (GET_CODE (orig
) == CONST
)
6570 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6571 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6574 /* Handle the case where we have: const (UNSPEC_TLS). */
6575 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6576 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6579 /* Handle the case where we have:
6580 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6582 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6583 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6584 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6586 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6592 gcc_assert (can_create_pseudo_p ());
6593 reg
= gen_reg_rtx (Pmode
);
6596 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6598 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6599 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6600 base
== reg
? 0 : reg
);
6602 if (CONST_INT_P (offset
))
6604 /* The base register doesn't really matter, we only want to
6605 test the index for the appropriate mode. */
6606 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6608 gcc_assert (can_create_pseudo_p ());
6609 offset
= force_reg (Pmode
, offset
);
6612 if (CONST_INT_P (offset
))
6613 return plus_constant (Pmode
, base
, INTVAL (offset
));
6616 if (GET_MODE_SIZE (mode
) > 4
6617 && (GET_MODE_CLASS (mode
) == MODE_INT
6618 || TARGET_SOFT_FLOAT
))
6620 emit_insn (gen_addsi3 (reg
, base
, offset
));
6624 return gen_rtx_PLUS (Pmode
, base
, offset
);
6631 /* Find a spare register to use during the prolog of a function. */
6634 thumb_find_work_register (unsigned long pushed_regs_mask
)
6638 /* Check the argument registers first as these are call-used. The
6639 register allocation order means that sometimes r3 might be used
6640 but earlier argument registers might not, so check them all. */
6641 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6642 if (!df_regs_ever_live_p (reg
))
6645 /* Before going on to check the call-saved registers we can try a couple
6646 more ways of deducing that r3 is available. The first is when we are
6647 pushing anonymous arguments onto the stack and we have less than 4
6648 registers worth of fixed arguments(*). In this case r3 will be part of
6649 the variable argument list and so we can be sure that it will be
6650 pushed right at the start of the function. Hence it will be available
6651 for the rest of the prologue.
6652 (*): ie crtl->args.pretend_args_size is greater than 0. */
6653 if (cfun
->machine
->uses_anonymous_args
6654 && crtl
->args
.pretend_args_size
> 0)
6655 return LAST_ARG_REGNUM
;
6657 /* The other case is when we have fixed arguments but less than 4 registers
6658 worth. In this case r3 might be used in the body of the function, but
6659 it is not being used to convey an argument into the function. In theory
6660 we could just check crtl->args.size to see how many bytes are
6661 being passed in argument registers, but it seems that it is unreliable.
6662 Sometimes it will have the value 0 when in fact arguments are being
6663 passed. (See testcase execute/20021111-1.c for an example). So we also
6664 check the args_info.nregs field as well. The problem with this field is
6665 that it makes no allowances for arguments that are passed to the
6666 function but which are not used. Hence we could miss an opportunity
6667 when a function has an unused argument in r3. But it is better to be
6668 safe than to be sorry. */
6669 if (! cfun
->machine
->uses_anonymous_args
6670 && crtl
->args
.size
>= 0
6671 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6672 && (TARGET_AAPCS_BASED
6673 ? crtl
->args
.info
.aapcs_ncrn
< 4
6674 : crtl
->args
.info
.nregs
< 4))
6675 return LAST_ARG_REGNUM
;
6677 /* Otherwise look for a call-saved register that is going to be pushed. */
6678 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6679 if (pushed_regs_mask
& (1 << reg
))
6684 /* Thumb-2 can use high regs. */
6685 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6686 if (pushed_regs_mask
& (1 << reg
))
6689 /* Something went wrong - thumb_compute_save_reg_mask()
6690 should have arranged for a suitable register to be pushed. */
6694 static GTY(()) int pic_labelno
;
6696 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6700 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6702 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6704 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6707 gcc_assert (flag_pic
);
6709 pic_reg
= cfun
->machine
->pic_reg
;
6710 if (TARGET_VXWORKS_RTP
)
6712 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6713 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6714 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6716 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6718 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6719 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6723 /* We use an UNSPEC rather than a LABEL_REF because this label
6724 never appears in the code stream. */
6726 labelno
= GEN_INT (pic_labelno
++);
6727 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6728 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6730 /* On the ARM the PC register contains 'dot + 8' at the time of the
6731 addition, on the Thumb it is 'dot + 4'. */
6732 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6733 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6735 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6739 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6741 else /* TARGET_THUMB1 */
6743 if (arm_pic_register
!= INVALID_REGNUM
6744 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6746 /* We will have pushed the pic register, so we should always be
6747 able to find a work register. */
6748 pic_tmp
= gen_rtx_REG (SImode
,
6749 thumb_find_work_register (saved_regs
));
6750 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6751 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6752 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6754 else if (arm_pic_register
!= INVALID_REGNUM
6755 && arm_pic_register
> LAST_LO_REGNUM
6756 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
6758 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6759 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
6760 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
6763 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6767 /* Need to emit this whether or not we obey regdecls,
6768 since setjmp/longjmp can cause life info to screw up. */
6772 /* Generate code to load the address of a static var when flag_pic is set. */
6774 arm_pic_static_addr (rtx orig
, rtx reg
)
6776 rtx l1
, labelno
, offset_rtx
, insn
;
6778 gcc_assert (flag_pic
);
6780 /* We use an UNSPEC rather than a LABEL_REF because this label
6781 never appears in the code stream. */
6782 labelno
= GEN_INT (pic_labelno
++);
6783 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6784 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6786 /* On the ARM the PC register contains 'dot + 8' at the time of the
6787 addition, on the Thumb it is 'dot + 4'. */
6788 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6789 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
6790 UNSPEC_SYMBOL_OFFSET
);
6791 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
6793 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
6797 /* Return nonzero if X is valid as an ARM state addressing register. */
6799 arm_address_register_rtx_p (rtx x
, int strict_p
)
6809 return ARM_REGNO_OK_FOR_BASE_P (regno
);
6811 return (regno
<= LAST_ARM_REGNUM
6812 || regno
>= FIRST_PSEUDO_REGISTER
6813 || regno
== FRAME_POINTER_REGNUM
6814 || regno
== ARG_POINTER_REGNUM
);
6817 /* Return TRUE if this rtx is the difference of a symbol and a label,
6818 and will reduce to a PC-relative relocation in the object file.
6819 Expressions like this can be left alone when generating PIC, rather
6820 than forced through the GOT. */
6822 pcrel_constant_p (rtx x
)
6824 if (GET_CODE (x
) == MINUS
)
6825 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
6830 /* Return true if X will surely end up in an index register after next
6833 will_be_in_index_register (const_rtx x
)
6835 /* arm.md: calculate_pic_address will split this into a register. */
6836 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
6839 /* Return nonzero if X is a valid ARM state address operand. */
6841 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
6845 enum rtx_code code
= GET_CODE (x
);
6847 if (arm_address_register_rtx_p (x
, strict_p
))
6850 use_ldrd
= (TARGET_LDRD
6852 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6854 if (code
== POST_INC
|| code
== PRE_DEC
6855 || ((code
== PRE_INC
|| code
== POST_DEC
)
6856 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6857 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6859 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6860 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6861 && GET_CODE (XEXP (x
, 1)) == PLUS
6862 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6864 rtx addend
= XEXP (XEXP (x
, 1), 1);
6866 /* Don't allow ldrd post increment by register because it's hard
6867 to fixup invalid register choices. */
6869 && GET_CODE (x
) == POST_MODIFY
6873 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
6874 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
6877 /* After reload constants split into minipools will have addresses
6878 from a LABEL_REF. */
6879 else if (reload_completed
6880 && (code
== LABEL_REF
6882 && GET_CODE (XEXP (x
, 0)) == PLUS
6883 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6884 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6887 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6890 else if (code
== PLUS
)
6892 rtx xop0
= XEXP (x
, 0);
6893 rtx xop1
= XEXP (x
, 1);
6895 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6896 && ((CONST_INT_P (xop1
)
6897 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
6898 || (!strict_p
&& will_be_in_index_register (xop1
))))
6899 || (arm_address_register_rtx_p (xop1
, strict_p
)
6900 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
6904 /* Reload currently can't handle MINUS, so disable this for now */
6905 else if (GET_CODE (x
) == MINUS
)
6907 rtx xop0
= XEXP (x
, 0);
6908 rtx xop1
= XEXP (x
, 1);
6910 return (arm_address_register_rtx_p (xop0
, strict_p
)
6911 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
6915 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6916 && code
== SYMBOL_REF
6917 && CONSTANT_POOL_ADDRESS_P (x
)
6919 && symbol_mentioned_p (get_pool_constant (x
))
6920 && ! pcrel_constant_p (get_pool_constant (x
))))
6926 /* Return nonzero if X is a valid Thumb-2 address operand. */
6928 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
6931 enum rtx_code code
= GET_CODE (x
);
6933 if (arm_address_register_rtx_p (x
, strict_p
))
6936 use_ldrd
= (TARGET_LDRD
6938 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6940 if (code
== POST_INC
|| code
== PRE_DEC
6941 || ((code
== PRE_INC
|| code
== POST_DEC
)
6942 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6943 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6945 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6946 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6947 && GET_CODE (XEXP (x
, 1)) == PLUS
6948 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6950 /* Thumb-2 only has autoincrement by constant. */
6951 rtx addend
= XEXP (XEXP (x
, 1), 1);
6952 HOST_WIDE_INT offset
;
6954 if (!CONST_INT_P (addend
))
6957 offset
= INTVAL(addend
);
6958 if (GET_MODE_SIZE (mode
) <= 4)
6959 return (offset
> -256 && offset
< 256);
6961 return (use_ldrd
&& offset
> -1024 && offset
< 1024
6962 && (offset
& 3) == 0);
6965 /* After reload constants split into minipools will have addresses
6966 from a LABEL_REF. */
6967 else if (reload_completed
6968 && (code
== LABEL_REF
6970 && GET_CODE (XEXP (x
, 0)) == PLUS
6971 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6972 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6975 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6978 else if (code
== PLUS
)
6980 rtx xop0
= XEXP (x
, 0);
6981 rtx xop1
= XEXP (x
, 1);
6983 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6984 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
6985 || (!strict_p
&& will_be_in_index_register (xop1
))))
6986 || (arm_address_register_rtx_p (xop1
, strict_p
)
6987 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
6990 /* Normally we can assign constant values to target registers without
6991 the help of constant pool. But there are cases we have to use constant
6993 1) assign a label to register.
6994 2) sign-extend a 8bit value to 32bit and then assign to register.
6996 Constant pool access in format:
6997 (set (reg r0) (mem (symbol_ref (".LC0"))))
6998 will cause the use of literal pool (later in function arm_reorg).
6999 So here we mark such format as an invalid format, then the compiler
7000 will adjust it into:
7001 (set (reg r0) (symbol_ref (".LC0")))
7002 (set (reg r0) (mem (reg r0))).
7003 No extra register is required, and (mem (reg r0)) won't cause the use
7004 of literal pools. */
7005 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
7006 && CONSTANT_POOL_ADDRESS_P (x
))
7009 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7010 && code
== SYMBOL_REF
7011 && CONSTANT_POOL_ADDRESS_P (x
)
7013 && symbol_mentioned_p (get_pool_constant (x
))
7014 && ! pcrel_constant_p (get_pool_constant (x
))))
7020 /* Return nonzero if INDEX is valid for an address index operand in
7023 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7026 HOST_WIDE_INT range
;
7027 enum rtx_code code
= GET_CODE (index
);
7029 /* Standard coprocessor addressing modes. */
7030 if (TARGET_HARD_FLOAT
7032 && (mode
== SFmode
|| mode
== DFmode
))
7033 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7034 && INTVAL (index
) > -1024
7035 && (INTVAL (index
) & 3) == 0);
7037 /* For quad modes, we restrict the constant offset to be slightly less
7038 than what the instruction format permits. We do this because for
7039 quad mode moves, we will actually decompose them into two separate
7040 double-mode reads or writes. INDEX must therefore be a valid
7041 (double-mode) offset and so should INDEX+8. */
7042 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7043 return (code
== CONST_INT
7044 && INTVAL (index
) < 1016
7045 && INTVAL (index
) > -1024
7046 && (INTVAL (index
) & 3) == 0);
7048 /* We have no such constraint on double mode offsets, so we permit the
7049 full range of the instruction format. */
7050 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7051 return (code
== CONST_INT
7052 && INTVAL (index
) < 1024
7053 && INTVAL (index
) > -1024
7054 && (INTVAL (index
) & 3) == 0);
7056 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7057 return (code
== CONST_INT
7058 && INTVAL (index
) < 1024
7059 && INTVAL (index
) > -1024
7060 && (INTVAL (index
) & 3) == 0);
7062 if (arm_address_register_rtx_p (index
, strict_p
)
7063 && (GET_MODE_SIZE (mode
) <= 4))
7066 if (mode
== DImode
|| mode
== DFmode
)
7068 if (code
== CONST_INT
)
7070 HOST_WIDE_INT val
= INTVAL (index
);
7073 return val
> -256 && val
< 256;
7075 return val
> -4096 && val
< 4092;
7078 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7081 if (GET_MODE_SIZE (mode
) <= 4
7085 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7089 rtx xiop0
= XEXP (index
, 0);
7090 rtx xiop1
= XEXP (index
, 1);
7092 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7093 && power_of_two_operand (xiop1
, SImode
))
7094 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7095 && power_of_two_operand (xiop0
, SImode
)));
7097 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7098 || code
== ASHIFT
|| code
== ROTATERT
)
7100 rtx op
= XEXP (index
, 1);
7102 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7105 && INTVAL (op
) <= 31);
7109 /* For ARM v4 we may be doing a sign-extend operation during the
7115 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7121 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7123 return (code
== CONST_INT
7124 && INTVAL (index
) < range
7125 && INTVAL (index
) > -range
);
7128 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7129 index operand. i.e. 1, 2, 4 or 8. */
7131 thumb2_index_mul_operand (rtx op
)
7135 if (!CONST_INT_P (op
))
7139 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7142 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7144 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7146 enum rtx_code code
= GET_CODE (index
);
7148 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7149 /* Standard coprocessor addressing modes. */
7150 if (TARGET_HARD_FLOAT
7152 && (mode
== SFmode
|| mode
== DFmode
))
7153 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7154 /* Thumb-2 allows only > -256 index range for it's core register
7155 load/stores. Since we allow SF/DF in core registers, we have
7156 to use the intersection between -256~4096 (core) and -1024~1024
7158 && INTVAL (index
) > -256
7159 && (INTVAL (index
) & 3) == 0);
7161 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7163 /* For DImode assume values will usually live in core regs
7164 and only allow LDRD addressing modes. */
7165 if (!TARGET_LDRD
|| mode
!= DImode
)
7166 return (code
== CONST_INT
7167 && INTVAL (index
) < 1024
7168 && INTVAL (index
) > -1024
7169 && (INTVAL (index
) & 3) == 0);
7172 /* For quad modes, we restrict the constant offset to be slightly less
7173 than what the instruction format permits. We do this because for
7174 quad mode moves, we will actually decompose them into two separate
7175 double-mode reads or writes. INDEX must therefore be a valid
7176 (double-mode) offset and so should INDEX+8. */
7177 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7178 return (code
== CONST_INT
7179 && INTVAL (index
) < 1016
7180 && INTVAL (index
) > -1024
7181 && (INTVAL (index
) & 3) == 0);
7183 /* We have no such constraint on double mode offsets, so we permit the
7184 full range of the instruction format. */
7185 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7186 return (code
== CONST_INT
7187 && INTVAL (index
) < 1024
7188 && INTVAL (index
) > -1024
7189 && (INTVAL (index
) & 3) == 0);
7191 if (arm_address_register_rtx_p (index
, strict_p
)
7192 && (GET_MODE_SIZE (mode
) <= 4))
7195 if (mode
== DImode
|| mode
== DFmode
)
7197 if (code
== CONST_INT
)
7199 HOST_WIDE_INT val
= INTVAL (index
);
7200 /* ??? Can we assume ldrd for thumb2? */
7201 /* Thumb-2 ldrd only has reg+const addressing modes. */
7202 /* ldrd supports offsets of +-1020.
7203 However the ldr fallback does not. */
7204 return val
> -256 && val
< 256 && (val
& 3) == 0;
7212 rtx xiop0
= XEXP (index
, 0);
7213 rtx xiop1
= XEXP (index
, 1);
7215 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7216 && thumb2_index_mul_operand (xiop1
))
7217 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7218 && thumb2_index_mul_operand (xiop0
)));
7220 else if (code
== ASHIFT
)
7222 rtx op
= XEXP (index
, 1);
7224 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7227 && INTVAL (op
) <= 3);
7230 return (code
== CONST_INT
7231 && INTVAL (index
) < 4096
7232 && INTVAL (index
) > -256);
7235 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7237 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
7247 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7249 return (regno
<= LAST_LO_REGNUM
7250 || regno
> LAST_VIRTUAL_REGISTER
7251 || regno
== FRAME_POINTER_REGNUM
7252 || (GET_MODE_SIZE (mode
) >= 4
7253 && (regno
== STACK_POINTER_REGNUM
7254 || regno
>= FIRST_PSEUDO_REGISTER
7255 || x
== hard_frame_pointer_rtx
7256 || x
== arg_pointer_rtx
)));
7259 /* Return nonzero if x is a legitimate index register. This is the case
7260 for any base register that can access a QImode object. */
7262 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
7264 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
7267 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7269 The AP may be eliminated to either the SP or the FP, so we use the
7270 least common denominator, e.g. SImode, and offsets from 0 to 64.
7272 ??? Verify whether the above is the right approach.
7274 ??? Also, the FP may be eliminated to the SP, so perhaps that
7275 needs special handling also.
7277 ??? Look at how the mips16 port solves this problem. It probably uses
7278 better ways to solve some of these problems.
7280 Although it is not incorrect, we don't accept QImode and HImode
7281 addresses based on the frame pointer or arg pointer until the
7282 reload pass starts. This is so that eliminating such addresses
7283 into stack based ones won't produce impossible code. */
7285 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7287 /* ??? Not clear if this is right. Experiment. */
7288 if (GET_MODE_SIZE (mode
) < 4
7289 && !(reload_in_progress
|| reload_completed
)
7290 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7291 || reg_mentioned_p (arg_pointer_rtx
, x
)
7292 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7293 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7294 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7295 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7298 /* Accept any base register. SP only in SImode or larger. */
7299 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7302 /* This is PC relative data before arm_reorg runs. */
7303 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7304 && GET_CODE (x
) == SYMBOL_REF
7305 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7308 /* This is PC relative data after arm_reorg runs. */
7309 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7311 && (GET_CODE (x
) == LABEL_REF
7312 || (GET_CODE (x
) == CONST
7313 && GET_CODE (XEXP (x
, 0)) == PLUS
7314 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7315 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7318 /* Post-inc indexing only supported for SImode and larger. */
7319 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7320 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7323 else if (GET_CODE (x
) == PLUS
)
7325 /* REG+REG address can be any two index registers. */
7326 /* We disallow FRAME+REG addressing since we know that FRAME
7327 will be replaced with STACK, and SP relative addressing only
7328 permits SP+OFFSET. */
7329 if (GET_MODE_SIZE (mode
) <= 4
7330 && XEXP (x
, 0) != frame_pointer_rtx
7331 && XEXP (x
, 1) != frame_pointer_rtx
7332 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7333 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7334 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7337 /* REG+const has 5-7 bit offset for non-SP registers. */
7338 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7339 || XEXP (x
, 0) == arg_pointer_rtx
)
7340 && CONST_INT_P (XEXP (x
, 1))
7341 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7344 /* REG+const has 10-bit offset for SP, but only SImode and
7345 larger is supported. */
7346 /* ??? Should probably check for DI/DFmode overflow here
7347 just like GO_IF_LEGITIMATE_OFFSET does. */
7348 else if (REG_P (XEXP (x
, 0))
7349 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7350 && GET_MODE_SIZE (mode
) >= 4
7351 && CONST_INT_P (XEXP (x
, 1))
7352 && INTVAL (XEXP (x
, 1)) >= 0
7353 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7354 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7357 else if (REG_P (XEXP (x
, 0))
7358 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7359 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7360 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7361 && REGNO (XEXP (x
, 0))
7362 <= LAST_VIRTUAL_POINTER_REGISTER
))
7363 && GET_MODE_SIZE (mode
) >= 4
7364 && CONST_INT_P (XEXP (x
, 1))
7365 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7369 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7370 && GET_MODE_SIZE (mode
) == 4
7371 && GET_CODE (x
) == SYMBOL_REF
7372 && CONSTANT_POOL_ADDRESS_P (x
)
7374 && symbol_mentioned_p (get_pool_constant (x
))
7375 && ! pcrel_constant_p (get_pool_constant (x
))))
7381 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7382 instruction of mode MODE. */
7384 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
7386 switch (GET_MODE_SIZE (mode
))
7389 return val
>= 0 && val
< 32;
7392 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7396 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7402 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
7405 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7406 else if (TARGET_THUMB2
)
7407 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7408 else /* if (TARGET_THUMB1) */
7409 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7412 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7414 Given an rtx X being reloaded into a reg required to be
7415 in class CLASS, return the class of reg to actually use.
7416 In general this is just CLASS, but for the Thumb core registers and
7417 immediate constants we prefer a LO_REGS class or a subset. */
7420 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7426 if (rclass
== GENERAL_REGS
)
7433 /* Build the SYMBOL_REF for __tls_get_addr. */
7435 static GTY(()) rtx tls_get_addr_libfunc
;
7438 get_tls_get_addr (void)
7440 if (!tls_get_addr_libfunc
)
7441 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7442 return tls_get_addr_libfunc
;
7446 arm_load_tp (rtx target
)
7449 target
= gen_reg_rtx (SImode
);
7453 /* Can return in any reg. */
7454 emit_insn (gen_load_tp_hard (target
));
7458 /* Always returned in r0. Immediately copy the result into a pseudo,
7459 otherwise other uses of r0 (e.g. setting up function arguments) may
7460 clobber the value. */
7464 emit_insn (gen_load_tp_soft ());
7466 tmp
= gen_rtx_REG (SImode
, 0);
7467 emit_move_insn (target
, tmp
);
7473 load_tls_operand (rtx x
, rtx reg
)
7477 if (reg
== NULL_RTX
)
7478 reg
= gen_reg_rtx (SImode
);
7480 tmp
= gen_rtx_CONST (SImode
, x
);
7482 emit_move_insn (reg
, tmp
);
7488 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7490 rtx insns
, label
, labelno
, sum
;
7492 gcc_assert (reloc
!= TLS_DESCSEQ
);
7495 labelno
= GEN_INT (pic_labelno
++);
7496 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7497 label
= gen_rtx_CONST (VOIDmode
, label
);
7499 sum
= gen_rtx_UNSPEC (Pmode
,
7500 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7501 GEN_INT (TARGET_ARM
? 8 : 4)),
7503 reg
= load_tls_operand (sum
, reg
);
7506 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7508 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7510 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7511 LCT_PURE
, /* LCT_CONST? */
7512 Pmode
, 1, reg
, Pmode
);
7514 insns
= get_insns ();
7521 arm_tls_descseq_addr (rtx x
, rtx reg
)
7523 rtx labelno
= GEN_INT (pic_labelno
++);
7524 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7525 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7526 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7527 gen_rtx_CONST (VOIDmode
, label
),
7528 GEN_INT (!TARGET_ARM
)),
7530 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
7532 emit_insn (gen_tlscall (x
, labelno
));
7534 reg
= gen_reg_rtx (SImode
);
7536 gcc_assert (REGNO (reg
) != 0);
7538 emit_move_insn (reg
, reg0
);
7544 legitimize_tls_address (rtx x
, rtx reg
)
7546 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7547 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7551 case TLS_MODEL_GLOBAL_DYNAMIC
:
7552 if (TARGET_GNU2_TLS
)
7554 reg
= arm_tls_descseq_addr (x
, reg
);
7556 tp
= arm_load_tp (NULL_RTX
);
7558 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7562 /* Original scheme */
7563 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7564 dest
= gen_reg_rtx (Pmode
);
7565 emit_libcall_block (insns
, dest
, ret
, x
);
7569 case TLS_MODEL_LOCAL_DYNAMIC
:
7570 if (TARGET_GNU2_TLS
)
7572 reg
= arm_tls_descseq_addr (x
, reg
);
7574 tp
= arm_load_tp (NULL_RTX
);
7576 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7580 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7582 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7583 share the LDM result with other LD model accesses. */
7584 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7586 dest
= gen_reg_rtx (Pmode
);
7587 emit_libcall_block (insns
, dest
, ret
, eqv
);
7589 /* Load the addend. */
7590 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7591 GEN_INT (TLS_LDO32
)),
7593 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7594 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7598 case TLS_MODEL_INITIAL_EXEC
:
7599 labelno
= GEN_INT (pic_labelno
++);
7600 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7601 label
= gen_rtx_CONST (VOIDmode
, label
);
7602 sum
= gen_rtx_UNSPEC (Pmode
,
7603 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7604 GEN_INT (TARGET_ARM
? 8 : 4)),
7606 reg
= load_tls_operand (sum
, reg
);
7609 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7610 else if (TARGET_THUMB2
)
7611 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7614 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7615 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7618 tp
= arm_load_tp (NULL_RTX
);
7620 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7622 case TLS_MODEL_LOCAL_EXEC
:
7623 tp
= arm_load_tp (NULL_RTX
);
7625 reg
= gen_rtx_UNSPEC (Pmode
,
7626 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7628 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7630 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7637 /* Try machine-dependent ways of modifying an illegitimate address
7638 to be legitimate. If we find one, return the new, valid address. */
7640 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
7642 if (arm_tls_referenced_p (x
))
7646 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
7648 addend
= XEXP (XEXP (x
, 0), 1);
7649 x
= XEXP (XEXP (x
, 0), 0);
7652 if (GET_CODE (x
) != SYMBOL_REF
)
7655 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
7657 x
= legitimize_tls_address (x
, NULL_RTX
);
7661 x
= gen_rtx_PLUS (SImode
, x
, addend
);
7670 /* TODO: legitimize_address for Thumb2. */
7673 return thumb_legitimize_address (x
, orig_x
, mode
);
7676 if (GET_CODE (x
) == PLUS
)
7678 rtx xop0
= XEXP (x
, 0);
7679 rtx xop1
= XEXP (x
, 1);
7681 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7682 xop0
= force_reg (SImode
, xop0
);
7684 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7685 && !symbol_mentioned_p (xop1
))
7686 xop1
= force_reg (SImode
, xop1
);
7688 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7689 && CONST_INT_P (xop1
))
7691 HOST_WIDE_INT n
, low_n
;
7695 /* VFP addressing modes actually allow greater offsets, but for
7696 now we just stick with the lowest common denominator. */
7698 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7710 low_n
= ((mode
) == TImode
? 0
7711 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7715 base_reg
= gen_reg_rtx (SImode
);
7716 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7717 emit_move_insn (base_reg
, val
);
7718 x
= plus_constant (Pmode
, base_reg
, low_n
);
7720 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7721 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7724 /* XXX We don't allow MINUS any more -- see comment in
7725 arm_legitimate_address_outer_p (). */
7726 else if (GET_CODE (x
) == MINUS
)
7728 rtx xop0
= XEXP (x
, 0);
7729 rtx xop1
= XEXP (x
, 1);
7731 if (CONSTANT_P (xop0
))
7732 xop0
= force_reg (SImode
, xop0
);
7734 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
7735 xop1
= force_reg (SImode
, xop1
);
7737 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7738 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
7741 /* Make sure to take full advantage of the pre-indexed addressing mode
7742 with absolute addresses which often allows for the base register to
7743 be factorized for multiple adjacent memory references, and it might
7744 even allows for the mini pool to be avoided entirely. */
7745 else if (CONST_INT_P (x
) && optimize
> 0)
7748 HOST_WIDE_INT mask
, base
, index
;
7751 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7752 use a 8-bit index. So let's use a 12-bit index for SImode only and
7753 hope that arm_gen_constant will enable ldrb to use more bits. */
7754 bits
= (mode
== SImode
) ? 12 : 8;
7755 mask
= (1 << bits
) - 1;
7756 base
= INTVAL (x
) & ~mask
;
7757 index
= INTVAL (x
) & mask
;
7758 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
7760 /* It'll most probably be more efficient to generate the base
7761 with more bits set and use a negative index instead. */
7765 base_reg
= force_reg (SImode
, GEN_INT (base
));
7766 x
= plus_constant (Pmode
, base_reg
, index
);
7771 /* We need to find and carefully transform any SYMBOL and LABEL
7772 references; so go back to the original address expression. */
7773 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7775 if (new_x
!= orig_x
)
7783 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7784 to be legitimate. If we find one, return the new, valid address. */
7786 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
7788 if (GET_CODE (x
) == PLUS
7789 && CONST_INT_P (XEXP (x
, 1))
7790 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
7791 || INTVAL (XEXP (x
, 1)) < 0))
7793 rtx xop0
= XEXP (x
, 0);
7794 rtx xop1
= XEXP (x
, 1);
7795 HOST_WIDE_INT offset
= INTVAL (xop1
);
7797 /* Try and fold the offset into a biasing of the base register and
7798 then offsetting that. Don't do this when optimizing for space
7799 since it can cause too many CSEs. */
7800 if (optimize_size
&& offset
>= 0
7801 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
7803 HOST_WIDE_INT delta
;
7806 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
7807 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
7808 delta
= 31 * GET_MODE_SIZE (mode
);
7810 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
7812 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
7814 x
= plus_constant (Pmode
, xop0
, delta
);
7816 else if (offset
< 0 && offset
> -256)
7817 /* Small negative offsets are best done with a subtract before the
7818 dereference, forcing these into a register normally takes two
7820 x
= force_operand (x
, NULL_RTX
);
7823 /* For the remaining cases, force the constant into a register. */
7824 xop1
= force_reg (SImode
, xop1
);
7825 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7828 else if (GET_CODE (x
) == PLUS
7829 && s_register_operand (XEXP (x
, 1), SImode
)
7830 && !s_register_operand (XEXP (x
, 0), SImode
))
7832 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
7834 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
7839 /* We need to find and carefully transform any SYMBOL and LABEL
7840 references; so go back to the original address expression. */
7841 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7843 if (new_x
!= orig_x
)
7851 arm_legitimize_reload_address (rtx
*p
,
7853 int opnum
, int type
,
7854 int ind_levels ATTRIBUTE_UNUSED
)
7856 /* We must recognize output that we have already generated ourselves. */
7857 if (GET_CODE (*p
) == PLUS
7858 && GET_CODE (XEXP (*p
, 0)) == PLUS
7859 && REG_P (XEXP (XEXP (*p
, 0), 0))
7860 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
7861 && CONST_INT_P (XEXP (*p
, 1)))
7863 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7864 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7865 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7869 if (GET_CODE (*p
) == PLUS
7870 && REG_P (XEXP (*p
, 0))
7871 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
7872 /* If the base register is equivalent to a constant, let the generic
7873 code handle it. Otherwise we will run into problems if a future
7874 reload pass decides to rematerialize the constant. */
7875 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
7876 && CONST_INT_P (XEXP (*p
, 1)))
7878 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
7879 HOST_WIDE_INT low
, high
;
7881 /* Detect coprocessor load/stores. */
7882 bool coproc_p
= ((TARGET_HARD_FLOAT
7884 && (mode
== SFmode
|| mode
== DFmode
))
7885 || (TARGET_REALLY_IWMMXT
7886 && VALID_IWMMXT_REG_MODE (mode
))
7888 && (VALID_NEON_DREG_MODE (mode
)
7889 || VALID_NEON_QREG_MODE (mode
))));
7891 /* For some conditions, bail out when lower two bits are unaligned. */
7892 if ((val
& 0x3) != 0
7893 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7895 /* For DI, and DF under soft-float: */
7896 || ((mode
== DImode
|| mode
== DFmode
)
7897 /* Without ldrd, we use stm/ldm, which does not
7898 fair well with unaligned bits. */
7900 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7901 || TARGET_THUMB2
))))
7904 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7905 of which the (reg+high) gets turned into a reload add insn,
7906 we try to decompose the index into high/low values that can often
7907 also lead to better reload CSE.
7909 ldr r0, [r2, #4100] // Offset too large
7910 ldr r1, [r2, #4104] // Offset too large
7912 is best reloaded as:
7918 which post-reload CSE can simplify in most cases to eliminate the
7919 second add instruction:
7924 The idea here is that we want to split out the bits of the constant
7925 as a mask, rather than as subtracting the maximum offset that the
7926 respective type of load/store used can handle.
7928 When encountering negative offsets, we can still utilize it even if
7929 the overall offset is positive; sometimes this may lead to an immediate
7930 that can be constructed with fewer instructions.
7932 ldr r0, [r2, #0x3FFFFC]
7934 This is best reloaded as:
7935 add t1, r2, #0x400000
7938 The trick for spotting this for a load insn with N bits of offset
7939 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
7940 negative offset that is going to make bit N and all the bits below
7941 it become zero in the remainder part.
7943 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7944 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7945 used in most cases of ARM load/store instructions. */
7947 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7948 (((VAL) & ((1 << (N)) - 1)) \
7949 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7954 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
7956 /* NEON quad-word load/stores are made of two double-word accesses,
7957 so the valid index range is reduced by 8. Treat as 9-bit range if
7959 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
7960 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
7962 else if (GET_MODE_SIZE (mode
) == 8)
7965 low
= (TARGET_THUMB2
7966 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
7967 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
7969 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7970 to access doublewords. The supported load/store offsets are
7971 -8, -4, and 4, which we try to produce here. */
7972 low
= ((val
& 0xf) ^ 0x8) - 0x8;
7974 else if (GET_MODE_SIZE (mode
) < 8)
7976 /* NEON element load/stores do not have an offset. */
7977 if (TARGET_NEON_FP16
&& mode
== HFmode
)
7982 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7983 Try the wider 12-bit range first, and re-try if the result
7985 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7987 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7991 if (mode
== HImode
|| mode
== HFmode
)
7994 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7997 /* The storehi/movhi_bytes fallbacks can use only
7998 [-4094,+4094] of the full ldrb/strb index range. */
7999 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
8000 if (low
== 4095 || low
== -4095)
8005 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
8011 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
8012 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
8013 - (unsigned HOST_WIDE_INT
) 0x80000000);
8014 /* Check for overflow or zero */
8015 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
8018 /* Reload the high part into a base reg; leave the low part
8020 Note that replacing this gen_rtx_PLUS with plus_constant is
8021 wrong in this case because we rely on the
8022 (plus (plus reg c1) c2) structure being preserved so that
8023 XEXP (*p, 0) in push_reload below uses the correct term. */
8024 *p
= gen_rtx_PLUS (GET_MODE (*p
),
8025 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
8028 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
8029 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
8030 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
8038 thumb_legitimize_reload_address (rtx
*x_p
,
8040 int opnum
, int type
,
8041 int ind_levels ATTRIBUTE_UNUSED
)
8045 if (GET_CODE (x
) == PLUS
8046 && GET_MODE_SIZE (mode
) < 4
8047 && REG_P (XEXP (x
, 0))
8048 && XEXP (x
, 0) == stack_pointer_rtx
8049 && CONST_INT_P (XEXP (x
, 1))
8050 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8055 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
8056 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
8060 /* If both registers are hi-regs, then it's better to reload the
8061 entire expression rather than each register individually. That
8062 only requires one reload register rather than two. */
8063 if (GET_CODE (x
) == PLUS
8064 && REG_P (XEXP (x
, 0))
8065 && REG_P (XEXP (x
, 1))
8066 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
8067 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
8072 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
8073 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
8080 /* Test for various thread-local symbols. */
8082 /* Helper for arm_tls_referenced_p. */
8085 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
8087 if (GET_CODE (*x
) == SYMBOL_REF
)
8088 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
8090 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8091 TLS offsets, not real symbol references. */
8092 if (GET_CODE (*x
) == UNSPEC
8093 && XINT (*x
, 1) == UNSPEC_TLS
)
8099 /* Return TRUE if X contains any TLS symbol references. */
8102 arm_tls_referenced_p (rtx x
)
8104 if (! TARGET_HAVE_TLS
)
8107 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
8110 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8112 On the ARM, allow any integer (invalid ones are removed later by insn
8113 patterns), nice doubles and symbol_refs which refer to the function's
8116 When generating pic allow anything. */
8119 arm_legitimate_constant_p_1 (machine_mode mode
, rtx x
)
8121 /* At present, we have no support for Neon structure constants, so forbid
8122 them here. It might be possible to handle simple cases like 0 and -1
8124 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
8127 return flag_pic
|| !label_mentioned_p (x
);
8131 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8133 return (CONST_INT_P (x
)
8134 || CONST_DOUBLE_P (x
)
8135 || CONSTANT_ADDRESS_P (x
)
8140 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8142 return (!arm_cannot_force_const_mem (mode
, x
)
8144 ? arm_legitimate_constant_p_1 (mode
, x
)
8145 : thumb_legitimate_constant_p (mode
, x
)));
8148 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8151 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8155 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8157 split_const (x
, &base
, &offset
);
8158 if (GET_CODE (base
) == SYMBOL_REF
8159 && !offset_within_block_p (base
, INTVAL (offset
)))
8162 return arm_tls_referenced_p (x
);
/* Nonzero if X is a register, or a SUBREG of a register.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* The underlying register of X, looking through a SUBREG if present.  */
#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
8173 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8175 machine_mode mode
= GET_MODE (x
);
8184 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8191 return COSTS_N_INSNS (1);
8194 if (CONST_INT_P (XEXP (x
, 1)))
8197 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8204 return COSTS_N_INSNS (2) + cycles
;
8206 return COSTS_N_INSNS (1) + 16;
8209 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8211 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8212 return (COSTS_N_INSNS (words
)
8213 + 4 * ((MEM_P (SET_SRC (x
)))
8214 + MEM_P (SET_DEST (x
))));
8219 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8221 if (thumb_shiftable_const (INTVAL (x
)))
8222 return COSTS_N_INSNS (2);
8223 return COSTS_N_INSNS (3);
8225 else if ((outer
== PLUS
|| outer
== COMPARE
)
8226 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8228 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8229 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8230 return COSTS_N_INSNS (1);
8231 else if (outer
== AND
)
8234 /* This duplicates the tests in the andsi3 expander. */
8235 for (i
= 9; i
<= 31; i
++)
8236 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8237 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8238 return COSTS_N_INSNS (2);
8240 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8241 || outer
== LSHIFTRT
)
8243 return COSTS_N_INSNS (2);
8249 return COSTS_N_INSNS (3);
8267 /* XXX another guess. */
8268 /* Memory costs quite a lot for the first word, but subsequent words
8269 load at the equivalent of a single insn each. */
8270 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8271 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8276 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8282 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8283 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8289 return total
+ COSTS_N_INSNS (1);
8291 /* Assume a two-shift sequence. Increase the cost slightly so
8292 we prefer actual shifts over an extend operation. */
8293 return total
+ 1 + COSTS_N_INSNS (2);
8301 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8303 machine_mode mode
= GET_MODE (x
);
8304 enum rtx_code subcode
;
8306 enum rtx_code code
= GET_CODE (x
);
8312 /* Memory costs quite a lot for the first word, but subsequent words
8313 load at the equivalent of a single insn each. */
8314 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8321 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8322 *total
= COSTS_N_INSNS (2);
8323 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8324 *total
= COSTS_N_INSNS (4);
8326 *total
= COSTS_N_INSNS (20);
8330 if (REG_P (XEXP (x
, 1)))
8331 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8332 else if (!CONST_INT_P (XEXP (x
, 1)))
8333 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8339 *total
+= COSTS_N_INSNS (4);
8344 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8345 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8348 *total
+= COSTS_N_INSNS (3);
8352 *total
+= COSTS_N_INSNS (1);
8353 /* Increase the cost of complex shifts because they aren't any faster,
8354 and reduce dual issue opportunities. */
8355 if (arm_tune_cortex_a9
8356 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8364 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8365 if (CONST_INT_P (XEXP (x
, 0))
8366 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8368 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8372 if (CONST_INT_P (XEXP (x
, 1))
8373 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8375 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8382 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8384 if (TARGET_HARD_FLOAT
8386 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8388 *total
= COSTS_N_INSNS (1);
8389 if (CONST_DOUBLE_P (XEXP (x
, 0))
8390 && arm_const_double_rtx (XEXP (x
, 0)))
8392 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8396 if (CONST_DOUBLE_P (XEXP (x
, 1))
8397 && arm_const_double_rtx (XEXP (x
, 1)))
8399 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8405 *total
= COSTS_N_INSNS (20);
8409 *total
= COSTS_N_INSNS (1);
8410 if (CONST_INT_P (XEXP (x
, 0))
8411 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8413 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8417 subcode
= GET_CODE (XEXP (x
, 1));
8418 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8419 || subcode
== LSHIFTRT
8420 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8422 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8423 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8427 /* A shift as a part of RSB costs no more than RSB itself. */
8428 if (GET_CODE (XEXP (x
, 0)) == MULT
8429 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8431 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8432 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8437 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8439 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8440 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8444 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8445 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8447 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8448 if (REG_P (XEXP (XEXP (x
, 1), 0))
8449 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8450 *total
+= COSTS_N_INSNS (1);
8458 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8459 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8460 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8462 *total
= COSTS_N_INSNS (1);
8463 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8465 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8469 /* MLA: All arguments must be registers. We filter out
8470 multiplication by a power of two, so that we fall down into
8472 if (GET_CODE (XEXP (x
, 0)) == MULT
8473 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8475 /* The cost comes from the cost of the multiply. */
8479 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8481 if (TARGET_HARD_FLOAT
8483 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8485 *total
= COSTS_N_INSNS (1);
8486 if (CONST_DOUBLE_P (XEXP (x
, 1))
8487 && arm_const_double_rtx (XEXP (x
, 1)))
8489 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8496 *total
= COSTS_N_INSNS (20);
8500 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8501 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8503 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8504 if (REG_P (XEXP (XEXP (x
, 0), 0))
8505 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8506 *total
+= COSTS_N_INSNS (1);
8512 case AND
: case XOR
: case IOR
:
8514 /* Normally the frame registers will be spilt into reg+const during
8515 reload, so it is a bad idea to combine them with other instructions,
8516 since then they might not be moved outside of loops. As a compromise
8517 we allow integration with ops that have a constant as their second
8519 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8520 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8521 && !CONST_INT_P (XEXP (x
, 1)))
8522 *total
= COSTS_N_INSNS (1);
8526 *total
+= COSTS_N_INSNS (2);
8527 if (CONST_INT_P (XEXP (x
, 1))
8528 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8530 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8537 *total
+= COSTS_N_INSNS (1);
8538 if (CONST_INT_P (XEXP (x
, 1))
8539 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8541 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8544 subcode
= GET_CODE (XEXP (x
, 0));
8545 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8546 || subcode
== LSHIFTRT
8547 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8549 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8550 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8555 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8557 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8558 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8562 if (subcode
== UMIN
|| subcode
== UMAX
8563 || subcode
== SMIN
|| subcode
== SMAX
)
8565 *total
= COSTS_N_INSNS (3);
8572 /* This should have been handled by the CPU specific routines. */
8576 if (arm_arch3m
&& mode
== SImode
8577 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8578 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8579 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8580 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8581 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8582 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8584 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8587 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8591 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8593 if (TARGET_HARD_FLOAT
8595 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8597 *total
= COSTS_N_INSNS (1);
8600 *total
= COSTS_N_INSNS (2);
8606 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8607 if (mode
== SImode
&& code
== NOT
)
8609 subcode
= GET_CODE (XEXP (x
, 0));
8610 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8611 || subcode
== LSHIFTRT
8612 || subcode
== ROTATE
|| subcode
== ROTATERT
8614 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8616 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8617 /* Register shifts cost an extra cycle. */
8618 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8619 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8628 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8630 *total
= COSTS_N_INSNS (4);
8634 operand
= XEXP (x
, 0);
8636 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8637 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8638 && REG_P (XEXP (operand
, 0))
8639 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8640 *total
+= COSTS_N_INSNS (1);
8641 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8642 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8646 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8648 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8654 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8655 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8657 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8663 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8664 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8666 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8686 /* SCC insns. In the case where the comparison has already been
8687 performed, then they cost 2 instructions. Otherwise they need
8688 an additional comparison before them. */
8689 *total
= COSTS_N_INSNS (2);
8690 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8697 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8703 *total
+= COSTS_N_INSNS (1);
8704 if (CONST_INT_P (XEXP (x
, 1))
8705 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8707 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8711 subcode
= GET_CODE (XEXP (x
, 0));
8712 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8713 || subcode
== LSHIFTRT
8714 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8716 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8717 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8722 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8724 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8725 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8735 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8736 if (!CONST_INT_P (XEXP (x
, 1))
8737 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8738 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8742 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8744 if (TARGET_HARD_FLOAT
8746 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8748 *total
= COSTS_N_INSNS (1);
8751 *total
= COSTS_N_INSNS (20);
8754 *total
= COSTS_N_INSNS (1);
8756 *total
+= COSTS_N_INSNS (3);
8762 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8764 rtx op
= XEXP (x
, 0);
8765 machine_mode opmode
= GET_MODE (op
);
8768 *total
+= COSTS_N_INSNS (1);
8770 if (opmode
!= SImode
)
8774 /* If !arm_arch4, we use one of the extendhisi2_mem
8775 or movhi_bytes patterns for HImode. For a QImode
8776 sign extension, we first zero-extend from memory
8777 and then perform a shift sequence. */
8778 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8779 *total
+= COSTS_N_INSNS (2);
8782 *total
+= COSTS_N_INSNS (1);
8784 /* We don't have the necessary insn, so we need to perform some
8786 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8787 /* An and with constant 255. */
8788 *total
+= COSTS_N_INSNS (1);
8790 /* A shift sequence. Increase costs slightly to avoid
8791 combining two shifts into an extend operation. */
8792 *total
+= COSTS_N_INSNS (2) + 1;
8798 switch (GET_MODE (XEXP (x
, 0)))
8805 *total
= COSTS_N_INSNS (1);
8815 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8819 if (const_ok_for_arm (INTVAL (x
))
8820 || const_ok_for_arm (~INTVAL (x
)))
8821 *total
= COSTS_N_INSNS (1);
8823 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8824 INTVAL (x
), NULL_RTX
,
8831 *total
= COSTS_N_INSNS (3);
8835 *total
= COSTS_N_INSNS (1);
8839 *total
= COSTS_N_INSNS (1);
8840 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8844 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8845 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8846 *total
= COSTS_N_INSNS (1);
8848 *total
= COSTS_N_INSNS (4);
8852 /* The vec_extract patterns accept memory operands that require an
8853 address reload. Account for the cost of that reload to give the
8854 auto-inc-dec pass an incentive to try to replace them. */
8855 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8856 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8858 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8859 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8860 *total
+= COSTS_N_INSNS (1);
8863 /* Likewise for the vec_set patterns. */
8864 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8865 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8866 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8868 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8869 *total
= rtx_cost (mem
, code
, 0, speed
);
8870 if (!neon_vector_mem_operand (mem
, 2, true))
8871 *total
+= COSTS_N_INSNS (1);
8877 /* We cost this as high as our memory costs to allow this to
8878 be hoisted from loops. */
8879 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8881 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8887 && TARGET_HARD_FLOAT
8889 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8890 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8891 *total
= COSTS_N_INSNS (1);
8893 *total
= COSTS_N_INSNS (4);
8897 *total
= COSTS_N_INSNS (4);
8902 /* Estimates the size cost of thumb1 instructions.
8903 For now most of the code is copied from thumb1_rtx_costs. We need more
8904 fine grain tuning when we have more related test cases. */
8906 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8908 machine_mode mode
= GET_MODE (x
);
8917 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8921 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8922 defined by RTL expansion, especially for the expansion of
8924 if ((GET_CODE (XEXP (x
, 0)) == MULT
8925 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8926 || (GET_CODE (XEXP (x
, 1)) == MULT
8927 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8928 return COSTS_N_INSNS (2);
8929 /* On purpose fall through for normal RTX. */
8933 return COSTS_N_INSNS (1);
8936 if (CONST_INT_P (XEXP (x
, 1)))
8938 /* Thumb1 mul instruction can't operate on const. We must Load it
8939 into a register first. */
8940 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8941 return COSTS_N_INSNS (1) + const_size
;
8943 return COSTS_N_INSNS (1);
8946 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8948 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8949 return COSTS_N_INSNS (words
)
8950 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x
))
8951 || satisfies_constraint_K (SET_SRC (x
))
8952 /* thumb1_movdi_insn. */
8953 || ((words
> 1) && MEM_P (SET_SRC (x
))));
8958 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8959 return COSTS_N_INSNS (1);
8960 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8961 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8962 return COSTS_N_INSNS (2);
8963 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8964 if (thumb_shiftable_const (INTVAL (x
)))
8965 return COSTS_N_INSNS (2);
8966 return COSTS_N_INSNS (3);
8968 else if ((outer
== PLUS
|| outer
== COMPARE
)
8969 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8971 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8972 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8973 return COSTS_N_INSNS (1);
8974 else if (outer
== AND
)
8977 /* This duplicates the tests in the andsi3 expander. */
8978 for (i
= 9; i
<= 31; i
++)
8979 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8980 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8981 return COSTS_N_INSNS (2);
8983 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8984 || outer
== LSHIFTRT
)
8986 return COSTS_N_INSNS (2);
8992 return COSTS_N_INSNS (3);
9006 return COSTS_N_INSNS (1);
9009 return (COSTS_N_INSNS (1)
9011 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9012 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9013 ? COSTS_N_INSNS (1) : 0));
9017 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9022 /* XXX still guessing. */
9023 switch (GET_MODE (XEXP (x
, 0)))
9026 return (1 + (mode
== DImode
? 4 : 0)
9027 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9030 return (4 + (mode
== DImode
? 4 : 0)
9031 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9034 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9045 /* RTX costs when optimizing for size. */
9047 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9050 machine_mode mode
= GET_MODE (x
);
9053 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9057 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9061 /* A memory access costs 1 insn if the mode is small, or the address is
9062 a single register, otherwise it costs one insn per word. */
9063 if (REG_P (XEXP (x
, 0)))
9064 *total
= COSTS_N_INSNS (1);
9066 && GET_CODE (XEXP (x
, 0)) == PLUS
9067 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9068 /* This will be split into two instructions.
9069 See arm.md:calculate_pic_address. */
9070 *total
= COSTS_N_INSNS (2);
9072 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9079 /* Needs a libcall, so it costs about this. */
9080 *total
= COSTS_N_INSNS (2);
9084 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9086 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9094 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9096 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9099 else if (mode
== SImode
)
9101 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9102 /* Slightly disparage register shifts, but not by much. */
9103 if (!CONST_INT_P (XEXP (x
, 1)))
9104 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
9108 /* Needs a libcall. */
9109 *total
= COSTS_N_INSNS (2);
9113 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9114 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9116 *total
= COSTS_N_INSNS (1);
9122 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
9123 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
9125 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
9126 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
9127 || subcode1
== ROTATE
|| subcode1
== ROTATERT
9128 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
9129 || subcode1
== ASHIFTRT
)
9131 /* It's just the cost of the two operands. */
9136 *total
= COSTS_N_INSNS (1);
9140 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9144 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9145 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9147 *total
= COSTS_N_INSNS (1);
9151 /* A shift as a part of ADD costs nothing. */
9152 if (GET_CODE (XEXP (x
, 0)) == MULT
9153 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
9155 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
9156 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
9157 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
9162 case AND
: case XOR
: case IOR
:
9165 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9167 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9168 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9169 || (code
== AND
&& subcode
== NOT
))
9171 /* It's just the cost of the two operands. */
9177 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9181 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9185 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9186 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9188 *total
= COSTS_N_INSNS (1);
9194 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9203 if (cc_register (XEXP (x
, 0), VOIDmode
))
9206 *total
= COSTS_N_INSNS (1);
9210 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9211 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9212 *total
= COSTS_N_INSNS (1);
9214 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9219 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9222 if (const_ok_for_arm (INTVAL (x
)))
9223 /* A multiplication by a constant requires another instruction
9224 to load the constant to a register. */
9225 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9227 else if (const_ok_for_arm (~INTVAL (x
)))
9228 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9229 else if (const_ok_for_arm (-INTVAL (x
)))
9231 if (outer_code
== COMPARE
|| outer_code
== PLUS
9232 || outer_code
== MINUS
)
9235 *total
= COSTS_N_INSNS (1);
9238 *total
= COSTS_N_INSNS (2);
9244 *total
= COSTS_N_INSNS (2);
9248 *total
= COSTS_N_INSNS (4);
9253 && TARGET_HARD_FLOAT
9254 && outer_code
== SET
9255 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9256 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9257 *total
= COSTS_N_INSNS (1);
9259 *total
= COSTS_N_INSNS (4);
9264 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9265 cost of these slightly. */
9266 *total
= COSTS_N_INSNS (1) + 1;
9273 if (mode
!= VOIDmode
)
9274 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9276 *total
= COSTS_N_INSNS (4); /* How knows? */
9281 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9282 operand, then return the operand that is being shifted. If the shift
9283 is not by a constant, then set SHIFT_REG to point to the operand.
9284 Return NULL if OP is not a shifter operand. */
9286 shifter_op_p (rtx op
, rtx
*shift_reg
)
9288 enum rtx_code code
= GET_CODE (op
);
9290 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9291 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9292 return XEXP (op
, 0);
9293 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9294 return XEXP (op
, 0);
9295 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9296 || code
== ASHIFTRT
)
9298 if (!CONST_INT_P (XEXP (op
, 1)))
9299 *shift_reg
= XEXP (op
, 1);
9300 return XEXP (op
, 0);
9307 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9309 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9310 gcc_assert (GET_CODE (x
) == UNSPEC
);
9312 switch (XINT (x
, 1))
9314 case UNSPEC_UNALIGNED_LOAD
:
9315 /* We can only do unaligned loads into the integer unit, and we can't
9317 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9319 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9320 + extra_cost
->ldst
.load_unaligned
);
9323 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9324 ADDR_SPACE_GENERIC
, speed_p
);
9328 case UNSPEC_UNALIGNED_STORE
:
9329 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9331 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9332 + extra_cost
->ldst
.store_unaligned
);
9334 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
9336 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9337 ADDR_SPACE_GENERIC
, speed_p
);
9347 *cost
= COSTS_N_INSNS (1);
9349 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9353 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

/* Cost a narrow-mode arithmetic operation OP whose operand at index IDX
   is a shifter operand; on a match, accumulate into *cost and return.  */
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);	\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)	\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
9387 /* RTX costs. Make an estimate of the cost of executing the operation
9388 X, which is contained with an operation with code OUTER_CODE.
9389 SPEED_P indicates whether the cost desired is the performance cost,
9390 or the size cost. The estimate is stored in COST and the return
9391 value is TRUE if the cost calculation is final, or FALSE if the
9392 caller should recurse through the operands of X to add additional
9395 We currently make no attempt to model the size savings of Thumb-2
9396 16-bit instructions. At the normal points in compilation where
9397 this code is called we have no measure of whether the condition
9398 flags are live or not, and thus no realistic way to determine what
9399 the size will eventually be. */
9401 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9402 const struct cpu_cost_table
*extra_cost
,
9403 int *cost
, bool speed_p
)
9405 machine_mode mode
= GET_MODE (x
);
9410 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9412 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9420 /* SET RTXs don't have a mode so we get it from the destination. */
9421 mode
= GET_MODE (SET_DEST (x
));
9423 if (REG_P (SET_SRC (x
))
9424 && REG_P (SET_DEST (x
)))
9426 /* Assume that most copies can be done with a single insn,
9427 unless we don't have HW FP, in which case everything
9428 larger than word mode will require two insns. */
9429 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9430 && GET_MODE_SIZE (mode
) > 4)
9433 /* Conditional register moves can be encoded
9434 in 16 bits in Thumb mode. */
9435 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9441 if (CONST_INT_P (SET_SRC (x
)))
9443 /* Handle CONST_INT here, since the value doesn't have a mode
9444 and we would otherwise be unable to work out the true cost. */
9445 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9447 /* Slightly lower the cost of setting a core reg to a constant.
9448 This helps break up chains and allows for better scheduling. */
9449 if (REG_P (SET_DEST (x
))
9450 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9453 /* Immediate moves with an immediate in the range [0, 255] can be
9454 encoded in 16 bits in Thumb mode. */
9455 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9456 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9458 goto const_int_cost
;
9464 /* A memory access costs 1 insn if the mode is small, or the address is
9465 a single register, otherwise it costs one insn per word. */
9466 if (REG_P (XEXP (x
, 0)))
9467 *cost
= COSTS_N_INSNS (1);
9469 && GET_CODE (XEXP (x
, 0)) == PLUS
9470 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9471 /* This will be split into two instructions.
9472 See arm.md:calculate_pic_address. */
9473 *cost
= COSTS_N_INSNS (2);
9475 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9477 /* For speed optimizations, add the costs of the address and
9478 accessing memory. */
9481 *cost
+= (extra_cost
->ldst
.load
9482 + arm_address_cost (XEXP (x
, 0), mode
,
9483 ADDR_SPACE_GENERIC
, speed_p
));
9485 *cost
+= extra_cost
->ldst
.load
;
9491 /* Calculations of LDM costs are complex. We assume an initial cost
9492 (ldm_1st) which will load the number of registers mentioned in
9493 ldm_regs_per_insn_1st registers; then each additional
9494 ldm_regs_per_insn_subsequent registers cost one more insn. The
9495 formula for N regs is thus:
9497 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9498 + ldm_regs_per_insn_subsequent - 1)
9499 / ldm_regs_per_insn_subsequent).
9501 Additional costs may also be added for addressing. A similar
9502 formula is used for STM. */
9504 bool is_ldm
= load_multiple_operation (x
, SImode
);
9505 bool is_stm
= store_multiple_operation (x
, SImode
);
9507 *cost
= COSTS_N_INSNS (1);
9509 if (is_ldm
|| is_stm
)
9513 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9514 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9515 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9516 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9517 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9518 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9519 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9521 *cost
+= regs_per_insn_1st
9522 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9523 + regs_per_insn_sub
- 1)
9524 / regs_per_insn_sub
);
9533 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9534 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9535 *cost
= COSTS_N_INSNS (speed_p
9536 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9537 else if (mode
== SImode
&& TARGET_IDIV
)
9538 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9540 *cost
= LIBCALL_COST (2);
9541 return false; /* All arguments must be in registers. */
9545 *cost
= LIBCALL_COST (2);
9546 return false; /* All arguments must be in registers. */
9549 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9551 *cost
= (COSTS_N_INSNS (2)
9552 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9554 *cost
+= extra_cost
->alu
.shift_reg
;
9562 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9564 *cost
= (COSTS_N_INSNS (3)
9565 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9567 *cost
+= 2 * extra_cost
->alu
.shift
;
9570 else if (mode
== SImode
)
9572 *cost
= (COSTS_N_INSNS (1)
9573 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9574 /* Slightly disparage register shifts at -Os, but not by much. */
9575 if (!CONST_INT_P (XEXP (x
, 1)))
9576 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9577 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9580 else if (GET_MODE_CLASS (mode
) == MODE_INT
9581 && GET_MODE_SIZE (mode
) < 4)
9585 *cost
= (COSTS_N_INSNS (1)
9586 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9587 /* Slightly disparage register shifts at -Os, but not by
9589 if (!CONST_INT_P (XEXP (x
, 1)))
9590 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9591 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9593 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9595 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9597 /* Can use SBFX/UBFX. */
9598 *cost
= COSTS_N_INSNS (1);
9600 *cost
+= extra_cost
->alu
.bfx
;
9601 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9605 *cost
= COSTS_N_INSNS (2);
9606 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9609 if (CONST_INT_P (XEXP (x
, 1)))
9610 *cost
+= 2 * extra_cost
->alu
.shift
;
9612 *cost
+= (extra_cost
->alu
.shift
9613 + extra_cost
->alu
.shift_reg
);
9616 /* Slightly disparage register shifts. */
9617 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9622 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9623 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9626 if (CONST_INT_P (XEXP (x
, 1)))
9627 *cost
+= (2 * extra_cost
->alu
.shift
9628 + extra_cost
->alu
.log_shift
);
9630 *cost
+= (extra_cost
->alu
.shift
9631 + extra_cost
->alu
.shift_reg
9632 + extra_cost
->alu
.log_shift_reg
);
9638 *cost
= LIBCALL_COST (2);
9646 *cost
= COSTS_N_INSNS (1);
9648 *cost
+= extra_cost
->alu
.rev
;
9655 /* No rev instruction available. Look at arm_legacy_rev
9656 and thumb_legacy_rev for the form of RTL used then. */
9659 *cost
= COSTS_N_INSNS (10);
9663 *cost
+= 6 * extra_cost
->alu
.shift
;
9664 *cost
+= 3 * extra_cost
->alu
.logical
;
9669 *cost
= COSTS_N_INSNS (5);
9673 *cost
+= 2 * extra_cost
->alu
.shift
;
9674 *cost
+= extra_cost
->alu
.arith_shift
;
9675 *cost
+= 2 * extra_cost
->alu
.logical
;
9683 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9684 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9686 *cost
= COSTS_N_INSNS (1);
9687 if (GET_CODE (XEXP (x
, 0)) == MULT
9688 || GET_CODE (XEXP (x
, 1)) == MULT
)
9690 rtx mul_op0
, mul_op1
, sub_op
;
9693 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9695 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9697 mul_op0
= XEXP (XEXP (x
, 0), 0);
9698 mul_op1
= XEXP (XEXP (x
, 0), 1);
9699 sub_op
= XEXP (x
, 1);
9703 mul_op0
= XEXP (XEXP (x
, 1), 0);
9704 mul_op1
= XEXP (XEXP (x
, 1), 1);
9705 sub_op
= XEXP (x
, 0);
9708 /* The first operand of the multiply may be optionally
9710 if (GET_CODE (mul_op0
) == NEG
)
9711 mul_op0
= XEXP (mul_op0
, 0);
9713 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9714 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9715 + rtx_cost (sub_op
, code
, 0, speed_p
));
9721 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9727 rtx shift_by_reg
= NULL
;
9731 *cost
= COSTS_N_INSNS (1);
9733 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9734 if (shift_op
== NULL
)
9736 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9737 non_shift_op
= XEXP (x
, 0);
9740 non_shift_op
= XEXP (x
, 1);
9742 if (shift_op
!= NULL
)
9744 if (shift_by_reg
!= NULL
)
9747 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9748 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9751 *cost
+= extra_cost
->alu
.arith_shift
;
9753 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9754 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9759 && GET_CODE (XEXP (x
, 1)) == MULT
)
9763 *cost
+= extra_cost
->mult
[0].add
;
9764 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9765 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9766 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9770 if (CONST_INT_P (XEXP (x
, 0)))
9772 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9773 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9775 *cost
= COSTS_N_INSNS (insns
);
9777 *cost
+= insns
* extra_cost
->alu
.arith
;
9778 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9785 if (GET_MODE_CLASS (mode
) == MODE_INT
9786 && GET_MODE_SIZE (mode
) < 4)
9788 rtx shift_op
, shift_reg
;
9791 /* We check both sides of the MINUS for shifter operands since,
9792 unlike PLUS, it's not commutative. */
9794 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9795 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9797 /* Slightly disparage, as we might need to widen the result. */
9798 *cost
= 1 + COSTS_N_INSNS (1);
9800 *cost
+= extra_cost
->alu
.arith
;
9802 if (CONST_INT_P (XEXP (x
, 0)))
9804 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9813 *cost
= COSTS_N_INSNS (2);
9815 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9817 rtx op1
= XEXP (x
, 1);
9820 *cost
+= 2 * extra_cost
->alu
.arith
;
9822 if (GET_CODE (op1
) == ZERO_EXTEND
)
9823 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9825 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9826 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9830 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9833 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9834 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9836 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9839 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9840 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9843 *cost
+= (extra_cost
->alu
.arith
9844 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9845 ? extra_cost
->alu
.arith
9846 : extra_cost
->alu
.arith_shift
));
9847 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9848 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9849 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9854 *cost
+= 2 * extra_cost
->alu
.arith
;
9860 *cost
= LIBCALL_COST (2);
9864 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9865 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9867 *cost
= COSTS_N_INSNS (1);
9868 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9870 rtx mul_op0
, mul_op1
, add_op
;
9873 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9875 mul_op0
= XEXP (XEXP (x
, 0), 0);
9876 mul_op1
= XEXP (XEXP (x
, 0), 1);
9877 add_op
= XEXP (x
, 1);
9879 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9880 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9881 + rtx_cost (add_op
, code
, 0, speed_p
));
9887 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9890 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9892 *cost
= LIBCALL_COST (2);
9896 /* Narrow modes can be synthesized in SImode, but the range
9897 of useful sub-operations is limited. Check for shift operations
9898 on one of the operands. Only left shifts can be used in the
9900 if (GET_MODE_CLASS (mode
) == MODE_INT
9901 && GET_MODE_SIZE (mode
) < 4)
9903 rtx shift_op
, shift_reg
;
9906 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9908 if (CONST_INT_P (XEXP (x
, 1)))
9910 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9911 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9913 *cost
= COSTS_N_INSNS (insns
);
9915 *cost
+= insns
* extra_cost
->alu
.arith
;
9916 /* Slightly penalize a narrow operation as the result may
9918 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9922 /* Slightly penalize a narrow operation as the result may
9924 *cost
= 1 + COSTS_N_INSNS (1);
9926 *cost
+= extra_cost
->alu
.arith
;
9933 rtx shift_op
, shift_reg
;
9935 *cost
= COSTS_N_INSNS (1);
9937 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9938 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9940 /* UXTA[BH] or SXTA[BH]. */
9942 *cost
+= extra_cost
->alu
.extend_arith
;
9943 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9945 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9950 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9951 if (shift_op
!= NULL
)
9956 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9957 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9960 *cost
+= extra_cost
->alu
.arith_shift
;
9962 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9963 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9966 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9968 rtx mul_op
= XEXP (x
, 0);
9970 *cost
= COSTS_N_INSNS (1);
9972 if (TARGET_DSP_MULTIPLY
9973 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9974 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9975 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9976 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9977 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9978 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9979 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9980 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9981 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9982 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9983 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9984 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9989 *cost
+= extra_cost
->mult
[0].extend_add
;
9990 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9991 SIGN_EXTEND
, 0, speed_p
)
9992 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9993 SIGN_EXTEND
, 0, speed_p
)
9994 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9999 *cost
+= extra_cost
->mult
[0].add
;
10000 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
10001 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
10002 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10005 if (CONST_INT_P (XEXP (x
, 1)))
10007 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10008 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10010 *cost
= COSTS_N_INSNS (insns
);
10012 *cost
+= insns
* extra_cost
->alu
.arith
;
10013 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
10019 if (mode
== DImode
)
10022 && GET_CODE (XEXP (x
, 0)) == MULT
10023 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10024 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10025 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10026 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10028 *cost
= COSTS_N_INSNS (1);
10030 *cost
+= extra_cost
->mult
[1].extend_add
;
10031 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
10032 ZERO_EXTEND
, 0, speed_p
)
10033 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
10034 ZERO_EXTEND
, 0, speed_p
)
10035 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10039 *cost
= COSTS_N_INSNS (2);
10041 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10042 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10045 *cost
+= (extra_cost
->alu
.arith
10046 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10047 ? extra_cost
->alu
.arith
10048 : extra_cost
->alu
.arith_shift
));
10050 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
10052 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
10057 *cost
+= 2 * extra_cost
->alu
.arith
;
10062 *cost
= LIBCALL_COST (2);
10065 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10067 *cost
= COSTS_N_INSNS (1);
10069 *cost
+= extra_cost
->alu
.rev
;
10073 /* Fall through. */
10074 case AND
: case XOR
:
10075 if (mode
== SImode
)
10077 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10078 rtx op0
= XEXP (x
, 0);
10079 rtx shift_op
, shift_reg
;
10081 *cost
= COSTS_N_INSNS (1);
10085 || (code
== IOR
&& TARGET_THUMB2
)))
10086 op0
= XEXP (op0
, 0);
10089 shift_op
= shifter_op_p (op0
, &shift_reg
);
10090 if (shift_op
!= NULL
)
10095 *cost
+= extra_cost
->alu
.log_shift_reg
;
10096 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10099 *cost
+= extra_cost
->alu
.log_shift
;
10101 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10102 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10106 if (CONST_INT_P (XEXP (x
, 1)))
10108 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10109 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10112 *cost
= COSTS_N_INSNS (insns
);
10114 *cost
+= insns
* extra_cost
->alu
.logical
;
10115 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
10120 *cost
+= extra_cost
->alu
.logical
;
10121 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
10122 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10126 if (mode
== DImode
)
10128 rtx op0
= XEXP (x
, 0);
10129 enum rtx_code subcode
= GET_CODE (op0
);
10131 *cost
= COSTS_N_INSNS (2);
10135 || (code
== IOR
&& TARGET_THUMB2
)))
10136 op0
= XEXP (op0
, 0);
10138 if (GET_CODE (op0
) == ZERO_EXTEND
)
10141 *cost
+= 2 * extra_cost
->alu
.logical
;
10143 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
10144 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10147 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10150 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10152 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
10153 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10158 *cost
+= 2 * extra_cost
->alu
.logical
;
10164 *cost
= LIBCALL_COST (2);
10168 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10169 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10171 rtx op0
= XEXP (x
, 0);
10173 *cost
= COSTS_N_INSNS (1);
10175 if (GET_CODE (op0
) == NEG
)
10176 op0
= XEXP (op0
, 0);
10179 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10181 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
10182 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
10185 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10187 *cost
= LIBCALL_COST (2);
10191 if (mode
== SImode
)
10193 *cost
= COSTS_N_INSNS (1);
10194 if (TARGET_DSP_MULTIPLY
10195 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10196 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10197 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10198 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10199 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10200 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10201 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10202 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10203 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10204 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10205 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10206 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10209 /* SMUL[TB][TB]. */
10211 *cost
+= extra_cost
->mult
[0].extend
;
10212 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
10213 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
10217 *cost
+= extra_cost
->mult
[0].simple
;
10221 if (mode
== DImode
)
10224 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10225 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10226 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10227 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10229 *cost
= COSTS_N_INSNS (1);
10231 *cost
+= extra_cost
->mult
[1].extend
;
10232 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
10233 ZERO_EXTEND
, 0, speed_p
)
10234 + rtx_cost (XEXP (XEXP (x
, 1), 0),
10235 ZERO_EXTEND
, 0, speed_p
));
10239 *cost
= LIBCALL_COST (2);
10244 *cost
= LIBCALL_COST (2);
10248 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10249 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10251 *cost
= COSTS_N_INSNS (1);
10253 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10257 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10259 *cost
= LIBCALL_COST (1);
10263 if (mode
== SImode
)
10265 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10267 *cost
= COSTS_N_INSNS (2);
10268 /* Assume the non-flag-changing variant. */
10270 *cost
+= (extra_cost
->alu
.log_shift
10271 + extra_cost
->alu
.arith_shift
);
10272 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
10276 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10277 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10279 *cost
= COSTS_N_INSNS (2);
10280 /* No extra cost for MOV imm and MVN imm. */
10281 /* If the comparison op is using the flags, there's no further
10282 cost, otherwise we need to add the cost of the comparison. */
10283 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10284 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10285 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10287 *cost
+= (COSTS_N_INSNS (1)
10288 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
10290 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
10293 *cost
+= extra_cost
->alu
.arith
;
10297 *cost
= COSTS_N_INSNS (1);
10299 *cost
+= extra_cost
->alu
.arith
;
10303 if (GET_MODE_CLASS (mode
) == MODE_INT
10304 && GET_MODE_SIZE (mode
) < 4)
10306 /* Slightly disparage, as we might need an extend operation. */
10307 *cost
= 1 + COSTS_N_INSNS (1);
10309 *cost
+= extra_cost
->alu
.arith
;
10313 if (mode
== DImode
)
10315 *cost
= COSTS_N_INSNS (2);
10317 *cost
+= 2 * extra_cost
->alu
.arith
;
10322 *cost
= LIBCALL_COST (1);
10326 if (mode
== SImode
)
10329 rtx shift_reg
= NULL
;
10331 *cost
= COSTS_N_INSNS (1);
10332 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10336 if (shift_reg
!= NULL
)
10339 *cost
+= extra_cost
->alu
.log_shift_reg
;
10340 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10343 *cost
+= extra_cost
->alu
.log_shift
;
10344 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
10349 *cost
+= extra_cost
->alu
.logical
;
10352 if (mode
== DImode
)
10354 *cost
= COSTS_N_INSNS (2);
10360 *cost
+= LIBCALL_COST (1);
10365 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10367 *cost
= COSTS_N_INSNS (4);
10370 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
10371 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
10373 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
10374 /* Assume that if one arm of the if_then_else is a register,
10375 that it will be tied with the result and eliminate the
10376 conditional insn. */
10377 if (REG_P (XEXP (x
, 1)))
10379 else if (REG_P (XEXP (x
, 2)))
10385 if (extra_cost
->alu
.non_exec_costs_exec
)
10386 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10388 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10391 *cost
+= op1cost
+ op2cost
;
10397 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10401 machine_mode op0mode
;
10402 /* We'll mostly assume that the cost of a compare is the cost of the
10403 LHS. However, there are some notable exceptions. */
10405 /* Floating point compares are never done as side-effects. */
10406 op0mode
= GET_MODE (XEXP (x
, 0));
10407 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10408 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10410 *cost
= COSTS_N_INSNS (1);
10412 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10414 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10416 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10422 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10424 *cost
= LIBCALL_COST (2);
10428 /* DImode compares normally take two insns. */
10429 if (op0mode
== DImode
)
10431 *cost
= COSTS_N_INSNS (2);
10433 *cost
+= 2 * extra_cost
->alu
.arith
;
10437 if (op0mode
== SImode
)
10442 if (XEXP (x
, 1) == const0_rtx
10443 && !(REG_P (XEXP (x
, 0))
10444 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10445 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10447 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10449 /* Multiply operations that set the flags are often
10450 significantly more expensive. */
10452 && GET_CODE (XEXP (x
, 0)) == MULT
10453 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10454 *cost
+= extra_cost
->mult
[0].flag_setting
;
10457 && GET_CODE (XEXP (x
, 0)) == PLUS
10458 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10459 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10461 *cost
+= extra_cost
->mult
[0].flag_setting
;
10466 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10467 if (shift_op
!= NULL
)
10469 *cost
= COSTS_N_INSNS (1);
10470 if (shift_reg
!= NULL
)
10472 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10474 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10477 *cost
+= extra_cost
->alu
.arith_shift
;
10478 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10479 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10483 *cost
= COSTS_N_INSNS (1);
10485 *cost
+= extra_cost
->alu
.arith
;
10486 if (CONST_INT_P (XEXP (x
, 1))
10487 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10489 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10497 *cost
= LIBCALL_COST (2);
10520 if (outer_code
== SET
)
10522 /* Is it a store-flag operation? */
10523 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10524 && XEXP (x
, 1) == const0_rtx
)
10526 /* Thumb also needs an IT insn. */
10527 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10530 if (XEXP (x
, 1) == const0_rtx
)
10535 /* LSR Rd, Rn, #31. */
10536 *cost
= COSTS_N_INSNS (1);
10538 *cost
+= extra_cost
->alu
.shift
;
10548 *cost
= COSTS_N_INSNS (2);
10552 /* RSBS T1, Rn, Rn, LSR #31
10554 *cost
= COSTS_N_INSNS (2);
10556 *cost
+= extra_cost
->alu
.arith_shift
;
10560 /* RSB Rd, Rn, Rn, ASR #1
10561 LSR Rd, Rd, #31. */
10562 *cost
= COSTS_N_INSNS (2);
10564 *cost
+= (extra_cost
->alu
.arith_shift
10565 + extra_cost
->alu
.shift
);
10571 *cost
= COSTS_N_INSNS (2);
10573 *cost
+= extra_cost
->alu
.shift
;
10577 /* Remaining cases are either meaningless or would take
10578 three insns anyway. */
10579 *cost
= COSTS_N_INSNS (3);
10582 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10587 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10588 if (CONST_INT_P (XEXP (x
, 1))
10589 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10591 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10598 /* Not directly inside a set. If it involves the condition code
10599 register it must be the condition for a branch, cond_exec or
10600 I_T_E operation. Since the comparison is performed elsewhere
10601 this is just the control part which has no additional
10603 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10604 && XEXP (x
, 1) == const0_rtx
)
10612 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10613 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10615 *cost
= COSTS_N_INSNS (1);
10617 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10621 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10623 *cost
= LIBCALL_COST (1);
10627 if (mode
== SImode
)
10629 *cost
= COSTS_N_INSNS (1);
10631 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10635 *cost
= LIBCALL_COST (1);
10639 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10640 && MEM_P (XEXP (x
, 0)))
10642 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10644 if (mode
== DImode
)
10645 *cost
+= COSTS_N_INSNS (1);
10650 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10651 *cost
+= extra_cost
->ldst
.load
;
10653 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10655 if (mode
== DImode
)
10656 *cost
+= extra_cost
->alu
.shift
;
10661 /* Widening from less than 32-bits requires an extend operation. */
10662 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10664 /* We have SXTB/SXTH. */
10665 *cost
= COSTS_N_INSNS (1);
10666 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10668 *cost
+= extra_cost
->alu
.extend
;
10670 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10672 /* Needs two shifts. */
10673 *cost
= COSTS_N_INSNS (2);
10674 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10676 *cost
+= 2 * extra_cost
->alu
.shift
;
10679 /* Widening beyond 32-bits requires one more insn. */
10680 if (mode
== DImode
)
10682 *cost
+= COSTS_N_INSNS (1);
10684 *cost
+= extra_cost
->alu
.shift
;
10691 || GET_MODE (XEXP (x
, 0)) == SImode
10692 || GET_MODE (XEXP (x
, 0)) == QImode
)
10693 && MEM_P (XEXP (x
, 0)))
10695 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10697 if (mode
== DImode
)
10698 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10703 /* Widening from less than 32-bits requires an extend operation. */
10704 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10706 /* UXTB can be a shorter instruction in Thumb2, but it might
10707 be slower than the AND Rd, Rn, #255 alternative. When
10708 optimizing for speed it should never be slower to use
10709 AND, and we don't really model 16-bit vs 32-bit insns
10711 *cost
= COSTS_N_INSNS (1);
10713 *cost
+= extra_cost
->alu
.logical
;
10715 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10717 /* We have UXTB/UXTH. */
10718 *cost
= COSTS_N_INSNS (1);
10719 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10721 *cost
+= extra_cost
->alu
.extend
;
10723 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10725 /* Needs two shifts. It's marginally preferable to use
10726 shifts rather than two BIC instructions as the second
10727 shift may merge with a subsequent insn as a shifter
10729 *cost
= COSTS_N_INSNS (2);
10730 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10732 *cost
+= 2 * extra_cost
->alu
.shift
;
10734 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10735 *cost
= COSTS_N_INSNS (1);
10737 /* Widening beyond 32-bits requires one more insn. */
10738 if (mode
== DImode
)
10740 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10747 /* CONST_INT has no mode, so we cannot tell for sure how many
10748 insns are really going to be needed. The best we can do is
10749 look at the value passed. If it fits in SImode, then assume
10750 that's the mode it will be used for. Otherwise assume it
10751 will be used in DImode. */
10752 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10757 /* Avoid blowing up in arm_gen_constant (). */
10758 if (!(outer_code
== PLUS
10759 || outer_code
== AND
10760 || outer_code
== IOR
10761 || outer_code
== XOR
10762 || outer_code
== MINUS
))
10766 if (mode
== SImode
)
10768 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10769 INTVAL (x
), NULL
, NULL
,
10775 *cost
+= COSTS_N_INSNS (arm_gen_constant
10776 (outer_code
, SImode
, NULL
,
10777 trunc_int_for_mode (INTVAL (x
), SImode
),
10779 + arm_gen_constant (outer_code
, SImode
, NULL
,
10780 INTVAL (x
) >> 32, NULL
,
10792 if (arm_arch_thumb2
&& !flag_pic
)
10793 *cost
= COSTS_N_INSNS (2);
10795 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10798 *cost
= COSTS_N_INSNS (2);
10802 *cost
+= COSTS_N_INSNS (1);
10804 *cost
+= extra_cost
->alu
.arith
;
10810 *cost
= COSTS_N_INSNS (4);
10815 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10816 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10818 if (vfp3_const_double_rtx (x
))
10820 *cost
= COSTS_N_INSNS (1);
10822 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10828 *cost
= COSTS_N_INSNS (1);
10829 if (mode
== DFmode
)
10830 *cost
+= extra_cost
->ldst
.loadd
;
10832 *cost
+= extra_cost
->ldst
.loadf
;
10835 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10839 *cost
= COSTS_N_INSNS (4);
10845 && TARGET_HARD_FLOAT
10846 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10847 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10848 *cost
= COSTS_N_INSNS (1);
10850 *cost
= COSTS_N_INSNS (4);
10855 *cost
= COSTS_N_INSNS (1);
10856 /* When optimizing for size, we prefer constant pool entries to
10857 MOVW/MOVT pairs, so bump the cost of these slightly. */
10863 *cost
= COSTS_N_INSNS (1);
10865 *cost
+= extra_cost
->alu
.clz
;
10869 if (XEXP (x
, 1) == const0_rtx
)
10871 *cost
= COSTS_N_INSNS (1);
10873 *cost
+= extra_cost
->alu
.log_shift
;
10874 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10877 /* Fall through. */
10881 *cost
= COSTS_N_INSNS (2);
10885 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10886 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10887 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10888 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10889 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10890 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10891 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10892 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10895 *cost
= COSTS_N_INSNS (1);
10897 *cost
+= extra_cost
->mult
[1].extend
;
10898 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10900 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10904 *cost
= LIBCALL_COST (1);
10908 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10911 /* Reading the PC is like reading any other register. Writing it
10912 is more expensive, but we take that into account elsewhere. */
10917 /* TODO: Simple zero_extract of bottom bits using AND. */
10918 /* Fall through. */
10922 && CONST_INT_P (XEXP (x
, 1))
10923 && CONST_INT_P (XEXP (x
, 2)))
10925 *cost
= COSTS_N_INSNS (1);
10927 *cost
+= extra_cost
->alu
.bfx
;
10928 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10931 /* Without UBFX/SBFX, need to resort to shift operations. */
10932 *cost
= COSTS_N_INSNS (2);
10934 *cost
+= 2 * extra_cost
->alu
.shift
;
10935 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10939 if (TARGET_HARD_FLOAT
)
10941 *cost
= COSTS_N_INSNS (1);
10943 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10944 if (!TARGET_FPU_ARMV8
10945 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10947 /* Pre v8, widening HF->DF is a two-step process, first
10948 widening to SFmode. */
10949 *cost
+= COSTS_N_INSNS (1);
10951 *cost
+= extra_cost
->fp
[0].widen
;
10953 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10957 *cost
= LIBCALL_COST (1);
10960 case FLOAT_TRUNCATE
:
10961 if (TARGET_HARD_FLOAT
)
10963 *cost
= COSTS_N_INSNS (1);
10965 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10966 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10968 /* Vector modes? */
10970 *cost
= LIBCALL_COST (1);
10974 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10976 rtx op0
= XEXP (x
, 0);
10977 rtx op1
= XEXP (x
, 1);
10978 rtx op2
= XEXP (x
, 2);
10980 *cost
= COSTS_N_INSNS (1);
10982 /* vfms or vfnma. */
10983 if (GET_CODE (op0
) == NEG
)
10984 op0
= XEXP (op0
, 0);
10986 /* vfnms or vfnma. */
10987 if (GET_CODE (op2
) == NEG
)
10988 op2
= XEXP (op2
, 0);
10990 *cost
+= rtx_cost (op0
, FMA
, 0, speed_p
);
10991 *cost
+= rtx_cost (op1
, FMA
, 1, speed_p
);
10992 *cost
+= rtx_cost (op2
, FMA
, 2, speed_p
);
10995 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
11000 *cost
= LIBCALL_COST (3);
11005 if (TARGET_HARD_FLOAT
)
11007 if (GET_MODE_CLASS (mode
) == MODE_INT
)
11009 *cost
= COSTS_N_INSNS (1);
11011 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
11012 /* Strip of the 'cost' of rounding towards zero. */
11013 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11014 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
11016 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
11017 /* ??? Increase the cost to deal with transferring from
11018 FP -> CORE registers? */
11021 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11022 && TARGET_FPU_ARMV8
)
11024 *cost
= COSTS_N_INSNS (1);
11026 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11029 /* Vector costs? */
11031 *cost
= LIBCALL_COST (1);
11035 case UNSIGNED_FLOAT
:
11036 if (TARGET_HARD_FLOAT
)
11038 /* ??? Increase the cost to deal with transferring from CORE
11039 -> FP registers? */
11040 *cost
= COSTS_N_INSNS (1);
11042 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11045 *cost
= LIBCALL_COST (1);
11049 *cost
= COSTS_N_INSNS (1);
11054 /* Just a guess. Guess number of instructions in the asm
11055 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11056 though (see PR60663). */
11057 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11058 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11060 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11064 if (mode
!= VOIDmode
)
11065 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11067 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11072 #undef HANDLE_NARROW_SHIFT_ARITH
11074 /* RTX costs when optimizing for size. */
11076 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
11077 int *total
, bool speed
)
11081 if (TARGET_OLD_RTX_COSTS
11082 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
11084 /* Old way. (Deprecated.) */
11086 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
11087 (enum rtx_code
) outer_code
, total
);
11089 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
11090 (enum rtx_code
) outer_code
, total
,
11096 if (current_tune
->insn_extra_cost
)
11097 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11098 (enum rtx_code
) outer_code
,
11099 current_tune
->insn_extra_cost
,
11101 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11102 && current_tune->insn_extra_cost != NULL */
11104 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11105 (enum rtx_code
) outer_code
,
11106 &generic_extra_costs
, total
, speed
);
11109 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
11111 print_rtl_single (dump_file
, x
);
11112 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11113 *total
, result
? "final" : "partial");
11118 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11119 supported on any "slowmul" cores, so it can be ignored. */
11122 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11123 int *total
, bool speed
)
11125 machine_mode mode
= GET_MODE (x
);
11129 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11136 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11139 *total
= COSTS_N_INSNS (20);
11143 if (CONST_INT_P (XEXP (x
, 1)))
11145 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11146 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11147 int cost
, const_ok
= const_ok_for_arm (i
);
11148 int j
, booth_unit_size
;
11150 /* Tune as appropriate. */
11151 cost
= const_ok
? 4 : 8;
11152 booth_unit_size
= 2;
11153 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11155 i
>>= booth_unit_size
;
11159 *total
= COSTS_N_INSNS (cost
);
11160 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
11164 *total
= COSTS_N_INSNS (20);
11168 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
11173 /* RTX cost for cores with a fast multiply unit (M variants). */
11176 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11177 int *total
, bool speed
)
11179 machine_mode mode
= GET_MODE (x
);
11183 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11187 /* ??? should thumb2 use different costs? */
11191 /* There is no point basing this on the tuning, since it is always the
11192 fast variant if it exists at all. */
11194 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11195 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11196 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11198 *total
= COSTS_N_INSNS(2);
11203 if (mode
== DImode
)
11205 *total
= COSTS_N_INSNS (5);
11209 if (CONST_INT_P (XEXP (x
, 1)))
11211 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11212 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11213 int cost
, const_ok
= const_ok_for_arm (i
);
11214 int j
, booth_unit_size
;
11216 /* Tune as appropriate. */
11217 cost
= const_ok
? 4 : 8;
11218 booth_unit_size
= 8;
11219 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11221 i
>>= booth_unit_size
;
11225 *total
= COSTS_N_INSNS(cost
);
11229 if (mode
== SImode
)
11231 *total
= COSTS_N_INSNS (4);
11235 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11237 if (TARGET_HARD_FLOAT
11239 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11241 *total
= COSTS_N_INSNS (1);
11246 /* Requires a lib call */
11247 *total
= COSTS_N_INSNS (20);
11251 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11256 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11257 so it can be ignored. */
11260 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11261 int *total
, bool speed
)
11263 machine_mode mode
= GET_MODE (x
);
11267 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11274 if (GET_CODE (XEXP (x
, 0)) != MULT
)
11275 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11277 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11278 will stall until the multiplication is complete. */
11279 *total
= COSTS_N_INSNS (3);
11283 /* There is no point basing this on the tuning, since it is always the
11284 fast variant if it exists at all. */
11286 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11287 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11288 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11290 *total
= COSTS_N_INSNS (2);
11295 if (mode
== DImode
)
11297 *total
= COSTS_N_INSNS (5);
11301 if (CONST_INT_P (XEXP (x
, 1)))
11303 /* If operand 1 is a constant we can more accurately
11304 calculate the cost of the multiply. The multiplier can
11305 retire 15 bits on the first cycle and a further 12 on the
11306 second. We do, of course, have to load the constant into
11307 a register first. */
11308 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
11309 /* There's a general overhead of one cycle. */
11311 unsigned HOST_WIDE_INT masked_const
;
11313 if (i
& 0x80000000)
11316 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
11318 masked_const
= i
& 0xffff8000;
11319 if (masked_const
!= 0)
11322 masked_const
= i
& 0xf8000000;
11323 if (masked_const
!= 0)
11326 *total
= COSTS_N_INSNS (cost
);
11330 if (mode
== SImode
)
11332 *total
= COSTS_N_INSNS (3);
11336 /* Requires a lib call */
11337 *total
= COSTS_N_INSNS (20);
11341 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11346 /* RTX costs for 9e (and later) cores. */
11349 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11350 int *total
, bool speed
)
11352 machine_mode mode
= GET_MODE (x
);
11359 *total
= COSTS_N_INSNS (3);
11363 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11371 /* There is no point basing this on the tuning, since it is always the
11372 fast variant if it exists at all. */
11374 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11375 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11376 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11378 *total
= COSTS_N_INSNS (2);
11383 if (mode
== DImode
)
11385 *total
= COSTS_N_INSNS (5);
11389 if (mode
== SImode
)
11391 *total
= COSTS_N_INSNS (2);
11395 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11397 if (TARGET_HARD_FLOAT
11399 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11401 *total
= COSTS_N_INSNS (1);
11406 *total
= COSTS_N_INSNS (20);
11410 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11413 /* All address computations that can be done are free, but rtx cost returns
11414 the same for practically all of them. So we weight the different types
11415 of address here in the order (most pref first):
11416 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11418 arm_arm_address_cost (rtx x
)
11420 enum rtx_code c
= GET_CODE (x
);
11422 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11424 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11429 if (CONST_INT_P (XEXP (x
, 1)))
11432 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11442 arm_thumb_address_cost (rtx x
)
11444 enum rtx_code c
= GET_CODE (x
);
11449 && REG_P (XEXP (x
, 0))
11450 && CONST_INT_P (XEXP (x
, 1)))
11457 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11458 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11460 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11463 /* Adjust cost hook for XScale. */
11465 xscale_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11467 /* Some true dependencies can have a higher cost depending
11468 on precisely how certain input operands are used. */
11469 if (REG_NOTE_KIND(link
) == 0
11470 && recog_memoized (insn
) >= 0
11471 && recog_memoized (dep
) >= 0)
11473 int shift_opnum
= get_attr_shift (insn
);
11474 enum attr_type attr_type
= get_attr_type (dep
);
11476 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11477 operand for INSN. If we have a shifted input operand and the
11478 instruction we depend on is another ALU instruction, then we may
11479 have to account for an additional stall. */
11480 if (shift_opnum
!= 0
11481 && (attr_type
== TYPE_ALU_SHIFT_IMM
11482 || attr_type
== TYPE_ALUS_SHIFT_IMM
11483 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11484 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11485 || attr_type
== TYPE_ALU_SHIFT_REG
11486 || attr_type
== TYPE_ALUS_SHIFT_REG
11487 || attr_type
== TYPE_LOGIC_SHIFT_REG
11488 || attr_type
== TYPE_LOGICS_SHIFT_REG
11489 || attr_type
== TYPE_MOV_SHIFT
11490 || attr_type
== TYPE_MVN_SHIFT
11491 || attr_type
== TYPE_MOV_SHIFT_REG
11492 || attr_type
== TYPE_MVN_SHIFT_REG
))
11494 rtx shifted_operand
;
11497 /* Get the shifted operand. */
11498 extract_insn (insn
);
11499 shifted_operand
= recog_data
.operand
[shift_opnum
];
11501 /* Iterate over all the operands in DEP. If we write an operand
11502 that overlaps with SHIFTED_OPERAND, then we have increase the
11503 cost of this dependency. */
11504 extract_insn (dep
);
11505 preprocess_constraints (dep
);
11506 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11508 /* We can ignore strict inputs. */
11509 if (recog_data
.operand_type
[opno
] == OP_IN
)
11512 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11524 /* Adjust cost hook for Cortex A9. */
11526 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11528 switch (REG_NOTE_KIND (link
))
11535 case REG_DEP_OUTPUT
:
11536 if (recog_memoized (insn
) >= 0
11537 && recog_memoized (dep
) >= 0)
11539 if (GET_CODE (PATTERN (insn
)) == SET
)
11542 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11544 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11546 enum attr_type attr_type_insn
= get_attr_type (insn
);
11547 enum attr_type attr_type_dep
= get_attr_type (dep
);
11549 /* By default all dependencies of the form
11552 have an extra latency of 1 cycle because
11553 of the input and output dependency in this
11554 case. However this gets modeled as an true
11555 dependency and hence all these checks. */
11556 if (REG_P (SET_DEST (PATTERN (insn
)))
11557 && REG_P (SET_DEST (PATTERN (dep
)))
11558 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11559 SET_DEST (PATTERN (dep
))))
11561 /* FMACS is a special case where the dependent
11562 instruction can be issued 3 cycles before
11563 the normal latency in case of an output
11565 if ((attr_type_insn
== TYPE_FMACS
11566 || attr_type_insn
== TYPE_FMACD
)
11567 && (attr_type_dep
== TYPE_FMACS
11568 || attr_type_dep
== TYPE_FMACD
))
11570 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11571 *cost
= insn_default_latency (dep
) - 3;
11573 *cost
= insn_default_latency (dep
);
11578 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11579 *cost
= insn_default_latency (dep
) + 1;
11581 *cost
= insn_default_latency (dep
);
11591 gcc_unreachable ();
11597 /* Adjust cost hook for FA726TE. */
11599 fa726te_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11601 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11602 have penalty of 3. */
11603 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11604 && recog_memoized (insn
) >= 0
11605 && recog_memoized (dep
) >= 0
11606 && get_attr_conds (dep
) == CONDS_SET
)
11608 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11609 if (get_attr_conds (insn
) == CONDS_USE
11610 && get_attr_type (insn
) != TYPE_BRANCH
)
11616 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11617 || get_attr_conds (insn
) == CONDS_USE
)
11627 /* Implement TARGET_REGISTER_MOVE_COST.
11629 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11630 it is typically more expensive than a single memory access. We set
11631 the cost to less than two memory accesses so that floating
11632 point to integer conversion does not go through memory. */
11635 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11636 reg_class_t from
, reg_class_t to
)
11640 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11641 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11643 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11644 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11646 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11653 if (from
== HI_REGS
|| to
== HI_REGS
)
11660 /* Implement TARGET_MEMORY_MOVE_COST. */
11663 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11664 bool in ATTRIBUTE_UNUSED
)
11670 if (GET_MODE_SIZE (mode
) < 4)
11673 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11677 /* Vectorizer cost model implementation. */
11679 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11681 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11683 int misalign ATTRIBUTE_UNUSED
)
11687 switch (type_of_cost
)
11690 return current_tune
->vec_costs
->scalar_stmt_cost
;
11693 return current_tune
->vec_costs
->scalar_load_cost
;
11696 return current_tune
->vec_costs
->scalar_store_cost
;
11699 return current_tune
->vec_costs
->vec_stmt_cost
;
11702 return current_tune
->vec_costs
->vec_align_load_cost
;
11705 return current_tune
->vec_costs
->vec_store_cost
;
11707 case vec_to_scalar
:
11708 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11710 case scalar_to_vec
:
11711 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11713 case unaligned_load
:
11714 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11716 case unaligned_store
:
11717 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11719 case cond_branch_taken
:
11720 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11722 case cond_branch_not_taken
:
11723 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11726 case vec_promote_demote
:
11727 return current_tune
->vec_costs
->vec_stmt_cost
;
11729 case vec_construct
:
11730 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11731 return elements
/ 2 + 1;
11734 gcc_unreachable ();
11738 /* Implement targetm.vectorize.add_stmt_cost. */
11741 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11742 struct _stmt_vec_info
*stmt_info
, int misalign
,
11743 enum vect_cost_model_location where
)
11745 unsigned *cost
= (unsigned *) data
;
11746 unsigned retval
= 0;
11748 if (flag_vect_cost_model
)
11750 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11751 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11753 /* Statements in an inner loop relative to the loop being
11754 vectorized are weighted more heavily. The value here is
11755 arbitrary and could potentially be improved with analysis. */
11756 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11757 count
*= 50; /* FIXME. */
11759 retval
= (unsigned) (count
* stmt_cost
);
11760 cost
[where
] += retval
;
11766 /* Return true if and only if this insn can dual-issue only as older. */
11768 cortexa7_older_only (rtx_insn
*insn
)
11770 if (recog_memoized (insn
) < 0)
11773 switch (get_attr_type (insn
))
11775 case TYPE_ALU_DSP_REG
:
11776 case TYPE_ALU_SREG
:
11777 case TYPE_ALUS_SREG
:
11778 case TYPE_LOGIC_REG
:
11779 case TYPE_LOGICS_REG
:
11781 case TYPE_ADCS_REG
:
11786 case TYPE_SHIFT_IMM
:
11787 case TYPE_SHIFT_REG
:
11788 case TYPE_LOAD_BYTE
:
11791 case TYPE_FFARITHS
:
11793 case TYPE_FFARITHD
:
11811 case TYPE_F_STORES
:
11818 /* Return true if and only if this insn can dual-issue as younger. */
11820 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11822 if (recog_memoized (insn
) < 0)
11825 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11829 switch (get_attr_type (insn
))
11832 case TYPE_ALUS_IMM
:
11833 case TYPE_LOGIC_IMM
:
11834 case TYPE_LOGICS_IMM
:
11839 case TYPE_MOV_SHIFT
:
11840 case TYPE_MOV_SHIFT_REG
:
11850 /* Look for an instruction that can dual issue only as an older
11851 instruction, and move it in front of any instructions that can
11852 dual-issue as younger, while preserving the relative order of all
11853 other instructions in the ready list. This is a hueuristic to help
11854 dual-issue in later cycles, by postponing issue of more flexible
11855 instructions. This heuristic may affect dual issue opportunities
11856 in the current cycle. */
11858 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11859 int *n_readyp
, int clock
)
11862 int first_older_only
= -1, first_younger
= -1;
11866 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11870 /* Traverse the ready list from the head (the instruction to issue
11871 first), and looking for the first instruction that can issue as
11872 younger and the first instruction that can dual-issue only as
11874 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11876 rtx_insn
*insn
= ready
[i
];
11877 if (cortexa7_older_only (insn
))
11879 first_older_only
= i
;
11881 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11884 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11888 /* Nothing to reorder because either no younger insn found or insn
11889 that can dual-issue only as older appears before any insn that
11890 can dual-issue as younger. */
11891 if (first_younger
== -1)
11894 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11898 /* Nothing to reorder because no older-only insn in the ready list. */
11899 if (first_older_only
== -1)
11902 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11906 /* Move first_older_only insn before first_younger. */
11908 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11909 INSN_UID(ready
[first_older_only
]),
11910 INSN_UID(ready
[first_younger
]));
11911 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11912 for (i
= first_older_only
; i
< first_younger
; i
++)
11914 ready
[i
] = ready
[i
+1];
11917 ready
[i
] = first_older_only_insn
;
11921 /* Implement TARGET_SCHED_REORDER. */
11923 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11929 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11932 /* Do nothing for other cores. */
11936 return arm_issue_rate ();
11939 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11940 It corrects the value of COST based on the relationship between
11941 INSN and DEP through the dependence LINK. It returns the new
11942 value. There is a per-core adjust_cost hook to adjust scheduler costs
11943 and the per-core hook can choose to completely override the generic
11944 adjust_cost function. Only put bits of code into arm_adjust_cost that
11945 are common across all cores. */
11947 arm_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int cost
)
11951 /* When generating Thumb-1 code, we want to place flag-setting operations
11952 close to a conditional branch which depends on them, so that we can
11953 omit the comparison. */
11955 && REG_NOTE_KIND (link
) == 0
11956 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11957 && recog_memoized (dep
) >= 0
11958 && get_attr_conds (dep
) == CONDS_SET
)
11961 if (current_tune
->sched_adjust_cost
!= NULL
)
11963 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
11967 /* XXX Is this strictly true? */
11968 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
11969 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11972 /* Call insns don't incur a stall, even if they follow a load. */
11973 if (REG_NOTE_KIND (link
) == 0
11977 if ((i_pat
= single_set (insn
)) != NULL
11978 && MEM_P (SET_SRC (i_pat
))
11979 && (d_pat
= single_set (dep
)) != NULL
11980 && MEM_P (SET_DEST (d_pat
)))
11982 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11983 /* This is a load after a store, there is no conflict if the load reads
11984 from a cached area. Assume that loads from the stack, and from the
11985 constant pool are cached, and that others will miss. This is a
11988 if ((GET_CODE (src_mem
) == SYMBOL_REF
11989 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11990 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11991 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11992 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
12000 arm_max_conditional_execute (void)
12002 return max_insns_skipped
;
12006 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12009 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12011 return (optimize
> 0) ? 2 : 0;
12015 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12017 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12020 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12021 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12022 sequences of non-executed instructions in IT blocks probably take the same
12023 amount of time as executed instructions (and the IT instruction itself takes
12024 space in icache). This function was experimentally determined to give good
12025 results on a popular embedded benchmark. */
12028 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
12030 return (TARGET_32BIT
&& speed_p
) ? 1
12031 : arm_default_branch_cost (speed_p
, predictable_p
);
12034 static bool fp_consts_inited
= false;
12036 static REAL_VALUE_TYPE value_fp0
;
12039 init_fp_table (void)
12043 r
= REAL_VALUE_ATOF ("0", DFmode
);
12045 fp_consts_inited
= true;
12048 /* Return TRUE if rtx X is a valid immediate FP constant. */
12050 arm_const_double_rtx (rtx x
)
12054 if (!fp_consts_inited
)
12057 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12058 if (REAL_VALUE_MINUS_ZERO (r
))
12061 if (REAL_VALUES_EQUAL (r
, value_fp0
))
12067 /* VFPv3 has a fairly wide range of representable immediates, formed from
12068 "quarter-precision" floating-point values. These can be evaluated using this
12069 formula (with ^ for exponentiation):
12073 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12074 16 <= n <= 31 and 0 <= r <= 7.
12076 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12078 - A (most-significant) is the sign bit.
12079 - BCD are the exponent (encoded as r XOR 3).
12080 - EFGH are the mantissa (encoded as n - 16).
12083 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12084 fconst[sd] instruction, or -1 if X isn't suitable. */
12086 vfp3_const_double_index (rtx x
)
12088 REAL_VALUE_TYPE r
, m
;
12089 int sign
, exponent
;
12090 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12091 unsigned HOST_WIDE_INT mask
;
12092 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12095 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12098 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12100 /* We can't represent these things, so detect them first. */
12101 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12104 /* Extract sign, exponent and mantissa. */
12105 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12106 r
= real_value_abs (&r
);
12107 exponent
= REAL_EXP (&r
);
12108 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12109 highest (sign) bit, with a fixed binary point at bit point_pos.
12110 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12111 bits for the mantissa, this may fail (low bits would be lost). */
12112 real_ldexp (&m
, &r
, point_pos
- exponent
);
12113 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12114 mantissa
= w
.elt (0);
12115 mant_hi
= w
.elt (1);
12117 /* If there are bits set in the low part of the mantissa, we can't
12118 represent this value. */
12122 /* Now make it so that mantissa contains the most-significant bits, and move
12123 the point_pos to indicate that the least-significant bits have been
12125 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12126 mantissa
= mant_hi
;
12128 /* We can permit four significant bits of mantissa only, plus a high bit
12129 which is always 1. */
12130 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
12131 if ((mantissa
& mask
) != 0)
12134 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12135 mantissa
>>= point_pos
- 5;
12137 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12138 floating-point immediate zero with Neon using an integer-zero load, but
12139 that case is handled elsewhere.) */
12143 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12145 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12146 normalized significands are in the range [1, 2). (Our mantissa is shifted
12147 left 4 places at this point relative to normalized IEEE754 values). GCC
12148 internally uses [0.5, 1) (see real.c), so the exponent returned from
12149 REAL_EXP must be altered. */
12150 exponent
= 5 - exponent
;
12152 if (exponent
< 0 || exponent
> 7)
12155 /* Sign, mantissa and exponent are now in the correct form to plug into the
12156 formula described in the comment above. */
12157 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12160 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12162 vfp3_const_double_rtx (rtx x
)
12167 return vfp3_const_double_index (x
) != -1;
12170 /* Recognize immediates which can be used in various Neon instructions. Legal
12171 immediates are described by the following table (for VMVN variants, the
12172 bitwise inverse of the constant shown is recognized. In either case, VMOV
12173 is output and the correct instruction to use for a given constant is chosen
12174 by the assembler). The constant shown is replicated across all elements of
12175 the destination vector.
12177 insn elems variant constant (binary)
12178 ---- ----- ------- -----------------
12179 vmov i32 0 00000000 00000000 00000000 abcdefgh
12180 vmov i32 1 00000000 00000000 abcdefgh 00000000
12181 vmov i32 2 00000000 abcdefgh 00000000 00000000
12182 vmov i32 3 abcdefgh 00000000 00000000 00000000
12183 vmov i16 4 00000000 abcdefgh
12184 vmov i16 5 abcdefgh 00000000
12185 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12186 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12187 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12188 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12189 vmvn i16 10 00000000 abcdefgh
12190 vmvn i16 11 abcdefgh 00000000
12191 vmov i32 12 00000000 00000000 abcdefgh 11111111
12192 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12193 vmov i32 14 00000000 abcdefgh 11111111 11111111
12194 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12195 vmov i8 16 abcdefgh
12196 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12197 eeeeeeee ffffffff gggggggg hhhhhhhh
12198 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12199 vmov f32 19 00000000 00000000 00000000 00000000
12201 For case 18, B = !b. Representable values are exactly those accepted by
12202 vfp3_const_double_index, but are output as floating-point numbers rather
12205 For case 19, we will change it to vmov.i32 when assembling.
12207 Variants 0-5 (inclusive) may also be used as immediates for the second
12208 operand of VORR/VBIC instructions.
12210 The INVERSE argument causes the bitwise inverse of the given operand to be
12211 recognized instead (used for recognizing legal immediates for the VAND/VORN
12212 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12213 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12214 output, rather than the real insns vbic/vorr).
12216 INVERSE makes no difference to the recognition of float vectors.
12218 The return value is the variant of immediate as shown in the above table, or
12219 -1 if the given value doesn't match any of the listed patterns.
12222 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12223 rtx
*modconst
, int *elementwidth
)
12225 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12227 for (i = 0; i < idx; i += (STRIDE)) \
12232 immtype = (CLASS); \
12233 elsize = (ELSIZE); \
12237 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12238 unsigned int innersize
;
12239 unsigned char bytes
[16];
12240 int immtype
= -1, matches
;
12241 unsigned int invmask
= inverse
? 0xff : 0;
12242 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12246 n_elts
= CONST_VECTOR_NUNITS (op
);
12247 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12252 if (mode
== VOIDmode
)
12254 innersize
= GET_MODE_SIZE (mode
);
12257 /* Vectors of float constants. */
12258 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12260 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12261 REAL_VALUE_TYPE r0
;
12263 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12266 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
12268 for (i
= 1; i
< n_elts
; i
++)
12270 rtx elt
= CONST_VECTOR_ELT (op
, i
);
12271 REAL_VALUE_TYPE re
;
12273 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
12275 if (!REAL_VALUES_EQUAL (r0
, re
))
12280 *modconst
= CONST_VECTOR_ELT (op
, 0);
12285 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12291 /* Splat vector constant out into a byte vector. */
12292 for (i
= 0; i
< n_elts
; i
++)
12294 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12295 unsigned HOST_WIDE_INT elpart
;
12296 unsigned int part
, parts
;
12298 if (CONST_INT_P (el
))
12300 elpart
= INTVAL (el
);
12303 else if (CONST_DOUBLE_P (el
))
12305 elpart
= CONST_DOUBLE_LOW (el
);
12309 gcc_unreachable ();
12311 for (part
= 0; part
< parts
; part
++)
12314 for (byte
= 0; byte
< innersize
; byte
++)
12316 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12317 elpart
>>= BITS_PER_UNIT
;
12319 if (CONST_DOUBLE_P (el
))
12320 elpart
= CONST_DOUBLE_HIGH (el
);
12324 /* Sanity check. */
12325 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12329 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12330 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12332 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12333 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12335 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12336 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12338 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12339 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12341 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12343 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12345 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12346 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12348 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12349 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12351 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12352 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12354 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12355 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12357 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12359 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12361 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12362 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12364 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12365 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12367 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12368 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12370 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12371 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12373 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12375 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12376 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12384 *elementwidth
= elsize
;
12388 unsigned HOST_WIDE_INT imm
= 0;
12390 /* Un-invert bytes of recognized vector, if necessary. */
12392 for (i
= 0; i
< idx
; i
++)
12393 bytes
[i
] ^= invmask
;
12397 /* FIXME: Broken on 32-bit H_W_I hosts. */
12398 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12400 for (i
= 0; i
< 8; i
++)
12401 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12402 << (i
* BITS_PER_UNIT
);
12404 *modconst
= GEN_INT (imm
);
12408 unsigned HOST_WIDE_INT imm
= 0;
12410 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12411 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12413 *modconst
= GEN_INT (imm
);
12421 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12422 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12423 float elements), and a modified constant (whatever should be output for a
12424 VMOV) in *MODCONST. */
12427 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
12428 rtx
*modconst
, int *elementwidth
)
12432 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12438 *modconst
= tmpconst
;
12441 *elementwidth
= tmpwidth
;
12446 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12447 the immediate is valid, write a constant suitable for using as an operand
12448 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12449 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12452 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
12453 rtx
*modconst
, int *elementwidth
)
12457 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12459 if (retval
< 0 || retval
> 5)
12463 *modconst
= tmpconst
;
12466 *elementwidth
= tmpwidth
;
12471 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12472 the immediate is valid, write a constant suitable for using as an operand
12473 to VSHR/VSHL to *MODCONST and the corresponding element width to
12474 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12475 because they have different limitations. */
12478 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12479 rtx
*modconst
, int *elementwidth
,
12482 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12483 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12484 unsigned HOST_WIDE_INT last_elt
= 0;
12485 unsigned HOST_WIDE_INT maxshift
;
12487 /* Split vector constant out into a byte vector. */
12488 for (i
= 0; i
< n_elts
; i
++)
12490 rtx el
= CONST_VECTOR_ELT (op
, i
);
12491 unsigned HOST_WIDE_INT elpart
;
12493 if (CONST_INT_P (el
))
12494 elpart
= INTVAL (el
);
12495 else if (CONST_DOUBLE_P (el
))
12498 gcc_unreachable ();
12500 if (i
!= 0 && elpart
!= last_elt
)
12506 /* Shift less than element size. */
12507 maxshift
= innersize
* 8;
12511 /* Left shift immediate value can be from 0 to <size>-1. */
12512 if (last_elt
>= maxshift
)
12517 /* Right shift immediate value can be from 1 to <size>. */
12518 if (last_elt
== 0 || last_elt
> maxshift
)
12523 *elementwidth
= innersize
* 8;
12526 *modconst
= CONST_VECTOR_ELT (op
, 0);
12531 /* Return a string suitable for output of Neon immediate logic operation
12535 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12536 int inverse
, int quad
)
12538 int width
, is_valid
;
12539 static char templ
[40];
12541 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12543 gcc_assert (is_valid
!= 0);
12546 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12548 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12553 /* Return a string suitable for output of Neon immediate shift operation
12554 (VSHR or VSHL) MNEM. */
12557 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12558 machine_mode mode
, int quad
,
12561 int width
, is_valid
;
12562 static char templ
[40];
12564 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12565 gcc_assert (is_valid
!= 0);
12568 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12570 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12575 /* Output a sequence of pairwise operations to implement a reduction.
12576 NOTE: We do "too much work" here, because pairwise operations work on two
12577 registers-worth of operands in one go. Unfortunately we can't exploit those
12578 extra calculations to do the full operation in fewer steps, I don't think.
12579 Although all vector elements of the result but the first are ignored, we
12580 actually calculate the same result in each of the elements. An alternative
12581 such as initially loading a vector with zero to use as each of the second
12582 operands would use up an additional register and take an extra instruction,
12583 for no particular gain. */
12586 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12587 rtx (*reduc
) (rtx
, rtx
, rtx
))
12589 machine_mode inner
= GET_MODE_INNER (mode
);
12590 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
12593 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12595 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12596 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12601 /* If VALS is a vector constant that can be loaded into a register
12602 using VDUP, generate instructions to do so and return an RTX to
12603 assign to the register. Otherwise return NULL_RTX. */
12606 neon_vdup_constant (rtx vals
)
12608 machine_mode mode
= GET_MODE (vals
);
12609 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12610 int n_elts
= GET_MODE_NUNITS (mode
);
12611 bool all_same
= true;
12615 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12618 for (i
= 0; i
< n_elts
; ++i
)
12620 x
= XVECEXP (vals
, 0, i
);
12621 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12626 /* The elements are not all the same. We could handle repeating
12627 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12628 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12632 /* We can load this constant by using VDUP and a constant in a
12633 single ARM register. This will be cheaper than a vector
12636 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12637 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12640 /* Generate code to load VALS, which is a PARALLEL containing only
12641 constants (for vec_init) or CONST_VECTOR, efficiently into a
12642 register. Returns an RTX to copy into the register, or NULL_RTX
12643 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12646 neon_make_constant (rtx vals
)
12648 machine_mode mode
= GET_MODE (vals
);
12650 rtx const_vec
= NULL_RTX
;
12651 int n_elts
= GET_MODE_NUNITS (mode
);
12655 if (GET_CODE (vals
) == CONST_VECTOR
)
12657 else if (GET_CODE (vals
) == PARALLEL
)
12659 /* A CONST_VECTOR must contain only CONST_INTs and
12660 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12661 Only store valid constants in a CONST_VECTOR. */
12662 for (i
= 0; i
< n_elts
; ++i
)
12664 rtx x
= XVECEXP (vals
, 0, i
);
12665 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12668 if (n_const
== n_elts
)
12669 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12672 gcc_unreachable ();
12674 if (const_vec
!= NULL
12675 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12676 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12678 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12679 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12680 pipeline cycle; creating the constant takes one or two ARM
12681 pipeline cycles. */
12683 else if (const_vec
!= NULL_RTX
)
12684 /* Load from constant pool. On Cortex-A8 this takes two cycles
12685 (for either double or quad vectors). We can not take advantage
12686 of single-cycle VLD1 because we need a PC-relative addressing
12690 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12691 We can not construct an initializer. */
12695 /* Initialize vector TARGET to VALS. */
12698 neon_expand_vector_init (rtx target
, rtx vals
)
12700 machine_mode mode
= GET_MODE (target
);
12701 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12702 int n_elts
= GET_MODE_NUNITS (mode
);
12703 int n_var
= 0, one_var
= -1;
12704 bool all_same
= true;
12708 for (i
= 0; i
< n_elts
; ++i
)
12710 x
= XVECEXP (vals
, 0, i
);
12711 if (!CONSTANT_P (x
))
12712 ++n_var
, one_var
= i
;
12714 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12720 rtx constant
= neon_make_constant (vals
);
12721 if (constant
!= NULL_RTX
)
12723 emit_move_insn (target
, constant
);
12728 /* Splat a single non-constant element if we can. */
12729 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12731 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12732 emit_insn (gen_rtx_SET (VOIDmode
, target
,
12733 gen_rtx_VEC_DUPLICATE (mode
, x
)));
12737 /* One field is non-constant. Load constant then overwrite varying
12738 field. This is more efficient than using the stack. */
12741 rtx copy
= copy_rtx (vals
);
12742 rtx index
= GEN_INT (one_var
);
12744 /* Load constant part of vector, substitute neighboring value for
12745 varying element. */
12746 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12747 neon_expand_vector_init (target
, copy
);
12749 /* Insert variable. */
12750 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12754 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12757 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12760 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12763 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12766 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12769 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12772 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12775 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12778 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12781 gcc_unreachable ();
12786 /* Construct the vector in memory one field at a time
12787 and load the whole vector. */
12788 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12789 for (i
= 0; i
< n_elts
; i
++)
12790 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12791 i
* GET_MODE_SIZE (inner_mode
)),
12792 XVECEXP (vals
, 0, i
));
12793 emit_move_insn (target
, mem
);
12796 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12797 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12798 reported source locations are bogus. */
12801 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12804 HOST_WIDE_INT lane
;
12806 gcc_assert (CONST_INT_P (operand
));
12808 lane
= INTVAL (operand
);
12810 if (lane
< low
|| lane
>= high
)
12814 /* Bounds-check lanes. */
12817 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12819 bounds_check (operand
, low
, high
, "lane out of range");
12822 /* Bounds-check constants. */
12825 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12827 bounds_check (operand
, low
, high
, "constant out of range");
12831 neon_element_bits (machine_mode mode
)
12833 if (mode
== DImode
)
12834 return GET_MODE_BITSIZE (mode
);
12836 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
12840 /* Predicates for `match_operand' and `match_operator'. */
12842 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12843 WB is true if full writeback address modes are allowed and is false
12844 if limited writeback address modes (POST_INC and PRE_DEC) are
12848 arm_coproc_mem_operand (rtx op
, bool wb
)
12852 /* Reject eliminable registers. */
12853 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12854 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12855 || reg_mentioned_p (arg_pointer_rtx
, op
)
12856 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12857 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12858 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12859 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12862 /* Constants are converted into offsets from labels. */
12866 ind
= XEXP (op
, 0);
12868 if (reload_completed
12869 && (GET_CODE (ind
) == LABEL_REF
12870 || (GET_CODE (ind
) == CONST
12871 && GET_CODE (XEXP (ind
, 0)) == PLUS
12872 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12873 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12876 /* Match: (mem (reg)). */
12878 return arm_address_register_rtx_p (ind
, 0);
12880 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12881 acceptable in any case (subject to verification by
12882 arm_address_register_rtx_p). We need WB to be true to accept
12883 PRE_INC and POST_DEC. */
12884 if (GET_CODE (ind
) == POST_INC
12885 || GET_CODE (ind
) == PRE_DEC
12887 && (GET_CODE (ind
) == PRE_INC
12888 || GET_CODE (ind
) == POST_DEC
)))
12889 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12892 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12893 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12894 && GET_CODE (XEXP (ind
, 1)) == PLUS
12895 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12896 ind
= XEXP (ind
, 1);
12901 if (GET_CODE (ind
) == PLUS
12902 && REG_P (XEXP (ind
, 0))
12903 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12904 && CONST_INT_P (XEXP (ind
, 1))
12905 && INTVAL (XEXP (ind
, 1)) > -1024
12906 && INTVAL (XEXP (ind
, 1)) < 1024
12907 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12913 /* Return TRUE if OP is a memory operand which we can load or store a vector
12914 to/from. TYPE is one of the following values:
12915 0 - Vector load/stor (vldr)
12916 1 - Core registers (ldm)
12917 2 - Element/structure loads (vld1)
12920 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12924 /* Reject eliminable registers. */
12925 if (! (reload_in_progress
|| reload_completed
)
12926 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12927 || reg_mentioned_p (arg_pointer_rtx
, op
)
12928 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12929 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12930 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12931 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12934 /* Constants are converted into offsets from labels. */
12938 ind
= XEXP (op
, 0);
12940 if (reload_completed
12941 && (GET_CODE (ind
) == LABEL_REF
12942 || (GET_CODE (ind
) == CONST
12943 && GET_CODE (XEXP (ind
, 0)) == PLUS
12944 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12945 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12948 /* Match: (mem (reg)). */
12950 return arm_address_register_rtx_p (ind
, 0);
12952 /* Allow post-increment with Neon registers. */
12953 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12954 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12955 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12957 /* Allow post-increment by register for VLDn */
12958 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12959 && GET_CODE (XEXP (ind
, 1)) == PLUS
12960 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12967 && GET_CODE (ind
) == PLUS
12968 && REG_P (XEXP (ind
, 0))
12969 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12970 && CONST_INT_P (XEXP (ind
, 1))
12971 && INTVAL (XEXP (ind
, 1)) > -1024
12972 /* For quad modes, we restrict the constant offset to be slightly less
12973 than what the instruction format permits. We have no such constraint
12974 on double mode offsets. (This must match arm_legitimate_index_p.) */
12975 && (INTVAL (XEXP (ind
, 1))
12976 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12977 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12983 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12986 neon_struct_mem_operand (rtx op
)
12990 /* Reject eliminable registers. */
12991 if (! (reload_in_progress
|| reload_completed
)
12992 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12993 || reg_mentioned_p (arg_pointer_rtx
, op
)
12994 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12995 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12996 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12997 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13000 /* Constants are converted into offsets from labels. */
13004 ind
= XEXP (op
, 0);
13006 if (reload_completed
13007 && (GET_CODE (ind
) == LABEL_REF
13008 || (GET_CODE (ind
) == CONST
13009 && GET_CODE (XEXP (ind
, 0)) == PLUS
13010 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13011 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13014 /* Match: (mem (reg)). */
13016 return arm_address_register_rtx_p (ind
, 0);
13018 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13019 if (GET_CODE (ind
) == POST_INC
13020 || GET_CODE (ind
) == PRE_DEC
)
13021 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13026 /* Return true if X is a register that will be eliminated later on. */
13028 arm_eliminable_register (rtx x
)
13030 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
13031 || REGNO (x
) == ARG_POINTER_REGNUM
13032 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
13033 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
13036 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13037 coprocessor registers. Otherwise return NO_REGS. */
13040 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13042 if (mode
== HFmode
)
13044 if (!TARGET_NEON_FP16
)
13045 return GENERAL_REGS
;
13046 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13048 return GENERAL_REGS
;
13051 /* The neon move patterns handle all legitimate vector and struct
13054 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13055 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13056 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13057 || VALID_NEON_STRUCT_MODE (mode
)))
13060 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13063 return GENERAL_REGS
;
13066 /* Values which must be returned in the most-significant end of the return
13070 arm_return_in_msb (const_tree valtype
)
13072 return (TARGET_AAPCS_BASED
13073 && BYTES_BIG_ENDIAN
13074 && (AGGREGATE_TYPE_P (valtype
)
13075 || TREE_CODE (valtype
) == COMPLEX_TYPE
13076 || FIXED_POINT_TYPE_P (valtype
)));
13079 /* Return TRUE if X references a SYMBOL_REF. */
13081 symbol_mentioned_p (rtx x
)
13086 if (GET_CODE (x
) == SYMBOL_REF
)
13089 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13090 are constant offsets, not symbols. */
13091 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13094 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13096 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13102 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13103 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
13106 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
13113 /* Return TRUE if X references a LABEL_REF. */
13115 label_mentioned_p (rtx x
)
13120 if (GET_CODE (x
) == LABEL_REF
)
13123 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13124 instruction, but they are constant offsets, not symbols. */
13125 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13128 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13129 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13135 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13136 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
13139 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
13147 tls_mentioned_p (rtx x
)
13149 switch (GET_CODE (x
))
13152 return tls_mentioned_p (XEXP (x
, 0));
13155 if (XINT (x
, 1) == UNSPEC_TLS
)
13163 /* Must not copy any rtx that uses a pc-relative address. */
13166 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
13168 if (GET_CODE (*x
) == UNSPEC
13169 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
13170 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
13176 arm_cannot_copy_insn_p (rtx_insn
*insn
)
13178 /* The tls call insn cannot be copied, as it is paired with a data
13180 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13183 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
/* NOTE(review): extraction-damaged fragment of minmax_code.  The function's
   return type line and the entire switch body (original source lines
   ~13190-13201, presumably mapping SMAX/SMIN/UMAX/UMIN to comparison codes)
   are missing from this view — recover them from upstream before use.  */
13187 minmax_code (rtx x
)
13189 enum rtx_code code
= GET_CODE (x
);
13202 gcc_unreachable ();
13206 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13209 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13210 int *mask
, bool *signed_sat
)
13212 /* The high bound must be a power of two minus one. */
13213 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13217 /* The low bound is either zero (for usat) or one less than the
13218 negation of the high bound (for ssat). */
13219 if (INTVAL (lo_bound
) == 0)
13224 *signed_sat
= false;
13229 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13234 *signed_sat
= true;
13242 /* Return 1 if memory locations are adjacent. */
13244 adjacent_mem_locations (rtx a
, rtx b
)
13246 /* We don't guarantee to preserve the order of these memory refs. */
13247 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13250 if ((REG_P (XEXP (a
, 0))
13251 || (GET_CODE (XEXP (a
, 0)) == PLUS
13252 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13253 && (REG_P (XEXP (b
, 0))
13254 || (GET_CODE (XEXP (b
, 0)) == PLUS
13255 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13257 HOST_WIDE_INT val0
= 0, val1
= 0;
13261 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13263 reg0
= XEXP (XEXP (a
, 0), 0);
13264 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13267 reg0
= XEXP (a
, 0);
13269 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13271 reg1
= XEXP (XEXP (b
, 0), 0);
13272 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13275 reg1
= XEXP (b
, 0);
13277 /* Don't accept any offset that will require multiple
13278 instructions to handle, since this would cause the
13279 arith_adjacentmem pattern to output an overlong sequence. */
13280 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13283 /* Don't allow an eliminable register: register elimination can make
13284 the offset too large. */
13285 if (arm_eliminable_register (reg0
))
13288 val_diff
= val1
- val0
;
13292 /* If the target has load delay slots, then there's no benefit
13293 to using an ldm instruction unless the offset is zero and
13294 we are optimizing for size. */
13295 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13296 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13297 && (val_diff
== 4 || val_diff
== -4));
13300 return ((REGNO (reg0
) == REGNO (reg1
))
13301 && (val_diff
== 4 || val_diff
== -4));
13307 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13308 for load operations, false for store operations. CONSECUTIVE is true
13309 if the register numbers in the operation must be consecutive in the register
13310 bank. RETURN_PC is true if value is to be loaded in PC.
13311 The pattern we are trying to match for load is:
13312 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13313 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13316 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13319 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13320 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13321 3. If consecutive is TRUE, then for kth register being loaded,
13322 REGNO (R_dk) = REGNO (R_d0) + k.
13323 The pattern for store is similar. */
13325 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
13326 bool consecutive
, bool return_pc
)
13328 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13329 rtx reg
, mem
, addr
;
13331 unsigned first_regno
;
13332 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13334 bool addr_reg_in_reglist
= false;
13335 bool update
= false;
13340 /* If not in SImode, then registers must be consecutive
13341 (e.g., VLDM instructions for DFmode). */
13342 gcc_assert ((mode
== SImode
) || consecutive
);
13343 /* Setting return_pc for stores is illegal. */
13344 gcc_assert (!return_pc
|| load
);
13346 /* Set up the increments and the regs per val based on the mode. */
13347 reg_increment
= GET_MODE_SIZE (mode
);
13348 regs_per_val
= reg_increment
/ 4;
13349 offset_adj
= return_pc
? 1 : 0;
13352 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13353 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13356 /* Check if this is a write-back. */
13357 elt
= XVECEXP (op
, 0, offset_adj
);
13358 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13364 /* The offset adjustment must be the number of registers being
13365 popped times the size of a single register. */
13366 if (!REG_P (SET_DEST (elt
))
13367 || !REG_P (XEXP (SET_SRC (elt
), 0))
13368 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13369 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13370 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13371 ((count
- 1 - offset_adj
) * reg_increment
))
13375 i
= i
+ offset_adj
;
13376 base
= base
+ offset_adj
;
13377 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13378 success depends on the type: VLDM can do just one reg,
13379 LDM must do at least two. */
13380 if ((count
<= i
) && (mode
== SImode
))
13383 elt
= XVECEXP (op
, 0, i
- 1);
13384 if (GET_CODE (elt
) != SET
)
13389 reg
= SET_DEST (elt
);
13390 mem
= SET_SRC (elt
);
13394 reg
= SET_SRC (elt
);
13395 mem
= SET_DEST (elt
);
13398 if (!REG_P (reg
) || !MEM_P (mem
))
13401 regno
= REGNO (reg
);
13402 first_regno
= regno
;
13403 addr
= XEXP (mem
, 0);
13404 if (GET_CODE (addr
) == PLUS
)
13406 if (!CONST_INT_P (XEXP (addr
, 1)))
13409 offset
= INTVAL (XEXP (addr
, 1));
13410 addr
= XEXP (addr
, 0);
13416 /* Don't allow SP to be loaded unless it is also the base register. It
13417 guarantees that SP is reset correctly when an LDM instruction
13418 is interrupted. Otherwise, we might end up with a corrupt stack. */
13419 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13422 for (; i
< count
; i
++)
13424 elt
= XVECEXP (op
, 0, i
);
13425 if (GET_CODE (elt
) != SET
)
13430 reg
= SET_DEST (elt
);
13431 mem
= SET_SRC (elt
);
13435 reg
= SET_SRC (elt
);
13436 mem
= SET_DEST (elt
);
13440 || GET_MODE (reg
) != mode
13441 || REGNO (reg
) <= regno
13444 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13445 /* Don't allow SP to be loaded unless it is also the base register. It
13446 guarantees that SP is reset correctly when an LDM instruction
13447 is interrupted. Otherwise, we might end up with a corrupt stack. */
13448 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13450 || GET_MODE (mem
) != mode
13451 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13452 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13453 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13454 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13455 offset
+ (i
- base
) * reg_increment
))
13456 && (!REG_P (XEXP (mem
, 0))
13457 || offset
+ (i
- base
) * reg_increment
!= 0)))
13460 regno
= REGNO (reg
);
13461 if (regno
== REGNO (addr
))
13462 addr_reg_in_reglist
= true;
13467 if (update
&& addr_reg_in_reglist
)
13470 /* For Thumb-1, address register is always modified - either by write-back
13471 or by explicit load. If the pattern does not describe an update,
13472 then the address register must be in the list of loaded registers. */
13474 return update
|| addr_reg_in_reglist
;
13480 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13481 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13482 instruction. ADD_OFFSET is nonzero if the base address register needs
13483 to be modified with an add instruction before we can use it. */
13486 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13487 int nops
, HOST_WIDE_INT add_offset
)
13489 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13490 if the offset isn't small enough. The reason 2 ldrs are faster
13491 is because these ARMs are able to do more than one cache access
13492 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13493 whilst the ARM8 has a double bandwidth cache. This means that
13494 these cores can do both an instruction fetch and a data fetch in
13495 a single cycle, so the trick of calculating the address into a
13496 scratch register (one of the result regs) and then doing a load
13497 multiple actually becomes slower (and no smaller in code size).
13498 That is the transformation
13500 ldr rd1, [rbase + offset]
13501 ldr rd2, [rbase + offset + 4]
13505 add rd1, rbase, offset
13506 ldmia rd1, {rd1, rd2}
13508 produces worse code -- '3 cycles + any stalls on rd2' instead of
13509 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13510 access per cycle, the first sequence could never complete in less
13511 than 6 cycles, whereas the ldm sequence would only take 5 and
13512 would make better use of sequential accesses if not hitting the
13515 We cheat here and test 'arm_ld_sched' which we currently know to
13516 only be true for the ARM8, ARM9 and StrongARM. If this ever
13517 changes, then the test below needs to be reworked. */
13518 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13521 /* XScale has load-store double instructions, but they have stricter
13522 alignment requirements than load-store multiple, so we cannot
13525 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13526 the pipeline until completion.
13534 An ldr instruction takes 1-3 cycles, but does not block the
13543 Best case ldr will always win. However, the more ldr instructions
13544 we issue, the less likely we are to be able to schedule them well.
13545 Using ldr instructions also increases code size.
13547 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13548 for counts of 3 or 4 regs. */
13549 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13554 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13555 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13556 an array ORDER which describes the sequence to use when accessing the
13557 offsets that produces an ascending order. In this sequence, each
13558 offset must be larger by exactly 4 than the previous one. ORDER[0]
13559 must have been filled in with the lowest offset by the caller.
13560 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13561 we use to verify that ORDER produces an ascending order of registers.
13562 Return true if it was possible to construct such an order, false if
13566 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13567 int *unsorted_regs
)
13570 for (i
= 1; i
< nops
; i
++)
13574 order
[i
] = order
[i
- 1];
13575 for (j
= 0; j
< nops
; j
++)
13576 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13578 /* We must find exactly one offset that is higher than the
13579 previous one by 4. */
13580 if (order
[i
] != order
[i
- 1])
13584 if (order
[i
] == order
[i
- 1])
13586 /* The register numbers must be ascending. */
13587 if (unsorted_regs
!= NULL
13588 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13594 /* Used to determine in a peephole whether a sequence of load
13595 instructions can be changed into a load-multiple instruction.
13596 NOPS is the number of separate load instructions we are examining. The
13597 first NOPS entries in OPERANDS are the destination registers, the
13598 next NOPS entries are memory operands. If this function is
13599 successful, *BASE is set to the common base register of the memory
13600 accesses; *LOAD_OFFSET is set to the first memory location's offset
13601 from that base register.
13602 REGS is an array filled in with the destination register numbers.
13603 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13604 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13605 the sequence of registers in REGS matches the loads from ascending memory
13606 locations, and the function verifies that the register numbers are
13607 themselves ascending. If CHECK_REGS is false, the register numbers
13608 are stored in the order they are found in the operands. */
13610 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13611 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13613 int unsorted_regs
[MAX_LDM_STM_OPS
];
13614 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13615 int order
[MAX_LDM_STM_OPS
];
13616 rtx base_reg_rtx
= NULL
;
13620 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13621 easily extended if required. */
13622 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13624 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13626 /* Loop over the operands and check that the memory references are
13627 suitable (i.e. immediate offsets from the same base register). At
13628 the same time, extract the target register, and the memory
13630 for (i
= 0; i
< nops
; i
++)
13635 /* Convert a subreg of a mem into the mem itself. */
13636 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13637 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13639 gcc_assert (MEM_P (operands
[nops
+ i
]));
13641 /* Don't reorder volatile memory references; it doesn't seem worth
13642 looking for the case where the order is ok anyway. */
13643 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13646 offset
= const0_rtx
;
13648 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13649 || (GET_CODE (reg
) == SUBREG
13650 && REG_P (reg
= SUBREG_REG (reg
))))
13651 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13652 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13653 || (GET_CODE (reg
) == SUBREG
13654 && REG_P (reg
= SUBREG_REG (reg
))))
13655 && (CONST_INT_P (offset
13656 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13660 base_reg
= REGNO (reg
);
13661 base_reg_rtx
= reg
;
13662 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13665 else if (base_reg
!= (int) REGNO (reg
))
13666 /* Not addressed from the same base register. */
13669 unsorted_regs
[i
] = (REG_P (operands
[i
])
13670 ? REGNO (operands
[i
])
13671 : REGNO (SUBREG_REG (operands
[i
])));
13673 /* If it isn't an integer register, or if it overwrites the
13674 base register but isn't the last insn in the list, then
13675 we can't do this. */
13676 if (unsorted_regs
[i
] < 0
13677 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13678 || unsorted_regs
[i
] > 14
13679 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13682 /* Don't allow SP to be loaded unless it is also the base
13683 register. It guarantees that SP is reset correctly when
13684 an LDM instruction is interrupted. Otherwise, we might
13685 end up with a corrupt stack. */
13686 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13689 unsorted_offsets
[i
] = INTVAL (offset
);
13690 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13694 /* Not a suitable memory address. */
13698 /* All the useful information has now been extracted from the
13699 operands into unsorted_regs and unsorted_offsets; additionally,
13700 order[0] has been set to the lowest offset in the list. Sort
13701 the offsets into order, verifying that they are adjacent, and
13702 check that the register numbers are ascending. */
13703 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13704 check_regs
? unsorted_regs
: NULL
))
13708 memcpy (saved_order
, order
, sizeof order
);
13714 for (i
= 0; i
< nops
; i
++)
13715 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13717 *load_offset
= unsorted_offsets
[order
[0]];
13721 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13724 if (unsorted_offsets
[order
[0]] == 0)
13725 ldm_case
= 1; /* ldmia */
13726 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13727 ldm_case
= 2; /* ldmib */
13728 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13729 ldm_case
= 3; /* ldmda */
13730 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13731 ldm_case
= 4; /* ldmdb */
13732 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13733 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13738 if (!multiple_operation_profitable_p (false, nops
,
13740 ? unsorted_offsets
[order
[0]] : 0))
13746 /* Used to determine in a peephole whether a sequence of store instructions can
13747 be changed into a store-multiple instruction.
13748 NOPS is the number of separate store instructions we are examining.
13749 NOPS_TOTAL is the total number of instructions recognized by the peephole
13751 The first NOPS entries in OPERANDS are the source registers, the next
13752 NOPS entries are memory operands. If this function is successful, *BASE is
13753 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13754 to the first memory location's offset from that base register. REGS is an
13755 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13756 likewise filled with the corresponding rtx's.
13757 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13758 numbers to an ascending order of stores.
13759 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13760 from ascending memory locations, and the function verifies that the register
13761 numbers are themselves ascending. If CHECK_REGS is false, the register
13762 numbers are stored in the order they are found in the operands. */
13764 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13765 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13766 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13768 int unsorted_regs
[MAX_LDM_STM_OPS
];
13769 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13770 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13771 int order
[MAX_LDM_STM_OPS
];
13773 rtx base_reg_rtx
= NULL
;
13776 /* Write back of base register is currently only supported for Thumb 1. */
13777 int base_writeback
= TARGET_THUMB1
;
13779 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13780 easily extended if required. */
13781 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13783 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13785 /* Loop over the operands and check that the memory references are
13786 suitable (i.e. immediate offsets from the same base register). At
13787 the same time, extract the target register, and the memory
13789 for (i
= 0; i
< nops
; i
++)
13794 /* Convert a subreg of a mem into the mem itself. */
13795 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13796 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13798 gcc_assert (MEM_P (operands
[nops
+ i
]));
13800 /* Don't reorder volatile memory references; it doesn't seem worth
13801 looking for the case where the order is ok anyway. */
13802 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13805 offset
= const0_rtx
;
13807 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13808 || (GET_CODE (reg
) == SUBREG
13809 && REG_P (reg
= SUBREG_REG (reg
))))
13810 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13811 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13812 || (GET_CODE (reg
) == SUBREG
13813 && REG_P (reg
= SUBREG_REG (reg
))))
13814 && (CONST_INT_P (offset
13815 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13817 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13818 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13819 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13823 base_reg
= REGNO (reg
);
13824 base_reg_rtx
= reg
;
13825 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13828 else if (base_reg
!= (int) REGNO (reg
))
13829 /* Not addressed from the same base register. */
13832 /* If it isn't an integer register, then we can't do this. */
13833 if (unsorted_regs
[i
] < 0
13834 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13835 /* The effects are unpredictable if the base register is
13836 both updated and stored. */
13837 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13838 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13839 || unsorted_regs
[i
] > 14)
13842 unsorted_offsets
[i
] = INTVAL (offset
);
13843 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13847 /* Not a suitable memory address. */
13851 /* All the useful information has now been extracted from the
13852 operands into unsorted_regs and unsorted_offsets; additionally,
13853 order[0] has been set to the lowest offset in the list. Sort
13854 the offsets into order, verifying that they are adjacent, and
13855 check that the register numbers are ascending. */
13856 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13857 check_regs
? unsorted_regs
: NULL
))
13861 memcpy (saved_order
, order
, sizeof order
);
13867 for (i
= 0; i
< nops
; i
++)
13869 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13871 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13874 *load_offset
= unsorted_offsets
[order
[0]];
13878 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13881 if (unsorted_offsets
[order
[0]] == 0)
13882 stm_case
= 1; /* stmia */
13883 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13884 stm_case
= 2; /* stmib */
13885 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13886 stm_case
= 3; /* stmda */
13887 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13888 stm_case
= 4; /* stmdb */
13892 if (!multiple_operation_profitable_p (false, nops
, 0))
13898 /* Routines for use in generating RTL. */
13900 /* Generate a load-multiple instruction. COUNT is the number of loads in
13901 the instruction; REGS and MEMS are arrays containing the operands.
13902 BASEREG is the base register to be used in addressing the memory operands.
13903 WBACK_OFFSET is nonzero if the instruction should update the base
13907 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13908 HOST_WIDE_INT wback_offset
)
13913 if (!multiple_operation_profitable_p (false, count
, 0))
13919 for (i
= 0; i
< count
; i
++)
13920 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13922 if (wback_offset
!= 0)
13923 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13925 seq
= get_insns ();
13931 result
= gen_rtx_PARALLEL (VOIDmode
,
13932 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13933 if (wback_offset
!= 0)
13935 XVECEXP (result
, 0, 0)
13936 = gen_rtx_SET (VOIDmode
, basereg
,
13937 plus_constant (Pmode
, basereg
, wback_offset
));
13942 for (j
= 0; i
< count
; i
++, j
++)
13943 XVECEXP (result
, 0, i
)
13944 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13949 /* Generate a store-multiple instruction. COUNT is the number of stores in
13950 the instruction; REGS and MEMS are arrays containing the operands.
13951 BASEREG is the base register to be used in addressing the memory operands.
13952 WBACK_OFFSET is nonzero if the instruction should update the base
13956 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13957 HOST_WIDE_INT wback_offset
)
13962 if (GET_CODE (basereg
) == PLUS
)
13963 basereg
= XEXP (basereg
, 0);
13965 if (!multiple_operation_profitable_p (false, count
, 0))
13971 for (i
= 0; i
< count
; i
++)
13972 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13974 if (wback_offset
!= 0)
13975 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13977 seq
= get_insns ();
13983 result
= gen_rtx_PARALLEL (VOIDmode
,
13984 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13985 if (wback_offset
!= 0)
13987 XVECEXP (result
, 0, 0)
13988 = gen_rtx_SET (VOIDmode
, basereg
,
13989 plus_constant (Pmode
, basereg
, wback_offset
));
13994 for (j
= 0; i
< count
; i
++, j
++)
13995 XVECEXP (result
, 0, i
)
13996 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
14001 /* Generate either a load-multiple or a store-multiple instruction. This
14002 function can be used in situations where we can start with a single MEM
14003 rtx and adjust its address upwards.
14004 COUNT is the number of operations in the instruction, not counting a
14005 possible update of the base register. REGS is an array containing the
14007 BASEREG is the base register to be used in addressing the memory operands,
14008 which are constructed from BASEMEM.
14009 WRITE_BACK specifies whether the generated instruction should include an
14010 update of the base register.
14011 OFFSETP is used to pass an offset to and from this function; this offset
14012 is not used when constructing the address (instead BASEMEM should have an
14013 appropriate offset in its address), it is used only for setting
14014 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14017 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
14018 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
14020 rtx mems
[MAX_LDM_STM_OPS
];
14021 HOST_WIDE_INT offset
= *offsetp
;
14024 gcc_assert (count
<= MAX_LDM_STM_OPS
);
14026 if (GET_CODE (basereg
) == PLUS
)
14027 basereg
= XEXP (basereg
, 0);
14029 for (i
= 0; i
< count
; i
++)
14031 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
14032 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
14040 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
14041 write_back
? 4 * count
: 0);
14043 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
14044 write_back
? 4 * count
: 0);
14048 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14049 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14051 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
14056 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14057 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14059 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
14063 /* Called from a peephole2 expander to turn a sequence of loads into an
14064 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14065 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14066 is true if we can reorder the registers because they are used commutatively
14068 Returns true iff we could generate a new instruction. */
14071 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
14073 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14074 rtx mems
[MAX_LDM_STM_OPS
];
14075 int i
, j
, base_reg
;
14077 HOST_WIDE_INT offset
;
14078 int write_back
= FALSE
;
14082 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
14083 &base_reg
, &offset
, !sort_regs
);
14089 for (i
= 0; i
< nops
- 1; i
++)
14090 for (j
= i
+ 1; j
< nops
; j
++)
14091 if (regs
[i
] > regs
[j
])
14097 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14101 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
14102 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
14108 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
14109 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
14111 if (!TARGET_THUMB1
)
14113 base_reg
= regs
[0];
14114 base_reg_rtx
= newbase
;
14118 for (i
= 0; i
< nops
; i
++)
14120 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14121 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14124 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14125 write_back
? offset
+ i
* 4 : 0));
14129 /* Called from a peephole2 expander to turn a sequence of stores into an
14130 STM instruction. OPERANDS are the operands found by the peephole matcher;
14131 NOPS indicates how many separate stores we are trying to combine.
14132 Returns true iff we could generate a new instruction. */
14135 gen_stm_seq (rtx
*operands
, int nops
)
14138 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14139 rtx mems
[MAX_LDM_STM_OPS
];
14142 HOST_WIDE_INT offset
;
14143 int write_back
= FALSE
;
14146 bool base_reg_dies
;
14148 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
14149 mem_order
, &base_reg
, &offset
, true);
14154 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14156 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
14159 gcc_assert (base_reg_dies
);
14165 gcc_assert (base_reg_dies
);
14166 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14170 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14172 for (i
= 0; i
< nops
; i
++)
14174 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14175 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14178 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14179 write_back
? offset
+ i
* 4 : 0));
14183 /* Called from a peephole2 expander to turn a sequence of stores that are
14184 preceded by constant loads into an STM instruction. OPERANDS are the
14185 operands found by the peephole matcher; NOPS indicates how many
14186 separate stores we are trying to combine; there are 2 * NOPS
14187 instructions in the peephole.
14188 Returns true iff we could generate a new instruction. */
14191 gen_const_stm_seq (rtx
*operands
, int nops
)
14193 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14194 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14195 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14196 rtx mems
[MAX_LDM_STM_OPS
];
14199 HOST_WIDE_INT offset
;
14200 int write_back
= FALSE
;
14203 bool base_reg_dies
;
14205 HARD_REG_SET allocated
;
14207 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14208 mem_order
, &base_reg
, &offset
, false);
14213 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14215 /* If the same register is used more than once, try to find a free
14217 CLEAR_HARD_REG_SET (allocated
);
14218 for (i
= 0; i
< nops
; i
++)
14220 for (j
= i
+ 1; j
< nops
; j
++)
14221 if (regs
[i
] == regs
[j
])
14223 rtx t
= peep2_find_free_register (0, nops
* 2,
14224 TARGET_THUMB1
? "l" : "r",
14225 SImode
, &allocated
);
14229 regs
[i
] = REGNO (t
);
14233 /* Compute an ordering that maps the register numbers to an ascending
14236 for (i
= 0; i
< nops
; i
++)
14237 if (regs
[i
] < regs
[reg_order
[0]])
14240 for (i
= 1; i
< nops
; i
++)
14242 int this_order
= reg_order
[i
- 1];
14243 for (j
= 0; j
< nops
; j
++)
14244 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14245 && (this_order
== reg_order
[i
- 1]
14246 || regs
[j
] < regs
[this_order
]))
14248 reg_order
[i
] = this_order
;
14251 /* Ensure that registers that must be live after the instruction end
14252 up with the correct value. */
14253 for (i
= 0; i
< nops
; i
++)
14255 int this_order
= reg_order
[i
];
14256 if ((this_order
!= mem_order
[i
]
14257 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14258 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14262 /* Load the constants. */
14263 for (i
= 0; i
< nops
; i
++)
14265 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14266 sorted_regs
[i
] = regs
[reg_order
[i
]];
14267 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14270 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14272 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14275 gcc_assert (base_reg_dies
);
14281 gcc_assert (base_reg_dies
);
14282 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14286 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14288 for (i
= 0; i
< nops
; i
++)
14290 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14291 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14294 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14295 write_back
? offset
+ i
* 4 : 0));
14299 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14300 unaligned copies on processors which support unaligned semantics for those
14301 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14302 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14303 An interleave factor of 1 (the minimum) will perform no interleaving.
14304 Load/store multiple are used for aligned addresses where possible. */
14307 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14308 HOST_WIDE_INT length
,
14309 unsigned int interleave_factor
)
14311 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14312 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14313 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14314 HOST_WIDE_INT i
, j
;
14315 HOST_WIDE_INT remaining
= length
, words
;
14316 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14318 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14319 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14320 HOST_WIDE_INT srcoffset
, dstoffset
;
14321 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14324 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14326 /* Use hard registers if we have aligned source or destination so we can use
14327 load/store multiple with contiguous registers. */
14328 if (dst_aligned
|| src_aligned
)
14329 for (i
= 0; i
< interleave_factor
; i
++)
14330 regs
[i
] = gen_rtx_REG (SImode
, i
);
14332 for (i
= 0; i
< interleave_factor
; i
++)
14333 regs
[i
] = gen_reg_rtx (SImode
);
14335 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14336 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14338 srcoffset
= dstoffset
= 0;
14340 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14341 For copying the last bytes we want to subtract this offset again. */
14342 src_autoinc
= dst_autoinc
= 0;
14344 for (i
= 0; i
< interleave_factor
; i
++)
14347 /* Copy BLOCK_SIZE_BYTES chunks. */
14349 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14352 if (src_aligned
&& interleave_factor
> 1)
14354 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14355 TRUE
, srcbase
, &srcoffset
));
14356 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14360 for (j
= 0; j
< interleave_factor
; j
++)
14362 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14364 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14365 srcoffset
+ j
* UNITS_PER_WORD
);
14366 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14368 srcoffset
+= block_size_bytes
;
14372 if (dst_aligned
&& interleave_factor
> 1)
14374 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14375 TRUE
, dstbase
, &dstoffset
));
14376 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14380 for (j
= 0; j
< interleave_factor
; j
++)
14382 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14384 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14385 dstoffset
+ j
* UNITS_PER_WORD
);
14386 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14388 dstoffset
+= block_size_bytes
;
14391 remaining
-= block_size_bytes
;
14394 /* Copy any whole words left (note these aren't interleaved with any
14395 subsequent halfword/byte load/stores in the interests of simplicity). */
14397 words
= remaining
/ UNITS_PER_WORD
;
14399 gcc_assert (words
< interleave_factor
);
14401 if (src_aligned
&& words
> 1)
14403 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14405 src_autoinc
+= UNITS_PER_WORD
* words
;
14409 for (j
= 0; j
< words
; j
++)
14411 addr
= plus_constant (Pmode
, src
,
14412 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14413 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14414 srcoffset
+ j
* UNITS_PER_WORD
);
14415 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14417 srcoffset
+= words
* UNITS_PER_WORD
;
14420 if (dst_aligned
&& words
> 1)
14422 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14424 dst_autoinc
+= words
* UNITS_PER_WORD
;
14428 for (j
= 0; j
< words
; j
++)
14430 addr
= plus_constant (Pmode
, dst
,
14431 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14432 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14433 dstoffset
+ j
* UNITS_PER_WORD
);
14434 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14436 dstoffset
+= words
* UNITS_PER_WORD
;
14439 remaining
-= words
* UNITS_PER_WORD
;
14441 gcc_assert (remaining
< 4);
14443 /* Copy a halfword if necessary. */
14445 if (remaining
>= 2)
14447 halfword_tmp
= gen_reg_rtx (SImode
);
14449 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14450 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14451 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14453 /* Either write out immediately, or delay until we've loaded the last
14454 byte, depending on interleave factor. */
14455 if (interleave_factor
== 1)
14457 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14458 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14459 emit_insn (gen_unaligned_storehi (mem
,
14460 gen_lowpart (HImode
, halfword_tmp
)));
14461 halfword_tmp
= NULL
;
14469 gcc_assert (remaining
< 2);
14471 /* Copy last byte. */
14473 if ((remaining
& 1) != 0)
14475 byte_tmp
= gen_reg_rtx (SImode
);
14477 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14478 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14479 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14481 if (interleave_factor
== 1)
14483 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14484 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14485 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14494 /* Store last halfword if we haven't done so already. */
14498 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14499 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14500 emit_insn (gen_unaligned_storehi (mem
,
14501 gen_lowpart (HImode
, halfword_tmp
)));
14505 /* Likewise for last byte. */
14509 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14510 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14511 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14515 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14518 /* From mips_adjust_block_mem:
14520 Helper function for doing a loop-based block operation on memory
14521 reference MEM. Each iteration of the loop will operate on LENGTH
14524 Create a new base register for use within the loop and point it to
14525 the start of MEM. Create a new memory reference that uses this
14526 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14529 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14532 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14534 /* Although the new mem does not refer to a known location,
14535 it does keep up to LENGTH bytes of alignment. */
14536 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14537 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14540 /* From mips_block_move_loop:
14542 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14543 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14544 the memory regions do not overlap. */
14547 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14548 unsigned int interleave_factor
,
14549 HOST_WIDE_INT bytes_per_iter
)
14551 rtx src_reg
, dest_reg
, final_src
, test
;
14552 HOST_WIDE_INT leftover
;
14554 leftover
= length
% bytes_per_iter
;
14555 length
-= leftover
;
14557 /* Create registers and memory references for use within the loop. */
14558 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14559 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14561 /* Calculate the value that SRC_REG should have after the last iteration of
14563 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14564 0, 0, OPTAB_WIDEN
);
14566 /* Emit the start of the loop. */
14567 rtx_code_label
*label
= gen_label_rtx ();
14568 emit_label (label
);
14570 /* Emit the loop body. */
14571 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14572 interleave_factor
);
14574 /* Move on to the next block. */
14575 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14576 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14578 /* Emit the loop condition. */
14579 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14580 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14582 /* Mop up any left-over bytes. */
14584 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14587 /* Emit a block move when either the source or destination is unaligned (not
14588 aligned to a four-byte boundary). This may need further tuning depending on
14589 core type, optimize_size setting, etc. */
14592 arm_movmemqi_unaligned (rtx
*operands
)
14594 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14598 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14599 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14600 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14601 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14602 or dst_aligned though: allow more interleaving in those cases since the
14603 resulting code can be smaller. */
14604 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14605 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14608 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14609 interleave_factor
, bytes_per_iter
);
14611 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14612 interleave_factor
);
14616 /* Note that the loop created by arm_block_move_unaligned_loop may be
14617 subject to loop unrolling, which makes tuning this condition a little
14620 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14622 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14629 arm_gen_movmemqi (rtx
*operands
)
14631 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14632 HOST_WIDE_INT srcoffset
, dstoffset
;
14634 rtx src
, dst
, srcbase
, dstbase
;
14635 rtx part_bytes_reg
= NULL
;
14638 if (!CONST_INT_P (operands
[2])
14639 || !CONST_INT_P (operands
[3])
14640 || INTVAL (operands
[2]) > 64)
14643 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14644 return arm_movmemqi_unaligned (operands
);
14646 if (INTVAL (operands
[3]) & 3)
14649 dstbase
= operands
[0];
14650 srcbase
= operands
[1];
14652 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14653 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14655 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14656 out_words_to_go
= INTVAL (operands
[2]) / 4;
14657 last_bytes
= INTVAL (operands
[2]) & 3;
14658 dstoffset
= srcoffset
= 0;
14660 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14661 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14663 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14665 if (in_words_to_go
> 4)
14666 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14667 TRUE
, srcbase
, &srcoffset
));
14669 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14670 src
, FALSE
, srcbase
,
14673 if (out_words_to_go
)
14675 if (out_words_to_go
> 4)
14676 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14677 TRUE
, dstbase
, &dstoffset
));
14678 else if (out_words_to_go
!= 1)
14679 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14680 out_words_to_go
, dst
,
14683 dstbase
, &dstoffset
));
14686 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14687 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
14688 if (last_bytes
!= 0)
14690 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14696 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14697 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14700 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14701 if (out_words_to_go
)
14705 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14706 sreg
= copy_to_reg (mem
);
14708 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14709 emit_move_insn (mem
, sreg
);
14712 gcc_assert (!in_words_to_go
); /* Sanity check */
14715 if (in_words_to_go
)
14717 gcc_assert (in_words_to_go
> 0);
14719 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14720 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14723 gcc_assert (!last_bytes
|| part_bytes_reg
);
14725 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14727 rtx tmp
= gen_reg_rtx (SImode
);
14729 /* The bytes we want are in the top end of the word. */
14730 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14731 GEN_INT (8 * (4 - last_bytes
))));
14732 part_bytes_reg
= tmp
;
14736 mem
= adjust_automodify_address (dstbase
, QImode
,
14737 plus_constant (Pmode
, dst
,
14739 dstoffset
+ last_bytes
- 1);
14740 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14744 tmp
= gen_reg_rtx (SImode
);
14745 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14746 part_bytes_reg
= tmp
;
14753 if (last_bytes
> 1)
14755 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14756 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14760 rtx tmp
= gen_reg_rtx (SImode
);
14761 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14762 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14763 part_bytes_reg
= tmp
;
14770 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14771 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14778 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14781 next_consecutive_mem (rtx mem
)
14783 machine_mode mode
= GET_MODE (mem
);
14784 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14785 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14787 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14790 /* Copy using LDRD/STRD instructions whenever possible.
14791 Returns true upon success. */
14793 gen_movmem_ldrd_strd (rtx
*operands
)
14795 unsigned HOST_WIDE_INT len
;
14796 HOST_WIDE_INT align
;
14797 rtx src
, dst
, base
;
14799 bool src_aligned
, dst_aligned
;
14800 bool src_volatile
, dst_volatile
;
14802 gcc_assert (CONST_INT_P (operands
[2]));
14803 gcc_assert (CONST_INT_P (operands
[3]));
14805 len
= UINTVAL (operands
[2]);
14809 /* Maximum alignment we can assume for both src and dst buffers. */
14810 align
= INTVAL (operands
[3]);
14812 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14815 /* Place src and dst addresses in registers
14816 and update the corresponding mem rtx. */
14818 dst_volatile
= MEM_VOLATILE_P (dst
);
14819 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14820 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14821 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14824 src_volatile
= MEM_VOLATILE_P (src
);
14825 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14826 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14827 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14829 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14832 if (src_volatile
|| dst_volatile
)
14835 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14836 if (!(dst_aligned
|| src_aligned
))
14837 return arm_gen_movmemqi (operands
);
14839 src
= adjust_address (src
, DImode
, 0);
14840 dst
= adjust_address (dst
, DImode
, 0);
14844 reg0
= gen_reg_rtx (DImode
);
14846 emit_move_insn (reg0
, src
);
14848 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14851 emit_move_insn (dst
, reg0
);
14853 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14855 src
= next_consecutive_mem (src
);
14856 dst
= next_consecutive_mem (dst
);
14859 gcc_assert (len
< 8);
14862 /* More than a word but less than a double-word to copy. Copy a word. */
14863 reg0
= gen_reg_rtx (SImode
);
14864 src
= adjust_address (src
, SImode
, 0);
14865 dst
= adjust_address (dst
, SImode
, 0);
14867 emit_move_insn (reg0
, src
);
14869 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14872 emit_move_insn (dst
, reg0
);
14874 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14876 src
= next_consecutive_mem (src
);
14877 dst
= next_consecutive_mem (dst
);
14884 /* Copy the remaining bytes. */
14887 dst
= adjust_address (dst
, HImode
, 0);
14888 src
= adjust_address (src
, HImode
, 0);
14889 reg0
= gen_reg_rtx (SImode
);
14891 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14893 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14896 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14898 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14900 src
= next_consecutive_mem (src
);
14901 dst
= next_consecutive_mem (dst
);
14906 dst
= adjust_address (dst
, QImode
, 0);
14907 src
= adjust_address (src
, QImode
, 0);
14908 reg0
= gen_reg_rtx (QImode
);
14909 emit_move_insn (reg0
, src
);
14910 emit_move_insn (dst
, reg0
);
14914 /* Select a dominance comparison mode if possible for a test of the general
14915 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14916 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14917 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14918 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14919 In all cases OP will be either EQ or NE, but we don't need to know which
14920 here. If we are unable to support a dominance comparison we return
14921 CC mode. This will then fail to match for the RTL expressions that
14922 generate this call. */
14924 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14926 enum rtx_code cond1
, cond2
;
14929 /* Currently we will probably get the wrong result if the individual
14930 comparisons are not simple. This also ensures that it is safe to
14931 reverse a comparison if necessary. */
14932 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14934 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14938 /* The if_then_else variant of this tests the second condition if the
14939 first passes, but is true if the first fails. Reverse the first
14940 condition to get a true "inclusive-or" expression. */
14941 if (cond_or
== DOM_CC_NX_OR_Y
)
14942 cond1
= reverse_condition (cond1
);
14944 /* If the comparisons are not equal, and one doesn't dominate the other,
14945 then we can't do this. */
14947 && !comparison_dominates_p (cond1
, cond2
)
14948 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14953 enum rtx_code temp
= cond1
;
14961 if (cond_or
== DOM_CC_X_AND_Y
)
14966 case EQ
: return CC_DEQmode
;
14967 case LE
: return CC_DLEmode
;
14968 case LEU
: return CC_DLEUmode
;
14969 case GE
: return CC_DGEmode
;
14970 case GEU
: return CC_DGEUmode
;
14971 default: gcc_unreachable ();
14975 if (cond_or
== DOM_CC_X_AND_Y
)
14987 gcc_unreachable ();
14991 if (cond_or
== DOM_CC_X_AND_Y
)
15003 gcc_unreachable ();
15007 if (cond_or
== DOM_CC_X_AND_Y
)
15008 return CC_DLTUmode
;
15013 return CC_DLTUmode
;
15015 return CC_DLEUmode
;
15019 gcc_unreachable ();
15023 if (cond_or
== DOM_CC_X_AND_Y
)
15024 return CC_DGTUmode
;
15029 return CC_DGTUmode
;
15031 return CC_DGEUmode
;
15035 gcc_unreachable ();
15038 /* The remaining cases only occur when both comparisons are the
15041 gcc_assert (cond1
== cond2
);
15045 gcc_assert (cond1
== cond2
);
15049 gcc_assert (cond1
== cond2
);
15053 gcc_assert (cond1
== cond2
);
15054 return CC_DLEUmode
;
15057 gcc_assert (cond1
== cond2
);
15058 return CC_DGEUmode
;
15061 gcc_unreachable ();
15066 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
15068 /* All floating point compares return CCFP if it is an equality
15069 comparison, and CCFPE otherwise. */
15070 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
15093 gcc_unreachable ();
15097 /* A compare with a shifted operand. Because of canonicalization, the
15098 comparison will have to be swapped when we emit the assembler. */
15099 if (GET_MODE (y
) == SImode
15100 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15101 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15102 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15103 || GET_CODE (x
) == ROTATERT
))
15106 /* This operation is performed swapped, but since we only rely on the Z
15107 flag we don't need an additional mode. */
15108 if (GET_MODE (y
) == SImode
15109 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15110 && GET_CODE (x
) == NEG
15111 && (op
== EQ
|| op
== NE
))
15114 /* This is a special case that is used by combine to allow a
15115 comparison of a shifted byte load to be split into a zero-extend
15116 followed by a comparison of the shifted integer (only valid for
15117 equalities and unsigned inequalities). */
15118 if (GET_MODE (x
) == SImode
15119 && GET_CODE (x
) == ASHIFT
15120 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15121 && GET_CODE (XEXP (x
, 0)) == SUBREG
15122 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15123 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15124 && (op
== EQ
|| op
== NE
15125 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15126 && CONST_INT_P (y
))
15129 /* A construct for a conditional compare, if the false arm contains
15130 0, then both conditions must be true, otherwise either condition
15131 must be true. Not all conditions are possible, so CCmode is
15132 returned if it can't be done. */
15133 if (GET_CODE (x
) == IF_THEN_ELSE
15134 && (XEXP (x
, 2) == const0_rtx
15135 || XEXP (x
, 2) == const1_rtx
)
15136 && COMPARISON_P (XEXP (x
, 0))
15137 && COMPARISON_P (XEXP (x
, 1)))
15138 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15139 INTVAL (XEXP (x
, 2)));
15141 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15142 if (GET_CODE (x
) == AND
15143 && (op
== EQ
|| op
== NE
)
15144 && COMPARISON_P (XEXP (x
, 0))
15145 && COMPARISON_P (XEXP (x
, 1)))
15146 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15149 if (GET_CODE (x
) == IOR
15150 && (op
== EQ
|| op
== NE
)
15151 && COMPARISON_P (XEXP (x
, 0))
15152 && COMPARISON_P (XEXP (x
, 1)))
15153 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15156 /* An operation (on Thumb) where we want to test for a single bit.
15157 This is done by shifting that bit up into the top bit of a
15158 scratch register; we can then branch on the sign bit. */
15160 && GET_MODE (x
) == SImode
15161 && (op
== EQ
|| op
== NE
)
15162 && GET_CODE (x
) == ZERO_EXTRACT
15163 && XEXP (x
, 1) == const1_rtx
)
15166 /* An operation that sets the condition codes as a side-effect, the
15167 V flag is not set correctly, so we can only use comparisons where
15168 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15170 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15171 if (GET_MODE (x
) == SImode
15173 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15174 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15175 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15176 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15177 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15178 || GET_CODE (x
) == LSHIFTRT
15179 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15180 || GET_CODE (x
) == ROTATERT
15181 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15182 return CC_NOOVmode
;
15184 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15187 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15188 && GET_CODE (x
) == PLUS
15189 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15192 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15198 /* A DImode comparison against zero can be implemented by
15199 or'ing the two halves together. */
15200 if (y
== const0_rtx
)
15203 /* We can do an equality test in three Thumb instructions. */
15213 /* DImode unsigned comparisons can be implemented by cmp +
15214 cmpeq without a scratch register. Not worth doing in
15225 /* DImode signed and unsigned comparisons can be implemented
15226 by cmp + sbcs with a scratch register, but that does not
15227 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15228 gcc_assert (op
!= EQ
&& op
!= NE
);
15232 gcc_unreachable ();
15236 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15237 return GET_MODE (x
);
15242 /* X and Y are two things to compare using CODE. Emit the compare insn and
15243 return the rtx for register 0 in the proper mode. FP means this is a
15244 floating point compare: I don't think that it is needed on the arm. */
15246 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15250 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
15252 /* We might have X as a constant, Y as a register because of the predicates
15253 used for cmpdi. If so, force X to a register here. */
15254 if (dimode_comparison
&& !REG_P (x
))
15255 x
= force_reg (DImode
, x
);
15257 mode
= SELECT_CC_MODE (code
, x
, y
);
15258 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15260 if (dimode_comparison
15261 && mode
!= CC_CZmode
)
15265 /* To compare two non-zero values for equality, XOR them and
15266 then compare against zero. Not used for ARM mode; there
15267 CC_CZmode is cheaper. */
15268 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
15270 gcc_assert (!reload_completed
);
15271 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
15275 /* A scratch register is required. */
15276 if (reload_completed
)
15277 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
15279 scratch
= gen_rtx_SCRATCH (SImode
);
15281 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15282 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15283 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
15286 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15291 /* Generate a sequence of insns that will generate the correct return
15292 address mask depending on the physical architecture that the program
15295 arm_gen_return_addr_mask (void)
15297 rtx reg
= gen_reg_rtx (Pmode
);
15299 emit_insn (gen_return_addr_mask (reg
));
15304 arm_reload_in_hi (rtx
*operands
)
15306 rtx ref
= operands
[1];
15308 HOST_WIDE_INT offset
= 0;
15310 if (GET_CODE (ref
) == SUBREG
)
15312 offset
= SUBREG_BYTE (ref
);
15313 ref
= SUBREG_REG (ref
);
15318 /* We have a pseudo which has been spilt onto the stack; there
15319 are two cases here: the first where there is a simple
15320 stack-slot replacement and a second where the stack-slot is
15321 out of range, or is used as a subreg. */
15322 if (reg_equiv_mem (REGNO (ref
)))
15324 ref
= reg_equiv_mem (REGNO (ref
));
15325 base
= find_replacement (&XEXP (ref
, 0));
15328 /* The slot is out of range, or was dressed up in a SUBREG. */
15329 base
= reg_equiv_address (REGNO (ref
));
15332 base
= find_replacement (&XEXP (ref
, 0));
15334 /* Handle the case where the address is too complex to be offset by 1. */
15335 if (GET_CODE (base
) == MINUS
15336 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15338 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15340 emit_set_insn (base_plus
, base
);
15343 else if (GET_CODE (base
) == PLUS
)
15345 /* The addend must be CONST_INT, or we would have dealt with it above. */
15346 HOST_WIDE_INT hi
, lo
;
15348 offset
+= INTVAL (XEXP (base
, 1));
15349 base
= XEXP (base
, 0);
15351 /* Rework the address into a legal sequence of insns. */
15352 /* Valid range for lo is -4095 -> 4095 */
15355 : -((-offset
) & 0xfff));
15357 /* Corner case, if lo is the max offset then we would be out of range
15358 once we have added the additional 1 below, so bump the msb into the
15359 pre-loading insn(s). */
15363 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15364 ^ (HOST_WIDE_INT
) 0x80000000)
15365 - (HOST_WIDE_INT
) 0x80000000);
15367 gcc_assert (hi
+ lo
== offset
);
15371 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15373 /* Get the base address; addsi3 knows how to handle constants
15374 that require more than one insn. */
15375 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15381 /* Operands[2] may overlap operands[0] (though it won't overlap
15382 operands[1]), that's why we asked for a DImode reg -- so we can
15383 use the bit that does not overlap. */
15384 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15385 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15387 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15389 emit_insn (gen_zero_extendqisi2 (scratch
,
15390 gen_rtx_MEM (QImode
,
15391 plus_constant (Pmode
, base
,
15393 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15394 gen_rtx_MEM (QImode
,
15395 plus_constant (Pmode
, base
,
15397 if (!BYTES_BIG_ENDIAN
)
15398 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15399 gen_rtx_IOR (SImode
,
15402 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15406 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15407 gen_rtx_IOR (SImode
,
15408 gen_rtx_ASHIFT (SImode
, scratch
,
15410 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15413 /* Handle storing a half-word to memory during reload by synthesizing as two
15414 byte stores. Take care not to clobber the input values until after we
15415 have moved them somewhere safe. This code assumes that if the DImode
15416 scratch in operands[2] overlaps either the input value or output address
15417 in some way, then that value must die in this insn (we absolutely need
15418 two scratch registers for some corner cases). */
15420 arm_reload_out_hi (rtx
*operands
)
15422 rtx ref
= operands
[0];
15423 rtx outval
= operands
[1];
15425 HOST_WIDE_INT offset
= 0;
15427 if (GET_CODE (ref
) == SUBREG
)
15429 offset
= SUBREG_BYTE (ref
);
15430 ref
= SUBREG_REG (ref
);
15435 /* We have a pseudo which has been spilt onto the stack; there
15436 are two cases here: the first where there is a simple
15437 stack-slot replacement and a second where the stack-slot is
15438 out of range, or is used as a subreg. */
15439 if (reg_equiv_mem (REGNO (ref
)))
15441 ref
= reg_equiv_mem (REGNO (ref
));
15442 base
= find_replacement (&XEXP (ref
, 0));
15445 /* The slot is out of range, or was dressed up in a SUBREG. */
15446 base
= reg_equiv_address (REGNO (ref
));
15449 base
= find_replacement (&XEXP (ref
, 0));
15451 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15453 /* Handle the case where the address is too complex to be offset by 1. */
15454 if (GET_CODE (base
) == MINUS
15455 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15457 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15459 /* Be careful not to destroy OUTVAL. */
15460 if (reg_overlap_mentioned_p (base_plus
, outval
))
15462 /* Updating base_plus might destroy outval, see if we can
15463 swap the scratch and base_plus. */
15464 if (!reg_overlap_mentioned_p (scratch
, outval
))
15467 scratch
= base_plus
;
15472 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15474 /* Be conservative and copy OUTVAL into the scratch now,
15475 this should only be necessary if outval is a subreg
15476 of something larger than a word. */
15477 /* XXX Might this clobber base? I can't see how it can,
15478 since scratch is known to overlap with OUTVAL, and
15479 must be wider than a word. */
15480 emit_insn (gen_movhi (scratch_hi
, outval
));
15481 outval
= scratch_hi
;
15485 emit_set_insn (base_plus
, base
);
15488 else if (GET_CODE (base
) == PLUS
)
15490 /* The addend must be CONST_INT, or we would have dealt with it above. */
15491 HOST_WIDE_INT hi
, lo
;
15493 offset
+= INTVAL (XEXP (base
, 1));
15494 base
= XEXP (base
, 0);
15496 /* Rework the address into a legal sequence of insns. */
15497 /* Valid range for lo is -4095 -> 4095 */
15500 : -((-offset
) & 0xfff));
15502 /* Corner case, if lo is the max offset then we would be out of range
15503 once we have added the additional 1 below, so bump the msb into the
15504 pre-loading insn(s). */
15508 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15509 ^ (HOST_WIDE_INT
) 0x80000000)
15510 - (HOST_WIDE_INT
) 0x80000000);
15512 gcc_assert (hi
+ lo
== offset
);
15516 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15518 /* Be careful not to destroy OUTVAL. */
15519 if (reg_overlap_mentioned_p (base_plus
, outval
))
15521 /* Updating base_plus might destroy outval, see if we
15522 can swap the scratch and base_plus. */
15523 if (!reg_overlap_mentioned_p (scratch
, outval
))
15526 scratch
= base_plus
;
15531 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15533 /* Be conservative and copy outval into scratch now,
15534 this should only be necessary if outval is a
15535 subreg of something larger than a word. */
15536 /* XXX Might this clobber base? I can't see how it
15537 can, since scratch is known to overlap with
15539 emit_insn (gen_movhi (scratch_hi
, outval
));
15540 outval
= scratch_hi
;
15544 /* Get the base address; addsi3 knows how to handle constants
15545 that require more than one insn. */
15546 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15552 if (BYTES_BIG_ENDIAN
)
15554 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15555 plus_constant (Pmode
, base
,
15557 gen_lowpart (QImode
, outval
)));
15558 emit_insn (gen_lshrsi3 (scratch
,
15559 gen_rtx_SUBREG (SImode
, outval
, 0),
15561 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15563 gen_lowpart (QImode
, scratch
)));
15567 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15569 gen_lowpart (QImode
, outval
)));
15570 emit_insn (gen_lshrsi3 (scratch
,
15571 gen_rtx_SUBREG (SImode
, outval
, 0),
15573 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15574 plus_constant (Pmode
, base
,
15576 gen_lowpart (QImode
, scratch
)));
15580 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15581 (padded to the size of a word) should be passed in a register. */
15584 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15586 if (TARGET_AAPCS_BASED
)
15587 return must_pass_in_stack_var_size (mode
, type
);
15589 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15593 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15594 Return true if an argument passed on the stack should be padded upwards,
15595 i.e. if the least-significant byte has useful data.
15596 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15597 aggregate types are placed in the lowest memory address. */
15600 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15602 if (!TARGET_AAPCS_BASED
)
15603 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15605 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15612 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15613 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15614 register has useful data, and return the opposite if the most
15615 significant byte does. */
15618 arm_pad_reg_upward (machine_mode mode
,
15619 tree type
, int first ATTRIBUTE_UNUSED
)
15621 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15623 /* For AAPCS, small aggregates, small fixed-point types,
15624 and small complex types are always padded upwards. */
15627 if ((AGGREGATE_TYPE_P (type
)
15628 || TREE_CODE (type
) == COMPLEX_TYPE
15629 || FIXED_POINT_TYPE_P (type
))
15630 && int_size_in_bytes (type
) <= 4)
15635 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15636 && GET_MODE_SIZE (mode
) <= 4)
15641 /* Otherwise, use default padding. */
15642 return !BYTES_BIG_ENDIAN
;
15645 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15646 assuming that the address in the base register is word aligned. */
15648 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15650 HOST_WIDE_INT max_offset
;
15652 /* Offset must be a multiple of 4 in Thumb mode. */
15653 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15658 else if (TARGET_ARM
)
15663 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15666 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15667 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15668 Assumes that the address in the base register RN is word aligned. Pattern
15669 guarantees that both memory accesses use the same base register,
15670 the offsets are constants within the range, and the gap between the offsets is 4.
15671 If preload complete then check that registers are legal. WBACK indicates whether
15672 address is updated. LOAD indicates whether memory access is load or store. */
15674 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15675 bool wback
, bool load
)
15677 unsigned int t
, t2
, n
;
15679 if (!reload_completed
)
15682 if (!offset_ok_for_ldrd_strd (offset
))
15689 if ((TARGET_THUMB2
)
15690 && ((wback
&& (n
== t
|| n
== t2
))
15691 || (t
== SP_REGNUM
)
15692 || (t
== PC_REGNUM
)
15693 || (t2
== SP_REGNUM
)
15694 || (t2
== PC_REGNUM
)
15695 || (!load
&& (n
== PC_REGNUM
))
15696 || (load
&& (t
== t2
))
15697 /* Triggers Cortex-M3 LDRD errata. */
15698 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15702 && ((wback
&& (n
== t
|| n
== t2
))
15703 || (t2
== PC_REGNUM
)
15704 || (t
% 2 != 0) /* First destination register is not even. */
15706 /* PC can be used as base register (for offset addressing only),
15707 but it is depricated. */
15708 || (n
== PC_REGNUM
)))
15714 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15715 operand MEM's address contains an immediate offset from the base
15716 register and has no side effects, in which case it sets BASE and
15717 OFFSET accordingly. */
15719 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15723 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15725 /* TODO: Handle more general memory operand patterns, such as
15726 PRE_DEC and PRE_INC. */
15728 if (side_effects_p (mem
))
15731 /* Can't deal with subregs. */
15732 if (GET_CODE (mem
) == SUBREG
)
15735 gcc_assert (MEM_P (mem
));
15737 *offset
= const0_rtx
;
15739 addr
= XEXP (mem
, 0);
15741 /* If addr isn't valid for DImode, then we can't handle it. */
15742 if (!arm_legitimate_address_p (DImode
, addr
,
15743 reload_in_progress
|| reload_completed
))
15751 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15753 *base
= XEXP (addr
, 0);
15754 *offset
= XEXP (addr
, 1);
15755 return (REG_P (*base
) && CONST_INT_P (*offset
));
15761 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15763 /* Called from a peephole2 to replace two word-size accesses with a
15764 single LDRD/STRD instruction. Returns true iff we can generate a
15765 new instruction sequence. That is, both accesses use the same base
15766 register and the gap between constant offsets is 4. This function
15767 may reorder its operands to match ldrd/strd RTL templates.
15768 OPERANDS are the operands found by the peephole matcher;
15769 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15770 corresponding memory operands. LOAD indicaates whether the access
15771 is load or store. CONST_STORE indicates a store of constant
15772 integer values held in OPERANDS[4,5] and assumes that the pattern
15773 is of length 4 insn, for the purpose of checking dead registers.
15774 COMMUTE indicates that register operands may be reordered. */
15776 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15777 bool const_store
, bool commute
)
15780 HOST_WIDE_INT offsets
[2], offset
;
15781 rtx base
= NULL_RTX
;
15782 rtx cur_base
, cur_offset
, tmp
;
15784 HARD_REG_SET regset
;
15786 gcc_assert (!const_store
|| !load
);
15787 /* Check that the memory references are immediate offsets from the
15788 same base register. Extract the base register, the destination
15789 registers, and the corresponding memory offsets. */
15790 for (i
= 0; i
< nops
; i
++)
15792 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15797 else if (REGNO (base
) != REGNO (cur_base
))
15800 offsets
[i
] = INTVAL (cur_offset
);
15801 if (GET_CODE (operands
[i
]) == SUBREG
)
15803 tmp
= SUBREG_REG (operands
[i
]);
15804 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15809 /* Make sure there is no dependency between the individual loads. */
15810 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15811 return false; /* RAW */
15813 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15814 return false; /* WAW */
15816 /* If the same input register is used in both stores
15817 when storing different constants, try to find a free register.
15818 For example, the code
15823 can be transformed into
15826 in Thumb mode assuming that r1 is free. */
15828 && REGNO (operands
[0]) == REGNO (operands
[1])
15829 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15833 CLEAR_HARD_REG_SET (regset
);
15834 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15835 if (tmp
== NULL_RTX
)
15838 /* Use the new register in the first load to ensure that
15839 if the original input register is not dead after peephole,
15840 then it will have the correct constant value. */
15843 else if (TARGET_ARM
)
15846 int regno
= REGNO (operands
[0]);
15847 if (!peep2_reg_dead_p (4, operands
[0]))
15849 /* When the input register is even and is not dead after the
15850 pattern, it has to hold the second constant but we cannot
15851 form a legal STRD in ARM mode with this register as the second
15853 if (regno
% 2 == 0)
15856 /* Is regno-1 free? */
15857 SET_HARD_REG_SET (regset
);
15858 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15859 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15860 if (tmp
== NULL_RTX
)
15867 /* Find a DImode register. */
15868 CLEAR_HARD_REG_SET (regset
);
15869 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15870 if (tmp
!= NULL_RTX
)
15872 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15873 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15877 /* Can we use the input register to form a DI register? */
15878 SET_HARD_REG_SET (regset
);
15879 CLEAR_HARD_REG_BIT(regset
,
15880 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15881 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15882 if (tmp
== NULL_RTX
)
15884 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15888 gcc_assert (operands
[0] != NULL_RTX
);
15889 gcc_assert (operands
[1] != NULL_RTX
);
15890 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15891 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15895 /* Make sure the instructions are ordered with lower memory access first. */
15896 if (offsets
[0] > offsets
[1])
15898 gap
= offsets
[0] - offsets
[1];
15899 offset
= offsets
[1];
15901 /* Swap the instructions such that lower memory is accessed first. */
15902 SWAP_RTX (operands
[0], operands
[1]);
15903 SWAP_RTX (operands
[2], operands
[3]);
15905 SWAP_RTX (operands
[4], operands
[5]);
15909 gap
= offsets
[1] - offsets
[0];
15910 offset
= offsets
[0];
15913 /* Make sure accesses are to consecutive memory locations. */
15917 /* Make sure we generate legal instructions. */
15918 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15922 /* In Thumb state, where registers are almost unconstrained, there
15923 is little hope to fix it. */
15927 if (load
&& commute
)
15929 /* Try reordering registers. */
15930 SWAP_RTX (operands
[0], operands
[1]);
15931 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15938 /* If input registers are dead after this pattern, they can be
15939 reordered or replaced by other registers that are free in the
15940 current pattern. */
15941 if (!peep2_reg_dead_p (4, operands
[0])
15942 || !peep2_reg_dead_p (4, operands
[1]))
15945 /* Try to reorder the input registers. */
15946 /* For example, the code
15951 can be transformed into
15956 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15959 SWAP_RTX (operands
[0], operands
[1]);
15963 /* Try to find a free DI register. */
15964 CLEAR_HARD_REG_SET (regset
);
15965 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15966 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15969 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15970 if (tmp
== NULL_RTX
)
15973 /* DREG must be an even-numbered register in DImode.
15974 Split it into SI registers. */
15975 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15976 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15977 gcc_assert (operands
[0] != NULL_RTX
);
15978 gcc_assert (operands
[1] != NULL_RTX
);
15979 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15980 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15982 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15995 /* Print a symbolic form of X to the debug file, F. */
15997 arm_print_value (FILE *f
, rtx x
)
15999 switch (GET_CODE (x
))
16002 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
16006 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
16014 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
16016 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
16017 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
16025 fprintf (f
, "\"%s\"", XSTR (x
, 0));
16029 fprintf (f
, "`%s'", XSTR (x
, 0));
16033 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
16037 arm_print_value (f
, XEXP (x
, 0));
16041 arm_print_value (f
, XEXP (x
, 0));
16043 arm_print_value (f
, XEXP (x
, 1));
16051 fprintf (f
, "????");
16056 /* Routines for manipulation of the constant pool. */
16058 /* Arm instructions cannot load a large constant directly into a
16059 register; they have to come from a pc relative load. The constant
16060 must therefore be placed in the addressable range of the pc
16061 relative load. Depending on the precise pc relative load
16062 instruction the range is somewhere between 256 bytes and 4k. This
16063 means that we often have to dump a constant inside a function, and
16064 generate code to branch around it.
16066 It is important to minimize this, since the branches will slow
16067 things down and make the code larger.
16069 Normally we can hide the table after an existing unconditional
16070 branch so that there is no interruption of the flow, but in the
16071 worst case the code looks like this:
16089 We fix this by performing a scan after scheduling, which notices
16090 which instructions need to have their operands fetched from the
16091 constant table and builds the table.
16093 The algorithm starts by building a table of all the constants that
16094 need fixing up and all the natural barriers in the function (places
16095 where a constant table can be dropped without breaking the flow).
16096 For each fixup we note how far the pc-relative replacement will be
16097 able to reach and the offset of the instruction into the function.
16099 Having built the table we then group the fixes together to form
16100 tables that are as large as possible (subject to addressing
16101 constraints) and emit each table of constants after the last
16102 barrier that is within range of all the instructions in the group.
16103 If a group does not contain a barrier, then we forcibly create one
16104 by inserting a jump instruction into the flow. Once the table has
16105 been inserted, the insns are then modified to reference the
16106 relevant entry in the pool.
16108 Possible enhancements to the algorithm (not implemented) are:
16110 1) For some processors and object formats, there may be benefit in
16111 aligning the pools to the start of cache lines; this alignment
16112 would need to be taken into account when calculating addressability
16115 /* These typedefs are located at the start of this file, so that
16116 they can be used in the prototypes there. This comment is to
16117 remind readers of that fact so that the following structures
16118 can be understood more easily.
16120 typedef struct minipool_node Mnode;
16121 typedef struct minipool_fixup Mfix; */
16123 struct minipool_node
16125 /* Doubly linked chain of entries. */
16128 /* The maximum offset into the code that this entry can be placed. While
16129 pushing fixes for forward references, all entries are sorted in order
16130 of increasing max_address. */
16131 HOST_WIDE_INT max_address
;
16132 /* Similarly for an entry inserted for a backwards ref. */
16133 HOST_WIDE_INT min_address
;
16134 /* The number of fixes referencing this entry. This can become zero
16135 if we "unpush" an entry. In this case we ignore the entry when we
16136 come to emit the code. */
16138 /* The offset from the start of the minipool. */
16139 HOST_WIDE_INT offset
;
16140 /* The value in table. */
16142 /* The mode of value. */
16144 /* The size of the value. With iWMMXt enabled
16145 sizes > 4 also imply an alignment of 8-bytes. */
16149 struct minipool_fixup
16153 HOST_WIDE_INT address
;
16159 HOST_WIDE_INT forwards
;
16160 HOST_WIDE_INT backwards
;
16163 /* Fixes less than a word need padding out to a word boundary. */
16164 #define MINIPOOL_FIX_SIZE(mode) \
16165 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16167 static Mnode
* minipool_vector_head
;
16168 static Mnode
* minipool_vector_tail
;
16169 static rtx_code_label
*minipool_vector_label
;
16170 static int minipool_pad
;
16172 /* The linked list of all minipool fixes required for this function. */
16173 Mfix
* minipool_fix_head
;
16174 Mfix
* minipool_fix_tail
;
16175 /* The fix entry for the current minipool, once it has been placed. */
16176 Mfix
* minipool_barrier
;
16178 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16179 #define JUMP_TABLES_IN_TEXT_SECTION 0
16182 static HOST_WIDE_INT
16183 get_jump_table_size (rtx_jump_table_data
*insn
)
16185 /* ADDR_VECs only take room if read-only data does into the text
16187 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16189 rtx body
= PATTERN (insn
);
16190 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16191 HOST_WIDE_INT size
;
16192 HOST_WIDE_INT modesize
;
16194 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16195 size
= modesize
* XVECLEN (body
, elt
);
16199 /* Round up size of TBB table to a halfword boundary. */
16200 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
16203 /* No padding necessary for TBH. */
16206 /* Add two bytes for alignment on Thumb. */
16211 gcc_unreachable ();
16219 /* Return the maximum amount of padding that will be inserted before
16222 static HOST_WIDE_INT
16223 get_label_padding (rtx label
)
16225 HOST_WIDE_INT align
, min_insn_size
;
16227 align
= 1 << label_to_alignment (label
);
16228 min_insn_size
= TARGET_THUMB
? 2 : 4;
16229 return align
> min_insn_size
? align
- min_insn_size
: 0;
16232 /* Move a minipool fix MP from its current location to before MAX_MP.
16233 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16234 constraints may need updating. */
16236 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16237 HOST_WIDE_INT max_address
)
16239 /* The code below assumes these are different. */
16240 gcc_assert (mp
!= max_mp
);
16242 if (max_mp
== NULL
)
16244 if (max_address
< mp
->max_address
)
16245 mp
->max_address
= max_address
;
16249 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16250 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16252 mp
->max_address
= max_address
;
16254 /* Unlink MP from its current position. Since max_mp is non-null,
16255 mp->prev must be non-null. */
16256 mp
->prev
->next
= mp
->next
;
16257 if (mp
->next
!= NULL
)
16258 mp
->next
->prev
= mp
->prev
;
16260 minipool_vector_tail
= mp
->prev
;
16262 /* Re-insert it before MAX_MP. */
16264 mp
->prev
= max_mp
->prev
;
16267 if (mp
->prev
!= NULL
)
16268 mp
->prev
->next
= mp
;
16270 minipool_vector_head
= mp
;
16273 /* Save the new entry. */
16276 /* Scan over the preceding entries and adjust their addresses as
16278 while (mp
->prev
!= NULL
16279 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16281 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16288 /* Add a constant to the minipool for a forward reference. Returns the
16289 node added or NULL if the constant will not fit in this pool. */
16291 add_minipool_forward_ref (Mfix
*fix
)
16293 /* If set, max_mp is the first pool_entry that has a lower
16294 constraint than the one we are trying to add. */
16295 Mnode
* max_mp
= NULL
;
16296 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16299 /* If the minipool starts before the end of FIX->INSN then this FIX
16300 can not be placed into the current pool. Furthermore, adding the
16301 new constant pool entry may cause the pool to start FIX_SIZE bytes
16303 if (minipool_vector_head
&&
16304 (fix
->address
+ get_attr_length (fix
->insn
)
16305 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16308 /* Scan the pool to see if a constant with the same value has
16309 already been added. While we are doing this, also note the
16310 location where we must insert the constant if it doesn't already
16312 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16314 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16315 && fix
->mode
== mp
->mode
16316 && (!LABEL_P (fix
->value
)
16317 || (CODE_LABEL_NUMBER (fix
->value
)
16318 == CODE_LABEL_NUMBER (mp
->value
)))
16319 && rtx_equal_p (fix
->value
, mp
->value
))
16321 /* More than one fix references this entry. */
16323 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16326 /* Note the insertion point if necessary. */
16328 && mp
->max_address
> max_address
)
16331 /* If we are inserting an 8-bytes aligned quantity and
16332 we have not already found an insertion point, then
16333 make sure that all such 8-byte aligned quantities are
16334 placed at the start of the pool. */
16335 if (ARM_DOUBLEWORD_ALIGN
16337 && fix
->fix_size
>= 8
16338 && mp
->fix_size
< 8)
16341 max_address
= mp
->max_address
;
16345 /* The value is not currently in the minipool, so we need to create
16346 a new entry for it. If MAX_MP is NULL, the entry will be put on
16347 the end of the list since the placement is less constrained than
16348 any existing entry. Otherwise, we insert the new fix before
16349 MAX_MP and, if necessary, adjust the constraints on the other
16352 mp
->fix_size
= fix
->fix_size
;
16353 mp
->mode
= fix
->mode
;
16354 mp
->value
= fix
->value
;
16356 /* Not yet required for a backwards ref. */
16357 mp
->min_address
= -65536;
16359 if (max_mp
== NULL
)
16361 mp
->max_address
= max_address
;
16363 mp
->prev
= minipool_vector_tail
;
16365 if (mp
->prev
== NULL
)
16367 minipool_vector_head
= mp
;
16368 minipool_vector_label
= gen_label_rtx ();
16371 mp
->prev
->next
= mp
;
16373 minipool_vector_tail
= mp
;
16377 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16378 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16380 mp
->max_address
= max_address
;
16383 mp
->prev
= max_mp
->prev
;
16385 if (mp
->prev
!= NULL
)
16386 mp
->prev
->next
= mp
;
16388 minipool_vector_head
= mp
;
16391 /* Save the new entry. */
16394 /* Scan over the preceding entries and adjust their addresses as
16396 while (mp
->prev
!= NULL
16397 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16399 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16407 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16408 HOST_WIDE_INT min_address
)
16410 HOST_WIDE_INT offset
;
16412 /* The code below assumes these are different. */
16413 gcc_assert (mp
!= min_mp
);
16415 if (min_mp
== NULL
)
16417 if (min_address
> mp
->min_address
)
16418 mp
->min_address
= min_address
;
16422 /* We will adjust this below if it is too loose. */
16423 mp
->min_address
= min_address
;
16425 /* Unlink MP from its current position. Since min_mp is non-null,
16426 mp->next must be non-null. */
16427 mp
->next
->prev
= mp
->prev
;
16428 if (mp
->prev
!= NULL
)
16429 mp
->prev
->next
= mp
->next
;
16431 minipool_vector_head
= mp
->next
;
16433 /* Reinsert it after MIN_MP. */
16435 mp
->next
= min_mp
->next
;
16437 if (mp
->next
!= NULL
)
16438 mp
->next
->prev
= mp
;
16440 minipool_vector_tail
= mp
;
16446 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16448 mp
->offset
= offset
;
16449 if (mp
->refcount
> 0)
16450 offset
+= mp
->fix_size
;
16452 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16453 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16459 /* Add a constant to the minipool for a backward reference. Returns the
16460 node added or NULL if the constant will not fit in this pool.
16462 Note that the code for insertion for a backwards reference can be
16463 somewhat confusing because the calculated offsets for each fix do
16464 not take into account the size of the pool (which is still under
16467 add_minipool_backward_ref (Mfix
*fix
)
16469 /* If set, min_mp is the last pool_entry that has a lower constraint
16470 than the one we are trying to add. */
16471 Mnode
*min_mp
= NULL
;
16472 /* This can be negative, since it is only a constraint. */
16473 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16476 /* If we can't reach the current pool from this insn, or if we can't
16477 insert this entry at the end of the pool without pushing other
16478 fixes out of range, then we don't try. This ensures that we
16479 can't fail later on. */
16480 if (min_address
>= minipool_barrier
->address
16481 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16482 >= minipool_barrier
->address
))
16485 /* Scan the pool to see if a constant with the same value has
16486 already been added. While we are doing this, also note the
16487 location where we must insert the constant if it doesn't already
16489 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16491 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16492 && fix
->mode
== mp
->mode
16493 && (!LABEL_P (fix
->value
)
16494 || (CODE_LABEL_NUMBER (fix
->value
)
16495 == CODE_LABEL_NUMBER (mp
->value
)))
16496 && rtx_equal_p (fix
->value
, mp
->value
)
16497 /* Check that there is enough slack to move this entry to the
16498 end of the table (this is conservative). */
16499 && (mp
->max_address
16500 > (minipool_barrier
->address
16501 + minipool_vector_tail
->offset
16502 + minipool_vector_tail
->fix_size
)))
16505 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16508 if (min_mp
!= NULL
)
16509 mp
->min_address
+= fix
->fix_size
;
16512 /* Note the insertion point if necessary. */
16513 if (mp
->min_address
< min_address
)
16515 /* For now, we do not allow the insertion of 8-byte alignment
16516 requiring nodes anywhere but at the start of the pool. */
16517 if (ARM_DOUBLEWORD_ALIGN
16518 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16523 else if (mp
->max_address
16524 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16526 /* Inserting before this entry would push the fix beyond
16527 its maximum address (which can happen if we have
16528 re-located a forwards fix); force the new fix to come
16530 if (ARM_DOUBLEWORD_ALIGN
16531 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16536 min_address
= mp
->min_address
+ fix
->fix_size
;
16539 /* Do not insert a non-8-byte aligned quantity before 8-byte
16540 aligned quantities. */
16541 else if (ARM_DOUBLEWORD_ALIGN
16542 && fix
->fix_size
< 8
16543 && mp
->fix_size
>= 8)
16546 min_address
= mp
->min_address
+ fix
->fix_size
;
16551 /* We need to create a new entry. */
16553 mp
->fix_size
= fix
->fix_size
;
16554 mp
->mode
= fix
->mode
;
16555 mp
->value
= fix
->value
;
16557 mp
->max_address
= minipool_barrier
->address
+ 65536;
16559 mp
->min_address
= min_address
;
16561 if (min_mp
== NULL
)
16564 mp
->next
= minipool_vector_head
;
16566 if (mp
->next
== NULL
)
16568 minipool_vector_tail
= mp
;
16569 minipool_vector_label
= gen_label_rtx ();
16572 mp
->next
->prev
= mp
;
16574 minipool_vector_head
= mp
;
16578 mp
->next
= min_mp
->next
;
16582 if (mp
->next
!= NULL
)
16583 mp
->next
->prev
= mp
;
16585 minipool_vector_tail
= mp
;
16588 /* Save the new entry. */
16596 /* Scan over the following entries and adjust their offsets. */
16597 while (mp
->next
!= NULL
)
16599 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16600 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16603 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16605 mp
->next
->offset
= mp
->offset
;
16614 assign_minipool_offsets (Mfix
*barrier
)
16616 HOST_WIDE_INT offset
= 0;
16619 minipool_barrier
= barrier
;
16621 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16623 mp
->offset
= offset
;
16625 if (mp
->refcount
> 0)
16626 offset
+= mp
->fix_size
;
16630 /* Output the literal table */
16632 dump_minipool (rtx_insn
*scan
)
16638 if (ARM_DOUBLEWORD_ALIGN
)
16639 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16640 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16647 fprintf (dump_file
,
16648 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16649 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16651 scan
= emit_label_after (gen_label_rtx (), scan
);
16652 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16653 scan
= emit_label_after (minipool_vector_label
, scan
);
16655 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16657 if (mp
->refcount
> 0)
16661 fprintf (dump_file
,
16662 ";; Offset %u, min %ld, max %ld ",
16663 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16664 (unsigned long) mp
->max_address
);
16665 arm_print_value (dump_file
, mp
->value
);
16666 fputc ('\n', dump_file
);
16669 switch (mp
->fix_size
)
16671 #ifdef HAVE_consttable_1
16673 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16677 #ifdef HAVE_consttable_2
16679 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16683 #ifdef HAVE_consttable_4
16685 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16689 #ifdef HAVE_consttable_8
16691 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16695 #ifdef HAVE_consttable_16
16697 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16702 gcc_unreachable ();
16710 minipool_vector_head
= minipool_vector_tail
= NULL
;
16711 scan
= emit_insn_after (gen_consttable_end (), scan
);
16712 scan
= emit_barrier_after (scan
);
16715 /* Return the cost of forcibly inserting a barrier after INSN. */
16717 arm_barrier_cost (rtx insn
)
16719 /* Basing the location of the pool on the loop depth is preferable,
16720 but at the moment, the basic block information seems to be
16721 corrupt by this stage of the compilation. */
16722 int base_cost
= 50;
16723 rtx next
= next_nonnote_insn (insn
);
16725 if (next
!= NULL
&& LABEL_P (next
))
16728 switch (GET_CODE (insn
))
16731 /* It will always be better to place the table before the label, rather
16740 return base_cost
- 10;
16743 return base_cost
+ 10;
16747 /* Find the best place in the insn stream in the range
16748 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16749 Create the barrier by inserting a jump and add a new fix entry for
16752 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16754 HOST_WIDE_INT count
= 0;
16755 rtx_barrier
*barrier
;
16756 rtx_insn
*from
= fix
->insn
;
16757 /* The instruction after which we will insert the jump. */
16758 rtx_insn
*selected
= NULL
;
16760 /* The address at which the jump instruction will be placed. */
16761 HOST_WIDE_INT selected_address
;
16763 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16764 rtx_code_label
*label
= gen_label_rtx ();
16766 selected_cost
= arm_barrier_cost (from
);
16767 selected_address
= fix
->address
;
16769 while (from
&& count
< max_count
)
16771 rtx_jump_table_data
*tmp
;
16774 /* This code shouldn't have been called if there was a natural barrier
16776 gcc_assert (!BARRIER_P (from
));
16778 /* Count the length of this insn. This must stay in sync with the
16779 code that pushes minipool fixes. */
16780 if (LABEL_P (from
))
16781 count
+= get_label_padding (from
);
16783 count
+= get_attr_length (from
);
16785 /* If there is a jump table, add its length. */
16786 if (tablejump_p (from
, NULL
, &tmp
))
16788 count
+= get_jump_table_size (tmp
);
16790 /* Jump tables aren't in a basic block, so base the cost on
16791 the dispatch insn. If we select this location, we will
16792 still put the pool after the table. */
16793 new_cost
= arm_barrier_cost (from
);
16795 if (count
< max_count
16796 && (!selected
|| new_cost
<= selected_cost
))
16799 selected_cost
= new_cost
;
16800 selected_address
= fix
->address
+ count
;
16803 /* Continue after the dispatch table. */
16804 from
= NEXT_INSN (tmp
);
16808 new_cost
= arm_barrier_cost (from
);
16810 if (count
< max_count
16811 && (!selected
|| new_cost
<= selected_cost
))
16814 selected_cost
= new_cost
;
16815 selected_address
= fix
->address
+ count
;
16818 from
= NEXT_INSN (from
);
16821 /* Make sure that we found a place to insert the jump. */
16822 gcc_assert (selected
);
16824 /* Make sure we do not split a call and its corresponding
16825 CALL_ARG_LOCATION note. */
16826 if (CALL_P (selected
))
16828 rtx_insn
*next
= NEXT_INSN (selected
);
16829 if (next
&& NOTE_P (next
)
16830 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16834 /* Create a new JUMP_INSN that branches around a barrier. */
16835 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16836 JUMP_LABEL (from
) = label
;
16837 barrier
= emit_barrier_after (from
);
16838 emit_label_after (label
, barrier
);
16840 /* Create a minipool barrier entry for the new barrier. */
16841 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16842 new_fix
->insn
= barrier
;
16843 new_fix
->address
= selected_address
;
16844 new_fix
->next
= fix
->next
;
16845 fix
->next
= new_fix
;
16850 /* Record that there is a natural barrier in the insn stream at
16853 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16855 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16858 fix
->address
= address
;
16861 if (minipool_fix_head
!= NULL
)
16862 minipool_fix_tail
->next
= fix
;
16864 minipool_fix_head
= fix
;
16866 minipool_fix_tail
= fix
;
16869 /* Record INSN, which will need fixing up to load a value from the
16870 minipool. ADDRESS is the offset of the insn since the start of the
16871 function; LOC is a pointer to the part of the insn which requires
16872 fixing; VALUE is the constant that must be loaded, which is of type
16875 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16876 machine_mode mode
, rtx value
)
16878 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16881 fix
->address
= address
;
16884 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16885 fix
->value
= value
;
16886 fix
->forwards
= get_attr_pool_range (insn
);
16887 fix
->backwards
= get_attr_neg_pool_range (insn
);
16888 fix
->minipool
= NULL
;
16890 /* If an insn doesn't have a range defined for it, then it isn't
16891 expecting to be reworked by this code. Better to stop now than
16892 to generate duff assembly code. */
16893 gcc_assert (fix
->forwards
|| fix
->backwards
);
16895 /* If an entry requires 8-byte alignment then assume all constant pools
16896 require 4 bytes of padding. Trying to do this later on a per-pool
16897 basis is awkward because existing pool entries have to be modified. */
16898 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16903 fprintf (dump_file
,
16904 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16905 GET_MODE_NAME (mode
),
16906 INSN_UID (insn
), (unsigned long) address
,
16907 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16908 arm_print_value (dump_file
, fix
->value
);
16909 fprintf (dump_file
, "\n");
16912 /* Add it to the chain of fixes. */
16915 if (minipool_fix_head
!= NULL
)
16916 minipool_fix_tail
->next
= fix
;
16918 minipool_fix_head
= fix
;
16920 minipool_fix_tail
= fix
;
16923 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16924 Returns the number of insns needed, or 99 if we always want to synthesize
16927 arm_max_const_double_inline_cost ()
16929 /* Let the value get synthesized to avoid the use of literal pools. */
16930 if (arm_disable_literal_pool
)
16933 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16936 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16937 Returns the number of insns needed, or 99 if we don't know how to
16940 arm_const_double_inline_cost (rtx val
)
16942 rtx lowpart
, highpart
;
16945 mode
= GET_MODE (val
);
16947 if (mode
== VOIDmode
)
16950 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16952 lowpart
= gen_lowpart (SImode
, val
);
16953 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16955 gcc_assert (CONST_INT_P (lowpart
));
16956 gcc_assert (CONST_INT_P (highpart
));
16958 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16959 NULL_RTX
, NULL_RTX
, 0, 0)
16960 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16961 NULL_RTX
, NULL_RTX
, 0, 0));
16964 /* Cost of loading a SImode constant. */
16966 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16968 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16969 NULL_RTX
, NULL_RTX
, 1, 0);
16972 /* Return true if it is worthwhile to split a 64-bit constant into two
16973 32-bit operations. This is the case if optimizing for size, or
16974 if we have load delay slots, or if one 32-bit part can be done with
16975 a single data operation. */
16977 arm_const_double_by_parts (rtx val
)
16979 machine_mode mode
= GET_MODE (val
);
16982 if (optimize_size
|| arm_ld_sched
)
16985 if (mode
== VOIDmode
)
16988 part
= gen_highpart_mode (SImode
, mode
, val
);
16990 gcc_assert (CONST_INT_P (part
));
16992 if (const_ok_for_arm (INTVAL (part
))
16993 || const_ok_for_arm (~INTVAL (part
)))
16996 part
= gen_lowpart (SImode
, val
);
16998 gcc_assert (CONST_INT_P (part
));
17000 if (const_ok_for_arm (INTVAL (part
))
17001 || const_ok_for_arm (~INTVAL (part
)))
17007 /* Return true if it is possible to inline both the high and low parts
17008 of a 64-bit constant into 32-bit data processing instructions. */
17010 arm_const_double_by_immediates (rtx val
)
17012 machine_mode mode
= GET_MODE (val
);
17015 if (mode
== VOIDmode
)
17018 part
= gen_highpart_mode (SImode
, mode
, val
);
17020 gcc_assert (CONST_INT_P (part
));
17022 if (!const_ok_for_arm (INTVAL (part
)))
17025 part
= gen_lowpart (SImode
, val
);
17027 gcc_assert (CONST_INT_P (part
));
17029 if (!const_ok_for_arm (INTVAL (part
)))
17035 /* Scan INSN and note any of its operands that need fixing.
17036 If DO_PUSHES is false we do not actually push any of the fixups
17039 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
17043 extract_constrain_insn (insn
);
17045 if (recog_data
.n_alternatives
== 0)
17048 /* Fill in recog_op_alt with information about the constraints of
17050 preprocess_constraints (insn
);
17052 const operand_alternative
*op_alt
= which_op_alt ();
17053 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
17055 /* Things we need to fix can only occur in inputs. */
17056 if (recog_data
.operand_type
[opno
] != OP_IN
)
17059 /* If this alternative is a memory reference, then any mention
17060 of constants in this alternative is really to fool reload
17061 into allowing us to accept one there. We need to fix them up
17062 now so that we output the right code. */
17063 if (op_alt
[opno
].memory_ok
)
17065 rtx op
= recog_data
.operand
[opno
];
17067 if (CONSTANT_P (op
))
17070 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
17071 recog_data
.operand_mode
[opno
], op
);
17073 else if (MEM_P (op
)
17074 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
17075 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
17079 rtx cop
= avoid_constant_pool_reference (op
);
17081 /* Casting the address of something to a mode narrower
17082 than a word can cause avoid_constant_pool_reference()
17083 to return the pool reference itself. That's no good to
17084 us here. Lets just hope that we can use the
17085 constant pool value directly. */
17087 cop
= get_pool_constant (XEXP (op
, 0));
17089 push_minipool_fix (insn
, address
,
17090 recog_data
.operand_loc
[opno
],
17091 recog_data
.operand_mode
[opno
], cop
);
17101 /* Rewrite move insn into subtract of 0 if the condition codes will
17102 be useful in next conditional jump insn. */
17105 thumb1_reorg (void)
17109 FOR_EACH_BB_FN (bb
, cfun
)
17112 rtx pat
, op0
, set
= NULL
;
17113 rtx_insn
*prev
, *insn
= BB_END (bb
);
17114 bool insn_clobbered
= false;
17116 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17117 insn
= PREV_INSN (insn
);
17119 /* Find the last cbranchsi4_insn in basic block BB. */
17120 if (insn
== BB_HEAD (bb
)
17121 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17124 /* Get the register with which we are comparing. */
17125 pat
= PATTERN (insn
);
17126 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
17128 /* Find the first flag setting insn before INSN in basic block BB. */
17129 gcc_assert (insn
!= BB_HEAD (bb
));
17130 for (prev
= PREV_INSN (insn
);
17132 && prev
!= BB_HEAD (bb
)
17134 || DEBUG_INSN_P (prev
)
17135 || ((set
= single_set (prev
)) != NULL
17136 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17137 prev
= PREV_INSN (prev
))
17139 if (reg_set_p (op0
, prev
))
17140 insn_clobbered
= true;
17143 /* Skip if op0 is clobbered by insn other than prev. */
17144 if (insn_clobbered
)
17150 dest
= SET_DEST (set
);
17151 src
= SET_SRC (set
);
17152 if (!low_register_operand (dest
, SImode
)
17153 || !low_register_operand (src
, SImode
))
17156 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17157 in INSN. Both src and dest of the move insn are checked. */
17158 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17160 dest
= copy_rtx (dest
);
17161 src
= copy_rtx (src
);
17162 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17163 PATTERN (prev
) = gen_rtx_SET (VOIDmode
, dest
, src
);
17164 INSN_CODE (prev
) = -1;
17165 /* Set test register in INSN to dest. */
17166 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
17167 INSN_CODE (insn
) = -1;
17172 /* Convert instructions to their cc-clobbering variant if possible, since
17173 that allows us to use smaller encodings. */
17176 thumb2_reorg (void)
17181 INIT_REG_SET (&live
);
17183 /* We are freeing block_for_insn in the toplev to keep compatibility
17184 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17185 compute_bb_for_insn ();
17188 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17190 FOR_EACH_BB_FN (bb
, cfun
)
17192 if (current_tune
->disparage_flag_setting_t16_encodings
17193 && optimize_bb_for_speed_p (bb
))
17197 Convert_Action action
= SKIP
;
17198 Convert_Action action_for_partial_flag_setting
17199 = (current_tune
->disparage_partial_flag_setting_t16_encodings
17200 && optimize_bb_for_speed_p (bb
))
17203 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17204 df_simulate_initialize_backwards (bb
, &live
);
17205 FOR_BB_INSNS_REVERSE (bb
, insn
)
17207 if (NONJUMP_INSN_P (insn
)
17208 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17209 && GET_CODE (PATTERN (insn
)) == SET
)
17212 rtx pat
= PATTERN (insn
);
17213 rtx dst
= XEXP (pat
, 0);
17214 rtx src
= XEXP (pat
, 1);
17215 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17217 if (!OBJECT_P (src
))
17218 op0
= XEXP (src
, 0);
17220 if (BINARY_P (src
))
17221 op1
= XEXP (src
, 1);
17223 if (low_register_operand (dst
, SImode
))
17225 switch (GET_CODE (src
))
17228 /* Adding two registers and storing the result
17229 in the first source is already a 16-bit
17231 if (rtx_equal_p (dst
, op0
)
17232 && register_operand (op1
, SImode
))
17235 if (low_register_operand (op0
, SImode
))
17237 /* ADDS <Rd>,<Rn>,<Rm> */
17238 if (low_register_operand (op1
, SImode
))
17240 /* ADDS <Rdn>,#<imm8> */
17241 /* SUBS <Rdn>,#<imm8> */
17242 else if (rtx_equal_p (dst
, op0
)
17243 && CONST_INT_P (op1
)
17244 && IN_RANGE (INTVAL (op1
), -255, 255))
17246 /* ADDS <Rd>,<Rn>,#<imm3> */
17247 /* SUBS <Rd>,<Rn>,#<imm3> */
17248 else if (CONST_INT_P (op1
)
17249 && IN_RANGE (INTVAL (op1
), -7, 7))
17252 /* ADCS <Rd>, <Rn> */
17253 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17254 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17255 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17257 && COMPARISON_P (op1
)
17258 && cc_register (XEXP (op1
, 0), VOIDmode
)
17259 && maybe_get_arm_condition_code (op1
) == ARM_CS
17260 && XEXP (op1
, 1) == const0_rtx
)
17265 /* RSBS <Rd>,<Rn>,#0
17266 Not handled here: see NEG below. */
17267 /* SUBS <Rd>,<Rn>,#<imm3>
17269 Not handled here: see PLUS above. */
17270 /* SUBS <Rd>,<Rn>,<Rm> */
17271 if (low_register_operand (op0
, SImode
)
17272 && low_register_operand (op1
, SImode
))
17277 /* MULS <Rdm>,<Rn>,<Rdm>
17278 As an exception to the rule, this is only used
17279 when optimizing for size since MULS is slow on all
17280 known implementations. We do not even want to use
17281 MULS in cold code, if optimizing for speed, so we
17282 test the global flag here. */
17283 if (!optimize_size
)
17285 /* else fall through. */
17289 /* ANDS <Rdn>,<Rm> */
17290 if (rtx_equal_p (dst
, op0
)
17291 && low_register_operand (op1
, SImode
))
17292 action
= action_for_partial_flag_setting
;
17293 else if (rtx_equal_p (dst
, op1
)
17294 && low_register_operand (op0
, SImode
))
17295 action
= action_for_partial_flag_setting
== SKIP
17296 ? SKIP
: SWAP_CONV
;
17302 /* ASRS <Rdn>,<Rm> */
17303 /* LSRS <Rdn>,<Rm> */
17304 /* LSLS <Rdn>,<Rm> */
17305 if (rtx_equal_p (dst
, op0
)
17306 && low_register_operand (op1
, SImode
))
17307 action
= action_for_partial_flag_setting
;
17308 /* ASRS <Rd>,<Rm>,#<imm5> */
17309 /* LSRS <Rd>,<Rm>,#<imm5> */
17310 /* LSLS <Rd>,<Rm>,#<imm5> */
17311 else if (low_register_operand (op0
, SImode
)
17312 && CONST_INT_P (op1
)
17313 && IN_RANGE (INTVAL (op1
), 0, 31))
17314 action
= action_for_partial_flag_setting
;
17318 /* RORS <Rdn>,<Rm> */
17319 if (rtx_equal_p (dst
, op0
)
17320 && low_register_operand (op1
, SImode
))
17321 action
= action_for_partial_flag_setting
;
17325 /* MVNS <Rd>,<Rm> */
17326 if (low_register_operand (op0
, SImode
))
17327 action
= action_for_partial_flag_setting
;
17331 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17332 if (low_register_operand (op0
, SImode
))
17337 /* MOVS <Rd>,#<imm8> */
17338 if (CONST_INT_P (src
)
17339 && IN_RANGE (INTVAL (src
), 0, 255))
17340 action
= action_for_partial_flag_setting
;
17344 /* MOVS and MOV<c> with registers have different
17345 encodings, so are not relevant here. */
17353 if (action
!= SKIP
)
17355 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17356 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17359 if (action
== SWAP_CONV
)
17361 src
= copy_rtx (src
);
17362 XEXP (src
, 0) = op1
;
17363 XEXP (src
, 1) = op0
;
17364 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
17365 vec
= gen_rtvec (2, pat
, clobber
);
17367 else /* action == CONV */
17368 vec
= gen_rtvec (2, pat
, clobber
);
17370 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17371 INSN_CODE (insn
) = -1;
17375 if (NONDEBUG_INSN_P (insn
))
17376 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17380 CLEAR_REG_SET (&live
);
17383 /* Gcc puts the pool in the wrong place for ARM, since we can only
17384 load addresses a limited distance around the pc. We do some
17385 special munging to move the constant pool values to the correct
17386 point in the code. */
17391 HOST_WIDE_INT address
= 0;
17396 else if (TARGET_THUMB2
)
17399 /* Ensure all insns that must be split have been split at this point.
17400 Otherwise, the pool placement code below may compute incorrect
17401 insn lengths. Note that when optimizing, all insns have already
17402 been split at this point. */
17404 split_all_insns_noflow ();
17406 minipool_fix_head
= minipool_fix_tail
= NULL
;
17408 /* The first insn must always be a note, or the code below won't
17409 scan it properly. */
17410 insn
= get_insns ();
17411 gcc_assert (NOTE_P (insn
));
17414 /* Scan all the insns and record the operands that will need fixing. */
17415 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17417 if (BARRIER_P (insn
))
17418 push_minipool_barrier (insn
, address
);
17419 else if (INSN_P (insn
))
17421 rtx_jump_table_data
*table
;
17423 note_invalid_constants (insn
, address
, true);
17424 address
+= get_attr_length (insn
);
17426 /* If the insn is a vector jump, add the size of the table
17427 and skip the table. */
17428 if (tablejump_p (insn
, NULL
, &table
))
17430 address
+= get_jump_table_size (table
);
17434 else if (LABEL_P (insn
))
17435 /* Add the worst-case padding due to alignment. We don't add
17436 the _current_ padding because the minipool insertions
17437 themselves might change it. */
17438 address
+= get_label_padding (insn
);
17441 fix
= minipool_fix_head
;
17443 /* Now scan the fixups and perform the required changes. */
17448 Mfix
* last_added_fix
;
17449 Mfix
* last_barrier
= NULL
;
17452 /* Skip any further barriers before the next fix. */
17453 while (fix
&& BARRIER_P (fix
->insn
))
17456 /* No more fixes. */
17460 last_added_fix
= NULL
;
17462 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17464 if (BARRIER_P (ftmp
->insn
))
17466 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17469 last_barrier
= ftmp
;
17471 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17474 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17477 /* If we found a barrier, drop back to that; any fixes that we
17478 could have reached but come after the barrier will now go in
17479 the next mini-pool. */
17480 if (last_barrier
!= NULL
)
17482 /* Reduce the refcount for those fixes that won't go into this
17484 for (fdel
= last_barrier
->next
;
17485 fdel
&& fdel
!= ftmp
;
17488 fdel
->minipool
->refcount
--;
17489 fdel
->minipool
= NULL
;
17492 ftmp
= last_barrier
;
17496 /* ftmp is first fix that we can't fit into this pool and
17497 there no natural barriers that we could use. Insert a
17498 new barrier in the code somewhere between the previous
17499 fix and this one, and arrange to jump around it. */
17500 HOST_WIDE_INT max_address
;
17502 /* The last item on the list of fixes must be a barrier, so
17503 we can never run off the end of the list of fixes without
17504 last_barrier being set. */
17507 max_address
= minipool_vector_head
->max_address
;
17508 /* Check that there isn't another fix that is in range that
17509 we couldn't fit into this pool because the pool was
17510 already too large: we need to put the pool before such an
17511 instruction. The pool itself may come just after the
17512 fix because create_fix_barrier also allows space for a
17513 jump instruction. */
17514 if (ftmp
->address
< max_address
)
17515 max_address
= ftmp
->address
+ 1;
17517 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17520 assign_minipool_offsets (last_barrier
);
17524 if (!BARRIER_P (ftmp
->insn
)
17525 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17532 /* Scan over the fixes we have identified for this pool, fixing them
17533 up and adding the constants to the pool itself. */
17534 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17535 this_fix
= this_fix
->next
)
17536 if (!BARRIER_P (this_fix
->insn
))
17539 = plus_constant (Pmode
,
17540 gen_rtx_LABEL_REF (VOIDmode
,
17541 minipool_vector_label
),
17542 this_fix
->minipool
->offset
);
17543 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17546 dump_minipool (last_barrier
->insn
);
17550 /* From now on we must synthesize any constants that we can't handle
17551 directly. This can happen if the RTL gets split during final
17552 instruction generation. */
17553 cfun
->machine
->after_arm_reorg
= 1;
17555 /* Free the minipool memory. */
17556 obstack_free (&minipool_obstack
, minipool_startobj
);
17559 /* Routines to output assembly language. */
17561 /* Return string representation of passed in real value. */
17562 static const char *
17563 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17565 if (!fp_consts_inited
)
17568 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
17572 /* OPERANDS[0] is the entire list of insns that constitute pop,
17573 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17574 is in the list, UPDATE is true iff the list contains explicit
17575 update of base register. */
17577 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17583 const char *conditional
;
17584 int num_saves
= XVECLEN (operands
[0], 0);
17585 unsigned int regno
;
17586 unsigned int regno_base
= REGNO (operands
[1]);
17589 offset
+= update
? 1 : 0;
17590 offset
+= return_pc
? 1 : 0;
17592 /* Is the base register in the list? */
17593 for (i
= offset
; i
< num_saves
; i
++)
17595 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17596 /* If SP is in the list, then the base register must be SP. */
17597 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17598 /* If base register is in the list, there must be no explicit update. */
17599 if (regno
== regno_base
)
17600 gcc_assert (!update
);
17603 conditional
= reverse
? "%?%D0" : "%?%d0";
17604 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
17606 /* Output pop (not stmfd) because it has a shorter encoding. */
17607 gcc_assert (update
);
17608 sprintf (pattern
, "pop%s\t{", conditional
);
17612 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17613 It's just a convention, their semantics are identical. */
17614 if (regno_base
== SP_REGNUM
)
17615 sprintf (pattern
, "ldm%sfd\t", conditional
);
17616 else if (TARGET_UNIFIED_ASM
)
17617 sprintf (pattern
, "ldmia%s\t", conditional
);
17619 sprintf (pattern
, "ldm%sia\t", conditional
);
17621 strcat (pattern
, reg_names
[regno_base
]);
17623 strcat (pattern
, "!, {");
17625 strcat (pattern
, ", {");
17628 /* Output the first destination register. */
17630 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17632 /* Output the rest of the destination registers. */
17633 for (i
= offset
+ 1; i
< num_saves
; i
++)
17635 strcat (pattern
, ", ");
17637 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17640 strcat (pattern
, "}");
17642 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
17643 strcat (pattern
, "^");
17645 output_asm_insn (pattern
, &cond
);
17649 /* Output the assembly for a store multiple. */
17652 vfp_output_vstmd (rtx
* operands
)
17658 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17659 ? XEXP (operands
[0], 0)
17660 : XEXP (XEXP (operands
[0], 0), 0);
17661 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17664 strcpy (pattern
, "vpush%?.64\t{%P1");
17666 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17668 p
= strlen (pattern
);
17670 gcc_assert (REG_P (operands
[1]));
17672 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17673 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17675 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17677 strcpy (&pattern
[p
], "}");
17679 output_asm_insn (pattern
, operands
);
17684 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17685 number of bytes pushed. */
17688 vfp_emit_fstmd (int base_reg
, int count
)
17695 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17696 register pairs are stored by a store multiple insn. We avoid this
17697 by pushing an extra pair. */
17698 if (count
== 2 && !arm_arch6
)
17700 if (base_reg
== LAST_VFP_REGNUM
- 3)
17705 /* FSTMD may not store more than 16 doubleword registers at once. Split
17706 larger stores into multiple parts (up to a maximum of two, in
17711 /* NOTE: base_reg is an internal register number, so each D register
17713 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17714 saved
+= vfp_emit_fstmd (base_reg
, 16);
17718 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17719 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17721 reg
= gen_rtx_REG (DFmode
, base_reg
);
17724 XVECEXP (par
, 0, 0)
17725 = gen_rtx_SET (VOIDmode
,
17728 gen_rtx_PRE_MODIFY (Pmode
,
17731 (Pmode
, stack_pointer_rtx
,
17734 gen_rtx_UNSPEC (BLKmode
,
17735 gen_rtvec (1, reg
),
17736 UNSPEC_PUSH_MULT
));
17738 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17739 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17740 RTX_FRAME_RELATED_P (tmp
) = 1;
17741 XVECEXP (dwarf
, 0, 0) = tmp
;
17743 tmp
= gen_rtx_SET (VOIDmode
,
17744 gen_frame_mem (DFmode
, stack_pointer_rtx
),
17746 RTX_FRAME_RELATED_P (tmp
) = 1;
17747 XVECEXP (dwarf
, 0, 1) = tmp
;
17749 for (i
= 1; i
< count
; i
++)
17751 reg
= gen_rtx_REG (DFmode
, base_reg
);
17753 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17755 tmp
= gen_rtx_SET (VOIDmode
,
17756 gen_frame_mem (DFmode
,
17757 plus_constant (Pmode
,
17761 RTX_FRAME_RELATED_P (tmp
) = 1;
17762 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17765 par
= emit_insn (par
);
17766 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17767 RTX_FRAME_RELATED_P (par
) = 1;
17772 /* Emit a call instruction with pattern PAT. ADDR is the address of
17773 the call target. */
17776 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17780 insn
= emit_call_insn (pat
);
17782 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17783 If the call might use such an entry, add a use of the PIC register
17784 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17785 if (TARGET_VXWORKS_RTP
17788 && GET_CODE (addr
) == SYMBOL_REF
17789 && (SYMBOL_REF_DECL (addr
)
17790 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17791 : !SYMBOL_REF_LOCAL_P (addr
)))
17793 require_pic_register ();
17794 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17797 if (TARGET_AAPCS_BASED
)
17799 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17800 linker. We need to add an IP clobber to allow setting
17801 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17802 is not needed since it's a fixed register. */
17803 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17804 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17808 /* Output a 'call' insn. */
17810 output_call (rtx
*operands
)
17812 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17814 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17815 if (REGNO (operands
[0]) == LR_REGNUM
)
17817 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17818 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17821 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17823 if (TARGET_INTERWORK
|| arm_arch4t
)
17824 output_asm_insn ("bx%?\t%0", operands
);
17826 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17831 /* Output a 'call' insn that is a reference in memory. This is
17832 disabled for ARMv5 and we prefer a blx instead because otherwise
17833 there's a significant performance overhead. */
17835 output_call_mem (rtx
*operands
)
17837 gcc_assert (!arm_arch5
);
17838 if (TARGET_INTERWORK
)
17840 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17841 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17842 output_asm_insn ("bx%?\t%|ip", operands
);
17844 else if (regno_use_in (LR_REGNUM
, operands
[0]))
17846 /* LR is used in the memory address. We load the address in the
17847 first instruction. It's safe to use IP as the target of the
17848 load since the call will kill it anyway. */
17849 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17850 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17852 output_asm_insn ("bx%?\t%|ip", operands
);
17854 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
17858 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17859 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
17866 /* Output a move from arm registers to arm registers of a long double
17867 OPERANDS[0] is the destination.
17868 OPERANDS[1] is the source. */
17870 output_mov_long_double_arm_from_arm (rtx
*operands
)
17872 /* We have to be careful here because the two might overlap. */
17873 int dest_start
= REGNO (operands
[0]);
17874 int src_start
= REGNO (operands
[1]);
17878 if (dest_start
< src_start
)
17880 for (i
= 0; i
< 3; i
++)
17882 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17883 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17884 output_asm_insn ("mov%?\t%0, %1", ops
);
17889 for (i
= 2; i
>= 0; i
--)
17891 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17892 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17893 output_asm_insn ("mov%?\t%0, %1", ops
);
17901 arm_emit_movpair (rtx dest
, rtx src
)
17903 /* If the src is an immediate, simplify it. */
17904 if (CONST_INT_P (src
))
17906 HOST_WIDE_INT val
= INTVAL (src
);
17907 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17908 if ((val
>> 16) & 0x0000ffff)
17909 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17911 GEN_INT ((val
>> 16) & 0x0000ffff));
17914 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17915 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17918 /* Output a move between double words. It must be REG<-MEM
17921 output_move_double (rtx
*operands
, bool emit
, int *count
)
17923 enum rtx_code code0
= GET_CODE (operands
[0]);
17924 enum rtx_code code1
= GET_CODE (operands
[1]);
17929 /* The only case when this might happen is when
17930 you are looking at the length of a DImode instruction
17931 that has an invalid constant in it. */
17932 if (code0
== REG
&& code1
!= MEM
)
17934 gcc_assert (!emit
);
17941 unsigned int reg0
= REGNO (operands
[0]);
17943 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17945 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17947 switch (GET_CODE (XEXP (operands
[1], 0)))
17954 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17955 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17957 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17962 gcc_assert (TARGET_LDRD
);
17964 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
17971 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
17973 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
17981 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
17983 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
17988 gcc_assert (TARGET_LDRD
);
17990 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
17995 /* Autoicrement addressing modes should never have overlapping
17996 base and destination registers, and overlapping index registers
17997 are already prohibited, so this doesn't need to worry about
17999 otherops
[0] = operands
[0];
18000 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18001 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18003 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18005 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18007 /* Registers overlap so split out the increment. */
18010 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18011 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
18018 /* Use a single insn if we can.
18019 FIXME: IWMMXT allows offsets larger than ldrd can
18020 handle, fix these up with a pair of ldr. */
18022 || !CONST_INT_P (otherops
[2])
18023 || (INTVAL (otherops
[2]) > -256
18024 && INTVAL (otherops
[2]) < 256))
18027 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
18033 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18034 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18044 /* Use a single insn if we can.
18045 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18046 fix these up with a pair of ldr. */
18048 || !CONST_INT_P (otherops
[2])
18049 || (INTVAL (otherops
[2]) > -256
18050 && INTVAL (otherops
[2]) < 256))
18053 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
18059 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18060 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18070 /* We might be able to use ldrd %0, %1 here. However the range is
18071 different to ldr/adr, and it is broken on some ARMv7-M
18072 implementations. */
18073 /* Use the second register of the pair to avoid problematic
18075 otherops
[1] = operands
[1];
18077 output_asm_insn ("adr%?\t%0, %1", otherops
);
18078 operands
[1] = otherops
[0];
18082 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18084 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
18091 /* ??? This needs checking for thumb2. */
18093 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18094 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18096 otherops
[0] = operands
[0];
18097 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18098 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18100 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18102 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18104 switch ((int) INTVAL (otherops
[2]))
18108 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
18114 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
18120 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
18124 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18125 operands
[1] = otherops
[0];
18127 && (REG_P (otherops
[2])
18129 || (CONST_INT_P (otherops
[2])
18130 && INTVAL (otherops
[2]) > -256
18131 && INTVAL (otherops
[2]) < 256)))
18133 if (reg_overlap_mentioned_p (operands
[0],
18137 /* Swap base and index registers over to
18138 avoid a conflict. */
18140 otherops
[1] = otherops
[2];
18143 /* If both registers conflict, it will usually
18144 have been fixed by a splitter. */
18145 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18146 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18150 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18151 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18158 otherops
[0] = operands
[0];
18160 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
18165 if (CONST_INT_P (otherops
[2]))
18169 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18170 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18172 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18178 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18184 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18191 return "ldr%(d%)\t%0, [%1]";
18193 return "ldm%(ia%)\t%1, %M0";
18197 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18198 /* Take care of overlapping base/data reg. */
18199 if (reg_mentioned_p (operands
[0], operands
[1]))
18203 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18204 output_asm_insn ("ldr%?\t%0, %1", operands
);
18214 output_asm_insn ("ldr%?\t%0, %1", operands
);
18215 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18225 /* Constraints should ensure this. */
18226 gcc_assert (code0
== MEM
&& code1
== REG
);
18227 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18228 || (TARGET_ARM
&& TARGET_LDRD
));
18230 switch (GET_CODE (XEXP (operands
[0], 0)))
18236 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
18238 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18243 gcc_assert (TARGET_LDRD
);
18245 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
18252 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
18254 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
18262 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
18264 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
18269 gcc_assert (TARGET_LDRD
);
18271 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
18276 otherops
[0] = operands
[1];
18277 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18278 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18280 /* IWMMXT allows offsets larger than ldrd can handle,
18281 fix these up with a pair of ldr. */
18283 && CONST_INT_P (otherops
[2])
18284 && (INTVAL(otherops
[2]) <= -256
18285 || INTVAL(otherops
[2]) >= 256))
18287 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18291 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18292 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18301 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18302 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18308 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18311 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
18316 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
18321 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18322 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18324 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18328 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
18335 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
18342 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
18347 && (REG_P (otherops
[2])
18349 || (CONST_INT_P (otherops
[2])
18350 && INTVAL (otherops
[2]) > -256
18351 && INTVAL (otherops
[2]) < 256)))
18353 otherops
[0] = operands
[1];
18354 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18356 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
18362 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18363 otherops
[1] = operands
[1];
18366 output_asm_insn ("str%?\t%1, %0", operands
);
18367 output_asm_insn ("str%?\t%H1, %0", otherops
);
18377 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18378 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18381 output_move_quad (rtx
*operands
)
18383 if (REG_P (operands
[0]))
18385 /* Load, or reg->reg move. */
18387 if (MEM_P (operands
[1]))
18389 switch (GET_CODE (XEXP (operands
[1], 0)))
18392 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
18397 output_asm_insn ("adr%?\t%0, %1", operands
);
18398 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
18402 gcc_unreachable ();
18410 gcc_assert (REG_P (operands
[1]));
18412 dest
= REGNO (operands
[0]);
18413 src
= REGNO (operands
[1]);
18415 /* This seems pretty dumb, but hopefully GCC won't try to do it
18418 for (i
= 0; i
< 4; i
++)
18420 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18421 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18422 output_asm_insn ("mov%?\t%0, %1", ops
);
18425 for (i
= 3; i
>= 0; i
--)
18427 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18428 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18429 output_asm_insn ("mov%?\t%0, %1", ops
);
18435 gcc_assert (MEM_P (operands
[0]));
18436 gcc_assert (REG_P (operands
[1]));
18437 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18439 switch (GET_CODE (XEXP (operands
[0], 0)))
18442 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18446 gcc_unreachable ();
18453 /* Output a VFP load or store instruction. */
18456 output_move_vfp (rtx
*operands
)
18458 rtx reg
, mem
, addr
, ops
[2];
18459 int load
= REG_P (operands
[0]);
18460 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18461 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18466 reg
= operands
[!load
];
18467 mem
= operands
[load
];
18469 mode
= GET_MODE (reg
);
18471 gcc_assert (REG_P (reg
));
18472 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18473 gcc_assert (mode
== SFmode
18477 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18478 gcc_assert (MEM_P (mem
));
18480 addr
= XEXP (mem
, 0);
18482 switch (GET_CODE (addr
))
18485 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18486 ops
[0] = XEXP (addr
, 0);
18491 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18492 ops
[0] = XEXP (addr
, 0);
18497 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18503 sprintf (buff
, templ
,
18504 load
? "ld" : "st",
18507 integer_p
? "\t%@ int" : "");
18508 output_asm_insn (buff
, ops
);
18513 /* Output a Neon double-word or quad-word load or store, or a load
18514 or store for larger structure modes.
18516 WARNING: The ordering of elements is weird in big-endian mode,
18517 because the EABI requires that vectors stored in memory appear
18518 as though they were stored by a VSTM, as required by the EABI.
18519 GCC RTL defines element ordering based on in-memory order.
18520 This can be different from the architectural ordering of elements
18521 within a NEON register. The intrinsics defined in arm_neon.h use the
18522 NEON register element ordering, not the GCC RTL element ordering.
18524 For example, the in-memory ordering of a big-endian a quadword
18525 vector with 16-bit elements when stored from register pair {d0,d1}
18526 will be (lowest address first, d0[N] is NEON register element N):
18528 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18530 When necessary, quadword registers (dN, dN+1) are moved to ARM
18531 registers from rN in the order:
18533 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18535 So that STM/LDM can be used on vectors in ARM registers, and the
18536 same memory layout will result as if VSTM/VLDM were used.
18538 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18539 possible, which allows use of appropriate alignment tags.
18540 Note that the choice of "64" is independent of the actual vector
18541 element size; this size simply ensures that the behavior is
18542 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18544 Due to limitations of those instructions, use of VST1.64/VLD1.64
18545 is not possible if:
18546 - the address contains PRE_DEC, or
18547 - the mode refers to more than 4 double-word registers
18549 In those cases, it would be possible to replace VSTM/VLDM by a
18550 sequence of instructions; this is not currently implemented since
18551 this is not certain to actually improve performance. */
18554 output_move_neon (rtx
*operands
)
18556 rtx reg
, mem
, addr
, ops
[2];
18557 int regno
, nregs
, load
= REG_P (operands
[0]);
18562 reg
= operands
[!load
];
18563 mem
= operands
[load
];
18565 mode
= GET_MODE (reg
);
18567 gcc_assert (REG_P (reg
));
18568 regno
= REGNO (reg
);
18569 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18570 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18571 || NEON_REGNO_OK_FOR_QUAD (regno
));
18572 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18573 || VALID_NEON_QREG_MODE (mode
)
18574 || VALID_NEON_STRUCT_MODE (mode
));
18575 gcc_assert (MEM_P (mem
));
18577 addr
= XEXP (mem
, 0);
18579 /* Strip off const from addresses like (const (plus (...))). */
18580 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18581 addr
= XEXP (addr
, 0);
18583 switch (GET_CODE (addr
))
18586 /* We have to use vldm / vstm for too-large modes. */
18589 templ
= "v%smia%%?\t%%0!, %%h1";
18590 ops
[0] = XEXP (addr
, 0);
18594 templ
= "v%s1.64\t%%h1, %%A0";
18601 /* We have to use vldm / vstm in this case, since there is no
18602 pre-decrement form of the vld1 / vst1 instructions. */
18603 templ
= "v%smdb%%?\t%%0!, %%h1";
18604 ops
[0] = XEXP (addr
, 0);
18609 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18610 gcc_unreachable ();
18613 /* We have to use vldm / vstm for too-large modes. */
18617 templ
= "v%smia%%?\t%%m0, %%h1";
18619 templ
= "v%s1.64\t%%h1, %%A0";
18625 /* Fall through. */
18631 for (i
= 0; i
< nregs
; i
++)
18633 /* We're only using DImode here because it's a convenient size. */
18634 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18635 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18636 if (reg_overlap_mentioned_p (ops
[0], mem
))
18638 gcc_assert (overlap
== -1);
18643 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18644 output_asm_insn (buff
, ops
);
18649 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18650 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18651 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18652 output_asm_insn (buff
, ops
);
18659 gcc_unreachable ();
18662 sprintf (buff
, templ
, load
? "ld" : "st");
18663 output_asm_insn (buff
, ops
);
18668 /* Compute and return the length of neon_mov<mode>, where <mode> is
18669 one of VSTRUCT modes: EI, OI, CI or XI. */
18671 arm_attr_length_move_neon (rtx_insn
*insn
)
18673 rtx reg
, mem
, addr
;
18677 extract_insn_cached (insn
);
18679 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18681 mode
= GET_MODE (recog_data
.operand
[0]);
18692 gcc_unreachable ();
18696 load
= REG_P (recog_data
.operand
[0]);
18697 reg
= recog_data
.operand
[!load
];
18698 mem
= recog_data
.operand
[load
];
18700 gcc_assert (MEM_P (mem
));
18702 mode
= GET_MODE (reg
);
18703 addr
= XEXP (mem
, 0);
18705 /* Strip off const from addresses like (const (plus (...))). */
18706 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18707 addr
= XEXP (addr
, 0);
18709 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18711 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18718 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18722 arm_address_offset_is_imm (rtx_insn
*insn
)
18726 extract_insn_cached (insn
);
18728 if (REG_P (recog_data
.operand
[0]))
18731 mem
= recog_data
.operand
[0];
18733 gcc_assert (MEM_P (mem
));
18735 addr
= XEXP (mem
, 0);
18738 || (GET_CODE (addr
) == PLUS
18739 && REG_P (XEXP (addr
, 0))
18740 && CONST_INT_P (XEXP (addr
, 1))))
18746 /* Output an ADD r, s, #n where n may be too big for one instruction.
18747 If adding zero to one register, output nothing. */
18749 output_add_immediate (rtx
*operands
)
18751 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18753 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18756 output_multi_immediate (operands
,
18757 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18760 output_multi_immediate (operands
,
18761 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18768 /* Output a multiple immediate operation.
18769 OPERANDS is the vector of operands referred to in the output patterns.
18770 INSTR1 is the output pattern to use for the first constant.
18771 INSTR2 is the output pattern to use for subsequent constants.
18772 IMMED_OP is the index of the constant slot in OPERANDS.
18773 N is the constant value. */
18774 static const char *
18775 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18776 int immed_op
, HOST_WIDE_INT n
)
18778 #if HOST_BITS_PER_WIDE_INT > 32
18784 /* Quick and easy output. */
18785 operands
[immed_op
] = const0_rtx
;
18786 output_asm_insn (instr1
, operands
);
18791 const char * instr
= instr1
;
18793 /* Note that n is never zero here (which would give no output). */
18794 for (i
= 0; i
< 32; i
+= 2)
18798 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18799 output_asm_insn (instr
, operands
);
18809 /* Return the name of a shifter operation. */
18810 static const char *
18811 arm_shift_nmem(enum rtx_code code
)
18816 return ARM_LSL_NAME
;
18832 /* Return the appropriate ARM instruction for the operation code.
18833 The returned result should not be overwritten. OP is the rtx of the
18834 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18837 arithmetic_instr (rtx op
, int shift_first_arg
)
18839 switch (GET_CODE (op
))
18845 return shift_first_arg
? "rsb" : "sub";
18860 return arm_shift_nmem(GET_CODE(op
));
18863 gcc_unreachable ();
18867 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18868 for the operation code. The returned result should not be overwritten.
18869 OP is the rtx code of the shift.
18870 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18872 static const char *
18873 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18876 enum rtx_code code
= GET_CODE (op
);
18881 if (!CONST_INT_P (XEXP (op
, 1)))
18883 output_operand_lossage ("invalid shift operand");
18888 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18896 mnem
= arm_shift_nmem(code
);
18897 if (CONST_INT_P (XEXP (op
, 1)))
18899 *amountp
= INTVAL (XEXP (op
, 1));
18901 else if (REG_P (XEXP (op
, 1)))
18908 output_operand_lossage ("invalid shift operand");
18914 /* We never have to worry about the amount being other than a
18915 power of 2, since this case can never be reloaded from a reg. */
18916 if (!CONST_INT_P (XEXP (op
, 1)))
18918 output_operand_lossage ("invalid shift operand");
18922 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18924 /* Amount must be a power of two. */
18925 if (*amountp
& (*amountp
- 1))
18927 output_operand_lossage ("invalid shift operand");
18931 *amountp
= int_log2 (*amountp
);
18932 return ARM_LSL_NAME
;
18935 output_operand_lossage ("invalid shift operand");
18939 /* This is not 100% correct, but follows from the desire to merge
18940 multiplication by a power of 2 with the recognizer for a
18941 shift. >=32 is not a valid shift for "lsl", so we must try and
18942 output a shift that produces the correct arithmetical result.
18943 Using lsr #32 is identical except for the fact that the carry bit
18944 is not set correctly if we set the flags; but we never use the
18945 carry bit from such an operation, so we can ignore that. */
18946 if (code
== ROTATERT
)
18947 /* Rotate is just modulo 32. */
18949 else if (*amountp
!= (*amountp
& 31))
18951 if (code
== ASHIFT
)
18956 /* Shifts of 0 are no-ops. */
18963 /* Obtain the shift from the POWER of two. */
18965 static HOST_WIDE_INT
18966 int_log2 (HOST_WIDE_INT power
)
18968 HOST_WIDE_INT shift
= 0;
18970 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
18972 gcc_assert (shift
<= 31);
18979 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18980 because /bin/as is horribly restrictive. The judgement about
18981 whether or not each character is 'printable' (and can be output as
18982 is) or not (and must be printed with an octal escape) must be made
18983 with reference to the *host* character set -- the situation is
18984 similar to that discussed in the comments above pp_c_char in
18985 c-pretty-print.c. */
18987 #define MAX_ASCII_LEN 51
18990 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18993 int len_so_far
= 0;
18995 fputs ("\t.ascii\t\"", stream
);
18997 for (i
= 0; i
< len
; i
++)
19001 if (len_so_far
>= MAX_ASCII_LEN
)
19003 fputs ("\"\n\t.ascii\t\"", stream
);
19009 if (c
== '\\' || c
== '\"')
19011 putc ('\\', stream
);
19019 fprintf (stream
, "\\%03o", c
);
19024 fputs ("\"\n", stream
);
19027 /* Compute the register save mask for registers 0 through 12
19028 inclusive. This code is used by arm_compute_save_reg_mask. */
19030 static unsigned long
19031 arm_compute_save_reg0_reg12_mask (void)
19033 unsigned long func_type
= arm_current_func_type ();
19034 unsigned long save_reg_mask
= 0;
19037 if (IS_INTERRUPT (func_type
))
19039 unsigned int max_reg
;
19040 /* Interrupt functions must not corrupt any registers,
19041 even call clobbered ones. If this is a leaf function
19042 we can just examine the registers used by the RTL, but
19043 otherwise we have to assume that whatever function is
19044 called might clobber anything, and so we have to save
19045 all the call-clobbered registers as well. */
19046 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19047 /* FIQ handlers have registers r8 - r12 banked, so
19048 we only need to check r0 - r7, Normal ISRs only
19049 bank r14 and r15, so we must check up to r12.
19050 r13 is the stack pointer which is always preserved,
19051 so we do not need to consider it here. */
19056 for (reg
= 0; reg
<= max_reg
; reg
++)
19057 if (df_regs_ever_live_p (reg
)
19058 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19059 save_reg_mask
|= (1 << reg
);
19061 /* Also save the pic base register if necessary. */
19063 && !TARGET_SINGLE_PIC_BASE
19064 && arm_pic_register
!= INVALID_REGNUM
19065 && crtl
->uses_pic_offset_table
)
19066 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19068 else if (IS_VOLATILE(func_type
))
19070 /* For noreturn functions we historically omitted register saves
19071 altogether. However this really messes up debugging. As a
19072 compromise save just the frame pointers. Combined with the link
19073 register saved elsewhere this should be sufficient to get
19075 if (frame_pointer_needed
)
19076 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19077 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19078 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19079 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19080 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19084 /* In the normal case we only need to save those registers
19085 which are call saved and which are used by this function. */
19086 for (reg
= 0; reg
<= 11; reg
++)
19087 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
19088 save_reg_mask
|= (1 << reg
);
19090 /* Handle the frame pointer as a special case. */
19091 if (frame_pointer_needed
)
19092 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19094 /* If we aren't loading the PIC register,
19095 don't stack it even though it may be live. */
19097 && !TARGET_SINGLE_PIC_BASE
19098 && arm_pic_register
!= INVALID_REGNUM
19099 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19100 || crtl
->uses_pic_offset_table
))
19101 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19103 /* The prologue will copy SP into R0, so save it. */
19104 if (IS_STACKALIGN (func_type
))
19105 save_reg_mask
|= 1;
19108 /* Save registers so the exception handler can modify them. */
19109 if (crtl
->calls_eh_return
)
19115 reg
= EH_RETURN_DATA_REGNO (i
);
19116 if (reg
== INVALID_REGNUM
)
19118 save_reg_mask
|= 1 << reg
;
19122 return save_reg_mask
;
19125 /* Return true if r3 is live at the start of the function. */
19128 arm_r3_live_at_start_p (void)
19130 /* Just look at cfg info, which is still close enough to correct at this
19131 point. This gives false positives for broken functions that might use
19132 uninitialized data that happens to be allocated in r3, but who cares? */
19133 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19136 /* Compute the number of bytes used to store the static chain register on the
19137 stack, above the stack frame. We need to know this accurately to get the
19138 alignment of the rest of the stack frame correct. */
19141 arm_compute_static_chain_stack_bytes (void)
19143 /* See the defining assertion in arm_expand_prologue. */
19144 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
19145 && IS_NESTED (arm_current_func_type ())
19146 && arm_r3_live_at_start_p ()
19147 && crtl
->args
.pretend_args_size
== 0)
19153 /* Compute a bit mask of which registers need to be
19154 saved on the stack for the current function.
19155 This is used by arm_get_frame_offsets, which may add extra registers. */
19157 static unsigned long
19158 arm_compute_save_reg_mask (void)
19160 unsigned int save_reg_mask
= 0;
19161 unsigned long func_type
= arm_current_func_type ();
19164 if (IS_NAKED (func_type
))
19165 /* This should never really happen. */
19168 /* If we are creating a stack frame, then we must save the frame pointer,
19169 IP (which will hold the old stack pointer), LR and the PC. */
19170 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19172 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19175 | (1 << PC_REGNUM
);
19177 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19179 /* Decide if we need to save the link register.
19180 Interrupt routines have their own banked link register,
19181 so they never need to save it.
19182 Otherwise if we do not use the link register we do not need to save
19183 it. If we are pushing other registers onto the stack however, we
19184 can save an instruction in the epilogue by pushing the link register
19185 now and then popping it back into the PC. This incurs extra memory
19186 accesses though, so we only do it when optimizing for size, and only
19187 if we know that we will not need a fancy return sequence. */
19188 if (df_regs_ever_live_p (LR_REGNUM
)
19191 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19192 && !crtl
->calls_eh_return
))
19193 save_reg_mask
|= 1 << LR_REGNUM
;
19195 if (cfun
->machine
->lr_save_eliminated
)
19196 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19198 if (TARGET_REALLY_IWMMXT
19199 && ((bit_count (save_reg_mask
)
19200 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19201 arm_compute_static_chain_stack_bytes())
19204 /* The total number of registers that are going to be pushed
19205 onto the stack is odd. We need to ensure that the stack
19206 is 64-bit aligned before we start to save iWMMXt registers,
19207 and also before we start to create locals. (A local variable
19208 might be a double or long long which we will load/store using
19209 an iWMMXt instruction). Therefore we need to push another
19210 ARM register, so that the stack will be 64-bit aligned. We
19211 try to avoid using the arg registers (r0 -r3) as they might be
19212 used to pass values in a tail call. */
19213 for (reg
= 4; reg
<= 12; reg
++)
19214 if ((save_reg_mask
& (1 << reg
)) == 0)
19218 save_reg_mask
|= (1 << reg
);
19221 cfun
->machine
->sibcall_blocked
= 1;
19222 save_reg_mask
|= (1 << 3);
19226 /* We may need to push an additional register for use initializing the
19227 PIC base register. */
19228 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19229 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19231 reg
= thumb_find_work_register (1 << 4);
19232 if (!call_used_regs
[reg
])
19233 save_reg_mask
|= (1 << reg
);
19236 return save_reg_mask
;
19240 /* Compute a bit mask of which registers need to be
19241 saved on the stack for the current function. */
19242 static unsigned long
19243 thumb1_compute_save_reg_mask (void)
19245 unsigned long mask
;
19249 for (reg
= 0; reg
< 12; reg
++)
19250 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
19254 && !TARGET_SINGLE_PIC_BASE
19255 && arm_pic_register
!= INVALID_REGNUM
19256 && crtl
->uses_pic_offset_table
)
19257 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19259 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19260 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19261 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19263 /* LR will also be pushed if any lo regs are pushed. */
19264 if (mask
& 0xff || thumb_force_lr_save ())
19265 mask
|= (1 << LR_REGNUM
);
19267 /* Make sure we have a low work register if we need one.
19268 We will need one if we are going to push a high register,
19269 but we are not currently intending to push a low register. */
19270 if ((mask
& 0xff) == 0
19271 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19273 /* Use thumb_find_work_register to choose which register
19274 we will use. If the register is live then we will
19275 have to push it. Use LAST_LO_REGNUM as our fallback
19276 choice for the register to select. */
19277 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19278 /* Make sure the register returned by thumb_find_work_register is
19279 not part of the return value. */
19280 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19281 reg
= LAST_LO_REGNUM
;
19283 if (! call_used_regs
[reg
])
19287 /* The 504 below is 8 bytes less than 512 because there are two possible
19288 alignment words. We can't tell here if they will be present or not so we
19289 have to play it safe and assume that they are. */
19290 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19291 ROUND_UP_WORD (get_frame_size ()) +
19292 crtl
->outgoing_args_size
) >= 504)
19294 /* This is the same as the code in thumb1_expand_prologue() which
19295 determines which register to use for stack decrement. */
19296 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19297 if (mask
& (1 << reg
))
19300 if (reg
> LAST_LO_REGNUM
)
19302 /* Make sure we have a register available for stack decrement. */
19303 mask
|= 1 << LAST_LO_REGNUM
;
19311 /* Return the number of bytes required to save VFP registers. */
19313 arm_get_vfp_saved_size (void)
19315 unsigned int regno
;
19320 /* Space for saved VFP registers. */
19321 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19324 for (regno
= FIRST_VFP_REGNUM
;
19325 regno
< LAST_VFP_REGNUM
;
19328 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19329 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19333 /* Workaround ARM10 VFPr1 bug. */
19334 if (count
== 2 && !arm_arch6
)
19336 saved
+= count
* 8;
19345 if (count
== 2 && !arm_arch6
)
19347 saved
+= count
* 8;
19354 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19355 everything bar the final return instruction. If simple_return is true,
19356 then do not output epilogue, because it has already been emitted in RTL. */
19358 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19359 bool simple_return
)
19361 char conditional
[10];
19364 unsigned long live_regs_mask
;
19365 unsigned long func_type
;
19366 arm_stack_offsets
*offsets
;
19368 func_type
= arm_current_func_type ();
19370 if (IS_NAKED (func_type
))
19373 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19375 /* If this function was declared non-returning, and we have
19376 found a tail call, then we have to trust that the called
19377 function won't return. */
19382 /* Otherwise, trap an attempted return by aborting. */
19384 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19386 assemble_external_libcall (ops
[1]);
19387 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19393 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19395 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19397 cfun
->machine
->return_used_this_function
= 1;
19399 offsets
= arm_get_frame_offsets ();
19400 live_regs_mask
= offsets
->saved_regs_mask
;
19402 if (!simple_return
&& live_regs_mask
)
19404 const char * return_reg
;
19406 /* If we do not have any special requirements for function exit
19407 (e.g. interworking) then we can load the return address
19408 directly into the PC. Otherwise we must load it into LR. */
19410 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19411 return_reg
= reg_names
[PC_REGNUM
];
19413 return_reg
= reg_names
[LR_REGNUM
];
19415 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19417 /* There are three possible reasons for the IP register
19418 being saved. 1) a stack frame was created, in which case
19419 IP contains the old stack pointer, or 2) an ISR routine
19420 corrupted it, or 3) it was saved to align the stack on
19421 iWMMXt. In case 1, restore IP into SP, otherwise just
19423 if (frame_pointer_needed
)
19425 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19426 live_regs_mask
|= (1 << SP_REGNUM
);
19429 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19432 /* On some ARM architectures it is faster to use LDR rather than
19433 LDM to load a single register. On other architectures, the
19434 cost is the same. In 26 bit mode, or for exception handlers,
19435 we have to use LDM to load the PC so that the CPSR is also
19437 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19438 if (live_regs_mask
== (1U << reg
))
19441 if (reg
<= LAST_ARM_REGNUM
19442 && (reg
!= LR_REGNUM
19444 || ! IS_INTERRUPT (func_type
)))
19446 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19447 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19454 /* Generate the load multiple instruction to restore the
19455 registers. Note we can get here, even if
19456 frame_pointer_needed is true, but only if sp already
19457 points to the base of the saved core registers. */
19458 if (live_regs_mask
& (1 << SP_REGNUM
))
19460 unsigned HOST_WIDE_INT stack_adjust
;
19462 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19463 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19465 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19466 if (TARGET_UNIFIED_ASM
)
19467 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19469 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19472 /* If we can't use ldmib (SA110 bug),
19473 then try to pop r3 instead. */
19475 live_regs_mask
|= 1 << 3;
19477 if (TARGET_UNIFIED_ASM
)
19478 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19480 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19484 if (TARGET_UNIFIED_ASM
)
19485 sprintf (instr
, "pop%s\t{", conditional
);
19487 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
19489 p
= instr
+ strlen (instr
);
19491 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19492 if (live_regs_mask
& (1 << reg
))
19494 int l
= strlen (reg_names
[reg
]);
19500 memcpy (p
, ", ", 2);
19504 memcpy (p
, "%|", 2);
19505 memcpy (p
+ 2, reg_names
[reg
], l
);
19509 if (live_regs_mask
& (1 << LR_REGNUM
))
19511 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19512 /* If returning from an interrupt, restore the CPSR. */
19513 if (IS_INTERRUPT (func_type
))
19520 output_asm_insn (instr
, & operand
);
19522 /* See if we need to generate an extra instruction to
19523 perform the actual function return. */
19525 && func_type
!= ARM_FT_INTERWORKED
19526 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19528 /* The return has already been handled
19529 by loading the LR into the PC. */
19536 switch ((int) ARM_FUNC_TYPE (func_type
))
19540 /* ??? This is wrong for unified assembly syntax. */
19541 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19544 case ARM_FT_INTERWORKED
:
19545 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19548 case ARM_FT_EXCEPTION
:
19549 /* ??? This is wrong for unified assembly syntax. */
19550 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19554 /* Use bx if it's available. */
19555 if (arm_arch5
|| arm_arch4t
)
19556 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19558 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19562 output_asm_insn (instr
, & operand
);
19568 /* Write the function name into the code section, directly preceding
19569 the function prologue.
19571 Code will be output similar to this:
19573 .ascii "arm_poke_function_name", 0
19576 .word 0xff000000 + (t1 - t0)
19577 arm_poke_function_name
19579 stmfd sp!, {fp, ip, lr, pc}
19582 When performing a stack backtrace, code can inspect the value
19583 of 'pc' stored at 'fp' + 0. If the trace function then looks
19584 at location pc - 12 and the top 8 bits are set, then we know
19585 that there is a function name embedded immediately preceding this
19586 location and has length ((pc[-3]) & 0xff000000).
19588 We assume that pc is declared as a pointer to an unsigned long.
19590 It is of no benefit to output the function name if we are assembling
19591 a leaf function. These function types will not contain a stack
19592 backtrace structure, therefore it is not possible to determine the
19595 arm_poke_function_name (FILE *stream
, const char *name
)
19597 unsigned long alignlength
;
19598 unsigned long length
;
19601 length
= strlen (name
) + 1;
19602 alignlength
= ROUND_UP_WORD (length
);
19604 ASM_OUTPUT_ASCII (stream
, name
, length
);
19605 ASM_OUTPUT_ALIGN (stream
, 2);
19606 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19607 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19610 /* Place some comments into the assembler stream
19611 describing the current function. */
19613 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19615 unsigned long func_type
;
19617 /* ??? Do we want to print some of the below anyway? */
19621 /* Sanity check. */
19622 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19624 func_type
= arm_current_func_type ();
19626 switch ((int) ARM_FUNC_TYPE (func_type
))
19629 case ARM_FT_NORMAL
:
19631 case ARM_FT_INTERWORKED
:
19632 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19635 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19638 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19640 case ARM_FT_EXCEPTION
:
19641 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19645 if (IS_NAKED (func_type
))
19646 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19648 if (IS_VOLATILE (func_type
))
19649 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19651 if (IS_NESTED (func_type
))
19652 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19653 if (IS_STACKALIGN (func_type
))
19654 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19656 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19658 crtl
->args
.pretend_args_size
, frame_size
);
19660 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19661 frame_pointer_needed
,
19662 cfun
->machine
->uses_anonymous_args
);
19664 if (cfun
->machine
->lr_save_eliminated
)
19665 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19667 if (crtl
->calls_eh_return
)
19668 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19673 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19674 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19676 arm_stack_offsets
*offsets
;
19682 /* Emit any call-via-reg trampolines that are needed for v4t support
19683 of call_reg and call_value_reg type insns. */
19684 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19686 rtx label
= cfun
->machine
->call_via
[regno
];
19690 switch_to_section (function_section (current_function_decl
));
19691 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19692 CODE_LABEL_NUMBER (label
));
19693 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19697 /* ??? Probably not safe to set this here, since it assumes that a
19698 function will be emitted as assembly immediately after we generate
19699 RTL for it. This does not happen for inline functions. */
19700 cfun
->machine
->return_used_this_function
= 0;
19702 else /* TARGET_32BIT */
19704 /* We need to take into account any stack-frame rounding. */
19705 offsets
= arm_get_frame_offsets ();
19707 gcc_assert (!use_return_insn (FALSE
, NULL
)
19708 || (cfun
->machine
->return_used_this_function
!= 0)
19709 || offsets
->saved_regs
== offsets
->outgoing_args
19710 || frame_pointer_needed
);
19714 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19715 STR and STRD. If an even number of registers are being pushed, one
19716 or more STRD patterns are created for each register pair. If an
19717 odd number of registers are pushed, emit an initial STR followed by
19718 as many STRD instructions as are needed. This works best when the
19719 stack is initially 64-bit aligned (the normal case), since it
19720 ensures that each STRD is also 64-bit aligned. */
19722 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19727 rtx par
= NULL_RTX
;
19728 rtx dwarf
= NULL_RTX
;
19732 num_regs
= bit_count (saved_regs_mask
);
19734 /* Must be at least one register to save, and can't save SP or PC. */
19735 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19736 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19737 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19739 /* Create sequence for DWARF info. All the frame-related data for
19740 debugging is held in this wrapper. */
19741 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19743 /* Describe the stack adjustment. */
19744 tmp
= gen_rtx_SET (VOIDmode
,
19746 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19747 RTX_FRAME_RELATED_P (tmp
) = 1;
19748 XVECEXP (dwarf
, 0, 0) = tmp
;
19750 /* Find the first register. */
19751 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19756 /* If there's an odd number of registers to push. Start off by
19757 pushing a single register. This ensures that subsequent strd
19758 operations are dword aligned (assuming that SP was originally
19759 64-bit aligned). */
19760 if ((num_regs
& 1) != 0)
19762 rtx reg
, mem
, insn
;
19764 reg
= gen_rtx_REG (SImode
, regno
);
19766 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19767 stack_pointer_rtx
));
19769 mem
= gen_frame_mem (Pmode
,
19771 (Pmode
, stack_pointer_rtx
,
19772 plus_constant (Pmode
, stack_pointer_rtx
,
19775 tmp
= gen_rtx_SET (VOIDmode
, mem
, reg
);
19776 RTX_FRAME_RELATED_P (tmp
) = 1;
19777 insn
= emit_insn (tmp
);
19778 RTX_FRAME_RELATED_P (insn
) = 1;
19779 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19780 tmp
= gen_rtx_SET (VOIDmode
, gen_frame_mem (Pmode
, stack_pointer_rtx
),
19782 RTX_FRAME_RELATED_P (tmp
) = 1;
19785 XVECEXP (dwarf
, 0, i
) = tmp
;
19789 while (i
< num_regs
)
19790 if (saved_regs_mask
& (1 << regno
))
19792 rtx reg1
, reg2
, mem1
, mem2
;
19793 rtx tmp0
, tmp1
, tmp2
;
19796 /* Find the register to pair with this one. */
19797 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19801 reg1
= gen_rtx_REG (SImode
, regno
);
19802 reg2
= gen_rtx_REG (SImode
, regno2
);
19809 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19812 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19814 -4 * (num_regs
- 1)));
19815 tmp0
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19816 plus_constant (Pmode
, stack_pointer_rtx
,
19818 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19819 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19820 RTX_FRAME_RELATED_P (tmp0
) = 1;
19821 RTX_FRAME_RELATED_P (tmp1
) = 1;
19822 RTX_FRAME_RELATED_P (tmp2
) = 1;
19823 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19824 XVECEXP (par
, 0, 0) = tmp0
;
19825 XVECEXP (par
, 0, 1) = tmp1
;
19826 XVECEXP (par
, 0, 2) = tmp2
;
19827 insn
= emit_insn (par
);
19828 RTX_FRAME_RELATED_P (insn
) = 1;
19829 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19833 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19836 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19839 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19840 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19841 RTX_FRAME_RELATED_P (tmp1
) = 1;
19842 RTX_FRAME_RELATED_P (tmp2
) = 1;
19843 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19844 XVECEXP (par
, 0, 0) = tmp1
;
19845 XVECEXP (par
, 0, 1) = tmp2
;
19849 /* Create unwind information. This is an approximation. */
19850 tmp1
= gen_rtx_SET (VOIDmode
,
19851 gen_frame_mem (Pmode
,
19852 plus_constant (Pmode
,
19856 tmp2
= gen_rtx_SET (VOIDmode
,
19857 gen_frame_mem (Pmode
,
19858 plus_constant (Pmode
,
19863 RTX_FRAME_RELATED_P (tmp1
) = 1;
19864 RTX_FRAME_RELATED_P (tmp2
) = 1;
19865 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19866 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19868 regno
= regno2
+ 1;
19876 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19877 whenever possible, otherwise it emits single-word stores. The first store
19878 also allocates stack space for all saved registers, using writeback with
19879 post-addressing mode. All other stores use offset addressing. If no STRD
19880 can be emitted, this function emits a sequence of single-word stores,
19881 and not an STM as before, because single-word stores provide more freedom
19882 scheduling and can be turned into an STM by peephole optimizations. */
19884 arm_emit_strd_push (unsigned long saved_regs_mask
)
19887 int i
, j
, dwarf_index
= 0;
19889 rtx dwarf
= NULL_RTX
;
19890 rtx insn
= NULL_RTX
;
19893 /* TODO: A more efficient code can be emitted by changing the
19894 layout, e.g., first push all pairs that can use STRD to keep the
19895 stack aligned, and then push all other registers. */
19896 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19897 if (saved_regs_mask
& (1 << i
))
19900 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19901 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19902 gcc_assert (num_regs
> 0);
19904 /* Create sequence for DWARF info. */
19905 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19907 /* For dwarf info, we generate explicit stack update. */
19908 tmp
= gen_rtx_SET (VOIDmode
,
19910 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19911 RTX_FRAME_RELATED_P (tmp
) = 1;
19912 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19914 /* Save registers. */
19915 offset
= - 4 * num_regs
;
19917 while (j
<= LAST_ARM_REGNUM
)
19918 if (saved_regs_mask
& (1 << j
))
19921 && (saved_regs_mask
& (1 << (j
+ 1))))
19923 /* Current register and previous register form register pair for
19924 which STRD can be generated. */
19927 /* Allocate stack space for all saved registers. */
19928 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19929 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19930 mem
= gen_frame_mem (DImode
, tmp
);
19933 else if (offset
> 0)
19934 mem
= gen_frame_mem (DImode
,
19935 plus_constant (Pmode
,
19939 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19941 tmp
= gen_rtx_SET (DImode
, mem
, gen_rtx_REG (DImode
, j
));
19942 RTX_FRAME_RELATED_P (tmp
) = 1;
19943 tmp
= emit_insn (tmp
);
19945 /* Record the first store insn. */
19946 if (dwarf_index
== 1)
19949 /* Generate dwarf info. */
19950 mem
= gen_frame_mem (SImode
,
19951 plus_constant (Pmode
,
19954 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19955 RTX_FRAME_RELATED_P (tmp
) = 1;
19956 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19958 mem
= gen_frame_mem (SImode
,
19959 plus_constant (Pmode
,
19962 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
+ 1));
19963 RTX_FRAME_RELATED_P (tmp
) = 1;
19964 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19971 /* Emit a single word store. */
19974 /* Allocate stack space for all saved registers. */
19975 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19976 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19977 mem
= gen_frame_mem (SImode
, tmp
);
19980 else if (offset
> 0)
19981 mem
= gen_frame_mem (SImode
,
19982 plus_constant (Pmode
,
19986 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19988 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19989 RTX_FRAME_RELATED_P (tmp
) = 1;
19990 tmp
= emit_insn (tmp
);
19992 /* Record the first store insn. */
19993 if (dwarf_index
== 1)
19996 /* Generate dwarf info. */
19997 mem
= gen_frame_mem (SImode
,
19998 plus_constant(Pmode
,
20001 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
20002 RTX_FRAME_RELATED_P (tmp
) = 1;
20003 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20012 /* Attach dwarf info to the first insn we generate. */
20013 gcc_assert (insn
!= NULL_RTX
);
20014 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20015 RTX_FRAME_RELATED_P (insn
) = 1;
20018 /* Generate and emit an insn that we will recognize as a push_multi.
20019 Unfortunately, since this insn does not reflect very well the actual
20020 semantics of the operation, we need to annotate the insn for the benefit
20021 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20022 MASK for registers that should be annotated for DWARF2 frame unwind
20025 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20028 int num_dwarf_regs
= 0;
20032 int dwarf_par_index
;
20035 /* We don't record the PC in the dwarf frame information. */
20036 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20038 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20040 if (mask
& (1 << i
))
20042 if (dwarf_regs_mask
& (1 << i
))
20046 gcc_assert (num_regs
&& num_regs
<= 16);
20047 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20049 /* For the body of the insn we are going to generate an UNSPEC in
20050 parallel with several USEs. This allows the insn to be recognized
20051 by the push_multi pattern in the arm.md file.
20053 The body of the insn looks something like this:
20056 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20057 (const_int:SI <num>)))
20058 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20064 For the frame note however, we try to be more explicit and actually
20065 show each register being stored into the stack frame, plus a (single)
20066 decrement of the stack pointer. We do it this way in order to be
20067 friendly to the stack unwinding code, which only wants to see a single
20068 stack decrement per instruction. The RTL we generate for the note looks
20069 something like this:
20072 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20073 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20074 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20075 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20079 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20080 instead we'd have a parallel expression detailing all
20081 the stores to the various memory addresses so that debug
20082 information is more up-to-date. Remember however while writing
20083 this to take care of the constraints with the push instruction.
20085 Note also that this has to be taken care of for the VFP registers.
20087 For more see PR43399. */
20089 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20090 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20091 dwarf_par_index
= 1;
20093 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20095 if (mask
& (1 << i
))
20097 reg
= gen_rtx_REG (SImode
, i
);
20099 XVECEXP (par
, 0, 0)
20100 = gen_rtx_SET (VOIDmode
,
20103 gen_rtx_PRE_MODIFY (Pmode
,
20106 (Pmode
, stack_pointer_rtx
,
20109 gen_rtx_UNSPEC (BLKmode
,
20110 gen_rtvec (1, reg
),
20111 UNSPEC_PUSH_MULT
));
20113 if (dwarf_regs_mask
& (1 << i
))
20115 tmp
= gen_rtx_SET (VOIDmode
,
20116 gen_frame_mem (SImode
, stack_pointer_rtx
),
20118 RTX_FRAME_RELATED_P (tmp
) = 1;
20119 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20126 for (j
= 1, i
++; j
< num_regs
; i
++)
20128 if (mask
& (1 << i
))
20130 reg
= gen_rtx_REG (SImode
, i
);
20132 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20134 if (dwarf_regs_mask
& (1 << i
))
20137 = gen_rtx_SET (VOIDmode
,
20140 plus_constant (Pmode
, stack_pointer_rtx
,
20143 RTX_FRAME_RELATED_P (tmp
) = 1;
20144 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20151 par
= emit_insn (par
);
20153 tmp
= gen_rtx_SET (VOIDmode
,
20155 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20156 RTX_FRAME_RELATED_P (tmp
) = 1;
20157 XVECEXP (dwarf
, 0, 0) = tmp
;
20159 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20164 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20165 SIZE is the offset to be adjusted.
20166 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20168 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20172 RTX_FRAME_RELATED_P (insn
) = 1;
20173 dwarf
= gen_rtx_SET (VOIDmode
, dest
, plus_constant (Pmode
, src
, size
));
20174 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20177 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20178 SAVED_REGS_MASK shows which registers need to be restored.
20180 Unfortunately, since this insn does not reflect very well the actual
20181 semantics of the operation, we need to annotate the insn for the benefit
20182 of DWARF2 frame unwind information. */
20184 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20189 rtx dwarf
= NULL_RTX
;
20195 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20196 offset_adj
= return_in_pc
? 1 : 0;
20197 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20198 if (saved_regs_mask
& (1 << i
))
20201 gcc_assert (num_regs
&& num_regs
<= 16);
20203 /* If SP is in reglist, then we don't emit SP update insn. */
20204 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20206 /* The parallel needs to hold num_regs SETs
20207 and one SET for the stack update. */
20208 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20213 XVECEXP (par
, 0, 0) = tmp
;
20218 /* Increment the stack pointer, based on there being
20219 num_regs 4-byte registers to restore. */
20220 tmp
= gen_rtx_SET (VOIDmode
,
20222 plus_constant (Pmode
,
20225 RTX_FRAME_RELATED_P (tmp
) = 1;
20226 XVECEXP (par
, 0, offset_adj
) = tmp
;
20229 /* Now restore every reg, which may include PC. */
20230 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20231 if (saved_regs_mask
& (1 << i
))
20233 reg
= gen_rtx_REG (SImode
, i
);
20234 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20236 /* Emit single load with writeback. */
20237 tmp
= gen_frame_mem (SImode
,
20238 gen_rtx_POST_INC (Pmode
,
20239 stack_pointer_rtx
));
20240 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, tmp
));
20241 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20245 tmp
= gen_rtx_SET (VOIDmode
,
20249 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20250 RTX_FRAME_RELATED_P (tmp
) = 1;
20251 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20253 /* We need to maintain a sequence for DWARF info too. As dwarf info
20254 should not have PC, skip PC. */
20255 if (i
!= PC_REGNUM
)
20256 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20262 par
= emit_jump_insn (par
);
20264 par
= emit_insn (par
);
20266 REG_NOTES (par
) = dwarf
;
20268 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20269 stack_pointer_rtx
, stack_pointer_rtx
);
20272 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20273 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20275 Unfortunately, since this insn does not reflect very well the actual
20276 semantics of the operation, we need to annotate the insn for the benefit
20277 of DWARF2 frame unwind information. */
20279 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20283 rtx dwarf
= NULL_RTX
;
20286 gcc_assert (num_regs
&& num_regs
<= 32);
20288 /* Workaround ARM10 VFPr1 bug. */
20289 if (num_regs
== 2 && !arm_arch6
)
20291 if (first_reg
== 15)
20297 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20298 there could be up to 32 D-registers to restore.
20299 If there are more than 16 D-registers, make two recursive calls,
20300 each of which emits one pop_multi instruction. */
20303 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20304 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20308 /* The parallel needs to hold num_regs SETs
20309 and one SET for the stack update. */
20310 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20312 /* Increment the stack pointer, based on there being
20313 num_regs 8-byte registers to restore. */
20314 tmp
= gen_rtx_SET (VOIDmode
,
20316 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20317 RTX_FRAME_RELATED_P (tmp
) = 1;
20318 XVECEXP (par
, 0, 0) = tmp
;
20320 /* Now show every reg that will be restored, using a SET for each. */
20321 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20323 reg
= gen_rtx_REG (DFmode
, i
);
20325 tmp
= gen_rtx_SET (VOIDmode
,
20329 plus_constant (Pmode
, base_reg
, 8 * j
)));
20330 RTX_FRAME_RELATED_P (tmp
) = 1;
20331 XVECEXP (par
, 0, j
+ 1) = tmp
;
20333 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20338 par
= emit_insn (par
);
20339 REG_NOTES (par
) = dwarf
;
20341 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20342 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
20344 RTX_FRAME_RELATED_P (par
) = 1;
20345 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20348 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20349 base_reg
, base_reg
);
20352 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20353 number of registers are being popped, multiple LDRD patterns are created for
20354 all register pairs. If odd number of registers are popped, last register is
20355 loaded by using LDR pattern. */
20357 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20361 rtx par
= NULL_RTX
;
20362 rtx dwarf
= NULL_RTX
;
20363 rtx tmp
, reg
, tmp1
;
20366 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20367 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20368 if (saved_regs_mask
& (1 << i
))
20371 gcc_assert (num_regs
&& num_regs
<= 16);
20373 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20374 to be popped. So, if num_regs is even, now it will become odd,
20375 and we can generate pop with PC. If num_regs is odd, it will be
20376 even now, and ldr with return can be generated for PC. */
20380 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20382 /* Var j iterates over all the registers to gather all the registers in
20383 saved_regs_mask. Var i gives index of saved registers in stack frame.
20384 A PARALLEL RTX of register-pair is created here, so that pattern for
20385 LDRD can be matched. As PC is always last register to be popped, and
20386 we have already decremented num_regs if PC, we don't have to worry
20387 about PC in this loop. */
20388 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20389 if (saved_regs_mask
& (1 << j
))
20391 /* Create RTX for memory load. */
20392 reg
= gen_rtx_REG (SImode
, j
);
20393 tmp
= gen_rtx_SET (SImode
,
20395 gen_frame_mem (SImode
,
20396 plus_constant (Pmode
,
20397 stack_pointer_rtx
, 4 * i
)));
20398 RTX_FRAME_RELATED_P (tmp
) = 1;
20402 /* When saved-register index (i) is even, the RTX to be emitted is
20403 yet to be created. Hence create it first. The LDRD pattern we
20404 are generating is :
20405 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20406 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20407 where target registers need not be consecutive. */
20408 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20412 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20413 added as 0th element and if i is odd, reg_i is added as 1st element
20414 of LDRD pattern shown above. */
20415 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20416 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20420 /* When saved-register index (i) is odd, RTXs for both the registers
20421 to be loaded are generated in above given LDRD pattern, and the
20422 pattern can be emitted now. */
20423 par
= emit_insn (par
);
20424 REG_NOTES (par
) = dwarf
;
20425 RTX_FRAME_RELATED_P (par
) = 1;
20431 /* If the number of registers pushed is odd AND return_in_pc is false OR
20432 number of registers are even AND return_in_pc is true, last register is
20433 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20434 then LDR with post increment. */
20436 /* Increment the stack pointer, based on there being
20437 num_regs 4-byte registers to restore. */
20438 tmp
= gen_rtx_SET (VOIDmode
,
20440 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20441 RTX_FRAME_RELATED_P (tmp
) = 1;
20442 tmp
= emit_insn (tmp
);
20445 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20446 stack_pointer_rtx
, stack_pointer_rtx
);
20451 if (((num_regs
% 2) == 1 && !return_in_pc
)
20452 || ((num_regs
% 2) == 0 && return_in_pc
))
20454 /* Scan for the single register to be popped. Skip until the saved
20455 register is found. */
20456 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20458 /* Gen LDR with post increment here. */
20459 tmp1
= gen_rtx_MEM (SImode
,
20460 gen_rtx_POST_INC (SImode
,
20461 stack_pointer_rtx
));
20462 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20464 reg
= gen_rtx_REG (SImode
, j
);
20465 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
20466 RTX_FRAME_RELATED_P (tmp
) = 1;
20467 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20471 /* If return_in_pc, j must be PC_REGNUM. */
20472 gcc_assert (j
== PC_REGNUM
);
20473 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20474 XVECEXP (par
, 0, 0) = ret_rtx
;
20475 XVECEXP (par
, 0, 1) = tmp
;
20476 par
= emit_jump_insn (par
);
20480 par
= emit_insn (tmp
);
20481 REG_NOTES (par
) = dwarf
;
20482 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20483 stack_pointer_rtx
, stack_pointer_rtx
);
20487 else if ((num_regs
% 2) == 1 && return_in_pc
)
20489 /* There are 2 registers to be popped. So, generate the pattern
20490 pop_multiple_with_stack_update_and_return to pop in PC. */
20491 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20497 /* LDRD in ARM mode needs consecutive registers as operands. This function
20498 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20499 offset addressing and then generates one separate stack udpate. This provides
20500 more scheduling freedom, compared to writeback on every load. However,
20501 if the function returns using load into PC directly
20502 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20503 before the last load. TODO: Add a peephole optimization to recognize
20504 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20505 peephole optimization to merge the load at stack-offset zero
20506 with the stack update instruction using load with writeback
20507 in post-index addressing mode. */
20509 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20513 rtx par
= NULL_RTX
;
20514 rtx dwarf
= NULL_RTX
;
20517 /* Restore saved registers. */
20518 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20520 while (j
<= LAST_ARM_REGNUM
)
20521 if (saved_regs_mask
& (1 << j
))
20524 && (saved_regs_mask
& (1 << (j
+ 1)))
20525 && (j
+ 1) != PC_REGNUM
)
20527 /* Current register and next register form register pair for which
20528 LDRD can be generated. PC is always the last register popped, and
20529 we handle it separately. */
20531 mem
= gen_frame_mem (DImode
,
20532 plus_constant (Pmode
,
20536 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20538 tmp
= gen_rtx_SET (DImode
, gen_rtx_REG (DImode
, j
), mem
);
20539 tmp
= emit_insn (tmp
);
20540 RTX_FRAME_RELATED_P (tmp
) = 1;
20542 /* Generate dwarf info. */
20544 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20545 gen_rtx_REG (SImode
, j
),
20547 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20548 gen_rtx_REG (SImode
, j
+ 1),
20551 REG_NOTES (tmp
) = dwarf
;
20556 else if (j
!= PC_REGNUM
)
20558 /* Emit a single word load. */
20560 mem
= gen_frame_mem (SImode
,
20561 plus_constant (Pmode
,
20565 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20567 tmp
= gen_rtx_SET (SImode
, gen_rtx_REG (SImode
, j
), mem
);
20568 tmp
= emit_insn (tmp
);
20569 RTX_FRAME_RELATED_P (tmp
) = 1;
20571 /* Generate dwarf info. */
20572 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20573 gen_rtx_REG (SImode
, j
),
20579 else /* j == PC_REGNUM */
20585 /* Update the stack. */
20588 tmp
= gen_rtx_SET (Pmode
,
20590 plus_constant (Pmode
,
20593 tmp
= emit_insn (tmp
);
20594 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20595 stack_pointer_rtx
, stack_pointer_rtx
);
20599 if (saved_regs_mask
& (1 << PC_REGNUM
))
20601 /* Only PC is to be popped. */
20602 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20603 XVECEXP (par
, 0, 0) = ret_rtx
;
20604 tmp
= gen_rtx_SET (SImode
,
20605 gen_rtx_REG (SImode
, PC_REGNUM
),
20606 gen_frame_mem (SImode
,
20607 gen_rtx_POST_INC (SImode
,
20608 stack_pointer_rtx
)));
20609 RTX_FRAME_RELATED_P (tmp
) = 1;
20610 XVECEXP (par
, 0, 1) = tmp
;
20611 par
= emit_jump_insn (par
);
20613 /* Generate dwarf info. */
20614 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20615 gen_rtx_REG (SImode
, PC_REGNUM
),
20617 REG_NOTES (par
) = dwarf
;
20618 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20619 stack_pointer_rtx
, stack_pointer_rtx
);
20623 /* Calculate the size of the return value that is passed in registers. */
20625 arm_size_return_regs (void)
20629 if (crtl
->return_rtx
!= 0)
20630 mode
= GET_MODE (crtl
->return_rtx
);
20632 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20634 return GET_MODE_SIZE (mode
);
20637 /* Return true if the current function needs to save/restore LR. */
20639 thumb_force_lr_save (void)
20641 return !cfun
->machine
->lr_save_eliminated
20642 && (!leaf_function_p ()
20643 || thumb_far_jump_used_p ()
20644 || df_regs_ever_live_p (LR_REGNUM
));
20647 /* We do not know if r3 will be available because
20648 we do have an indirect tailcall happening in this
20649 particular case. */
20651 is_indirect_tailcall_p (rtx call
)
20653 rtx pat
= PATTERN (call
);
20655 /* Indirect tail call. */
20656 pat
= XVECEXP (pat
, 0, 0);
20657 if (GET_CODE (pat
) == SET
)
20658 pat
= SET_SRC (pat
);
20660 pat
= XEXP (XEXP (pat
, 0), 0);
20661 return REG_P (pat
);
20664 /* Return true if r3 is used by any of the tail call insns in the
20665 current function. */
20667 any_sibcall_could_use_r3 (void)
20672 if (!crtl
->tail_call_emit
)
20674 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20675 if (e
->flags
& EDGE_SIBCALL
)
20677 rtx call
= BB_END (e
->src
);
20678 if (!CALL_P (call
))
20679 call
= prev_nonnote_nondebug_insn (call
);
20680 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20681 if (find_regno_fusage (call
, USE
, 3)
20682 || is_indirect_tailcall_p (call
))
20689 /* Compute the distance from register FROM to register TO.
20690 These can be the arg pointer (26), the soft frame pointer (25),
20691 the stack pointer (13) or the hard frame pointer (11).
20692 In thumb mode r7 is used as the soft frame pointer, if needed.
20693 Typical stack layout looks like this:
20695 old stack pointer -> | |
20698 | | saved arguments for
20699 | | vararg functions
20702 hard FP & arg pointer -> | | \
20710 soft frame pointer -> | | /
20715 locals base pointer -> | | /
20720 current stack pointer -> | | /
20723 For a given function some or all of these stack components
20724 may not be needed, giving rise to the possibility of
20725 eliminating some of the registers.
20727 The values returned by this function must reflect the behavior
20728 of arm_expand_prologue() and arm_compute_save_reg_mask().
20730 The sign of the number returned reflects the direction of stack
20731 growth, so the values are positive for all eliminations except
20732 from the soft frame pointer to the hard frame pointer.
20734 SFP may point just inside the local variables block to ensure correct
20738 /* Calculate stack offsets. These are used to calculate register elimination
20739 offsets and in prologue/epilogue code. Also calculates which registers
20740 should be saved. */
20742 static arm_stack_offsets
*
20743 arm_get_frame_offsets (void)
20745 struct arm_stack_offsets
*offsets
;
20746 unsigned long func_type
;
20750 HOST_WIDE_INT frame_size
;
20753 offsets
= &cfun
->machine
->stack_offsets
;
20755 /* We need to know if we are a leaf function. Unfortunately, it
20756 is possible to be called after start_sequence has been called,
20757 which causes get_insns to return the insns for the sequence,
20758 not the function, which will cause leaf_function_p to return
20759 the incorrect result.
20761 to know about leaf functions once reload has completed, and the
20762 frame size cannot be changed after that time, so we can safely
20763 use the cached value. */
20765 if (reload_completed
)
20768 /* Initially this is the size of the local variables. It will translated
20769 into an offset once we have determined the size of preceding data. */
20770 frame_size
= ROUND_UP_WORD (get_frame_size ());
20772 leaf
= leaf_function_p ();
20774 /* Space for variadic functions. */
20775 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20777 /* In Thumb mode this is incorrect, but never used. */
20779 = (offsets
->saved_args
20780 + arm_compute_static_chain_stack_bytes ()
20781 + (frame_pointer_needed
? 4 : 0));
20785 unsigned int regno
;
20787 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20788 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20789 saved
= core_saved
;
20791 /* We know that SP will be doubleword aligned on entry, and we must
20792 preserve that condition at any subroutine call. We also require the
20793 soft frame pointer to be doubleword aligned. */
20795 if (TARGET_REALLY_IWMMXT
)
20797 /* Check for the call-saved iWMMXt registers. */
20798 for (regno
= FIRST_IWMMXT_REGNUM
;
20799 regno
<= LAST_IWMMXT_REGNUM
;
20801 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20805 func_type
= arm_current_func_type ();
20806 /* Space for saved VFP registers. */
20807 if (! IS_VOLATILE (func_type
)
20808 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20809 saved
+= arm_get_vfp_saved_size ();
20811 else /* TARGET_THUMB1 */
20813 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20814 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20815 saved
= core_saved
;
20816 if (TARGET_BACKTRACE
)
20820 /* Saved registers include the stack frame. */
20821 offsets
->saved_regs
20822 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20823 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20825 /* A leaf function does not need any stack alignment if it has nothing
20827 if (leaf
&& frame_size
== 0
20828 /* However if it calls alloca(), we have a dynamically allocated
20829 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20830 && ! cfun
->calls_alloca
)
20832 offsets
->outgoing_args
= offsets
->soft_frame
;
20833 offsets
->locals_base
= offsets
->soft_frame
;
20837 /* Ensure SFP has the correct alignment. */
20838 if (ARM_DOUBLEWORD_ALIGN
20839 && (offsets
->soft_frame
& 7))
20841 offsets
->soft_frame
+= 4;
20842 /* Try to align stack by pushing an extra reg. Don't bother doing this
20843 when there is a stack frame as the alignment will be rolled into
20844 the normal stack adjustment. */
20845 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20849 /* Register r3 is caller-saved. Normally it does not need to be
20850 saved on entry by the prologue. However if we choose to save
20851 it for padding then we may confuse the compiler into thinking
20852 a prologue sequence is required when in fact it is not. This
20853 will occur when shrink-wrapping if r3 is used as a scratch
20854 register and there are no other callee-saved writes.
20856 This situation can be avoided when other callee-saved registers
20857 are available and r3 is not mandatory if we choose a callee-saved
20858 register for padding. */
20859 bool prefer_callee_reg_p
= false;
20861 /* If it is safe to use r3, then do so. This sometimes
20862 generates better code on Thumb-2 by avoiding the need to
20863 use 32-bit push/pop instructions. */
20864 if (! any_sibcall_could_use_r3 ()
20865 && arm_size_return_regs () <= 12
20866 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20868 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20871 if (!TARGET_THUMB2
)
20872 prefer_callee_reg_p
= true;
20875 || prefer_callee_reg_p
)
20877 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20879 /* Avoid fixed registers; they may be changed at
20880 arbitrary times so it's unsafe to restore them
20881 during the epilogue. */
20883 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20893 offsets
->saved_regs
+= 4;
20894 offsets
->saved_regs_mask
|= (1 << reg
);
20899 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20900 offsets
->outgoing_args
= (offsets
->locals_base
20901 + crtl
->outgoing_args_size
);
20903 if (ARM_DOUBLEWORD_ALIGN
)
20905 /* Ensure SP remains doubleword aligned. */
20906 if (offsets
->outgoing_args
& 7)
20907 offsets
->outgoing_args
+= 4;
20908 gcc_assert (!(offsets
->outgoing_args
& 7));
20915 /* Calculate the relative offsets for the different stack pointers. Positive
20916 offsets are in the direction of stack growth. */
20919 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20921 arm_stack_offsets
*offsets
;
20923 offsets
= arm_get_frame_offsets ();
20925 /* OK, now we have enough information to compute the distances.
20926 There must be an entry in these switch tables for each pair
20927 of registers in ELIMINABLE_REGS, even if some of the entries
20928 seem to be redundant or useless. */
20931 case ARG_POINTER_REGNUM
:
20934 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20937 case FRAME_POINTER_REGNUM
:
20938 /* This is the reverse of the soft frame pointer
20939 to hard frame pointer elimination below. */
20940 return offsets
->soft_frame
- offsets
->saved_args
;
20942 case ARM_HARD_FRAME_POINTER_REGNUM
:
20943 /* This is only non-zero in the case where the static chain register
20944 is stored above the frame. */
20945 return offsets
->frame
- offsets
->saved_args
- 4;
20947 case STACK_POINTER_REGNUM
:
20948 /* If nothing has been pushed on the stack at all
20949 then this will return -4. This *is* correct! */
20950 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20953 gcc_unreachable ();
20955 gcc_unreachable ();
20957 case FRAME_POINTER_REGNUM
:
20960 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20963 case ARM_HARD_FRAME_POINTER_REGNUM
:
20964 /* The hard frame pointer points to the top entry in the
20965 stack frame. The soft frame pointer to the bottom entry
20966 in the stack frame. If there is no stack frame at all,
20967 then they are identical. */
20969 return offsets
->frame
- offsets
->soft_frame
;
20971 case STACK_POINTER_REGNUM
:
20972 return offsets
->outgoing_args
- offsets
->soft_frame
;
20975 gcc_unreachable ();
20977 gcc_unreachable ();
20980 /* You cannot eliminate from the stack pointer.
20981 In theory you could eliminate from the hard frame
20982 pointer to the stack pointer, but this will never
20983 happen, since if a stack frame is not needed the
20984 hard frame pointer will never be used. */
20985 gcc_unreachable ();
20989 /* Given FROM and TO register numbers, say whether this elimination is
20990 allowed. Frame pointer elimination is automatically handled.
20992 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20993 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20994 pointer, we must eliminate FRAME_POINTER_REGNUM into
20995 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20996 ARG_POINTER_REGNUM. */
20999 arm_can_eliminate (const int from
, const int to
)
21001 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21002 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21003 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21004 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21008 /* Emit RTL to save coprocessor registers on function entry. Returns the
21009 number of bytes pushed. */
21012 arm_save_coproc_regs(void)
21014 int saved_size
= 0;
21016 unsigned start_reg
;
21019 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21020 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21022 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21023 insn
= gen_rtx_MEM (V2SImode
, insn
);
21024 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21025 RTX_FRAME_RELATED_P (insn
) = 1;
21029 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
21031 start_reg
= FIRST_VFP_REGNUM
;
21033 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21035 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21036 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21038 if (start_reg
!= reg
)
21039 saved_size
+= vfp_emit_fstmd (start_reg
,
21040 (reg
- start_reg
) / 2);
21041 start_reg
= reg
+ 2;
21044 if (start_reg
!= reg
)
21045 saved_size
+= vfp_emit_fstmd (start_reg
,
21046 (reg
- start_reg
) / 2);
21052 /* Set the Thumb frame pointer from the stack pointer. */
21055 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21057 HOST_WIDE_INT amount
;
21060 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21062 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21063 stack_pointer_rtx
, GEN_INT (amount
)));
21066 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21067 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21068 expects the first two operands to be the same. */
21071 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21073 hard_frame_pointer_rtx
));
21077 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21078 hard_frame_pointer_rtx
,
21079 stack_pointer_rtx
));
21081 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
21082 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21083 RTX_FRAME_RELATED_P (dwarf
) = 1;
21084 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21087 RTX_FRAME_RELATED_P (insn
) = 1;
21090 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21093 arm_expand_prologue (void)
21098 unsigned long live_regs_mask
;
21099 unsigned long func_type
;
21101 int saved_pretend_args
= 0;
21102 int saved_regs
= 0;
21103 unsigned HOST_WIDE_INT args_to_push
;
21104 arm_stack_offsets
*offsets
;
21106 func_type
= arm_current_func_type ();
21108 /* Naked functions don't have prologues. */
21109 if (IS_NAKED (func_type
))
21112 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21113 args_to_push
= crtl
->args
.pretend_args_size
;
21115 /* Compute which register we will have to save onto the stack. */
21116 offsets
= arm_get_frame_offsets ();
21117 live_regs_mask
= offsets
->saved_regs_mask
;
21119 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21121 if (IS_STACKALIGN (func_type
))
21125 /* Handle a word-aligned stack pointer. We generate the following:
21130 <save and restore r0 in normal prologue/epilogue>
21134 The unwinder doesn't need to know about the stack realignment.
21135 Just tell it we saved SP in r0. */
21136 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21138 r0
= gen_rtx_REG (SImode
, 0);
21139 r1
= gen_rtx_REG (SImode
, 1);
21141 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21142 RTX_FRAME_RELATED_P (insn
) = 1;
21143 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21145 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21147 /* ??? The CFA changes here, which may cause GDB to conclude that it
21148 has entered a different function. That said, the unwind info is
21149 correct, individually, before and after this instruction because
21150 we've described the save of SP, which will override the default
21151 handling of SP as restoring from the CFA. */
21152 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21155 /* For APCS frames, if IP register is clobbered
21156 when creating frame, save that register in a special
21158 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21160 if (IS_INTERRUPT (func_type
))
21162 /* Interrupt functions must not corrupt any registers.
21163 Creating a frame pointer however, corrupts the IP
21164 register, so we must push it first. */
21165 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21167 /* Do not set RTX_FRAME_RELATED_P on this insn.
21168 The dwarf stack unwinding code only wants to see one
21169 stack decrement per function, and this is not it. If
21170 this instruction is labeled as being part of the frame
21171 creation sequence then dwarf2out_frame_debug_expr will
21172 die when it encounters the assignment of IP to FP
21173 later on, since the use of SP here establishes SP as
21174 the CFA register and not IP.
21176 Anyway this instruction is not really part of the stack
21177 frame creation although it is part of the prologue. */
21179 else if (IS_NESTED (func_type
))
21181 /* The static chain register is the same as the IP register
21182 used as a scratch register during stack frame creation.
21183 To get around this need to find somewhere to store IP
21184 whilst the frame is being created. We try the following
21187 1. The last argument register r3 if it is available.
21188 2. A slot on the stack above the frame if there are no
21189 arguments to push onto the stack.
21190 3. Register r3 again, after pushing the argument registers
21191 onto the stack, if this is a varargs function.
21192 4. The last slot on the stack created for the arguments to
21193 push, if this isn't a varargs function.
21195 Note - we only need to tell the dwarf2 backend about the SP
21196 adjustment in the second variant; the static chain register
21197 doesn't need to be unwound, as it doesn't contain a value
21198 inherited from the caller. */
21200 if (!arm_r3_live_at_start_p ())
21201 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21202 else if (args_to_push
== 0)
21206 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21209 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21210 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21213 /* Just tell the dwarf backend that we adjusted SP. */
21214 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21215 plus_constant (Pmode
, stack_pointer_rtx
,
21217 RTX_FRAME_RELATED_P (insn
) = 1;
21218 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21222 /* Store the args on the stack. */
21223 if (cfun
->machine
->uses_anonymous_args
)
21226 = emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21227 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21228 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21229 saved_pretend_args
= 1;
21235 if (args_to_push
== 4)
21236 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21239 = gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21240 plus_constant (Pmode
,
21244 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21246 /* Just tell the dwarf backend that we adjusted SP. */
21248 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21249 plus_constant (Pmode
, stack_pointer_rtx
,
21251 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21254 RTX_FRAME_RELATED_P (insn
) = 1;
21255 fp_offset
= args_to_push
;
21260 insn
= emit_set_insn (ip_rtx
,
21261 plus_constant (Pmode
, stack_pointer_rtx
,
21263 RTX_FRAME_RELATED_P (insn
) = 1;
21268 /* Push the argument registers, or reserve space for them. */
21269 if (cfun
->machine
->uses_anonymous_args
)
21270 insn
= emit_multi_reg_push
21271 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21272 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21275 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21276 GEN_INT (- args_to_push
)));
21277 RTX_FRAME_RELATED_P (insn
) = 1;
21280 /* If this is an interrupt service routine, and the link register
21281 is going to be pushed, and we're not generating extra
21282 push of IP (needed when frame is needed and frame layout if apcs),
21283 subtracting four from LR now will mean that the function return
21284 can be done with a single instruction. */
21285 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21286 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21287 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21290 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21292 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21295 if (live_regs_mask
)
21297 unsigned long dwarf_regs_mask
= live_regs_mask
;
21299 saved_regs
+= bit_count (live_regs_mask
) * 4;
21300 if (optimize_size
&& !frame_pointer_needed
21301 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21303 /* If no coprocessor registers are being pushed and we don't have
21304 to worry about a frame pointer then push extra registers to
21305 create the stack frame. This is done is a way that does not
21306 alter the frame layout, so is independent of the epilogue. */
21310 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21312 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21313 if (frame
&& n
* 4 >= frame
)
21316 live_regs_mask
|= (1 << n
) - 1;
21317 saved_regs
+= frame
;
21322 && current_tune
->prefer_ldrd_strd
21323 && !optimize_function_for_size_p (cfun
))
21325 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21327 thumb2_emit_strd_push (live_regs_mask
);
21328 else if (TARGET_ARM
21329 && !TARGET_APCS_FRAME
21330 && !IS_INTERRUPT (func_type
))
21331 arm_emit_strd_push (live_regs_mask
);
21334 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21335 RTX_FRAME_RELATED_P (insn
) = 1;
21340 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21341 RTX_FRAME_RELATED_P (insn
) = 1;
21345 if (! IS_VOLATILE (func_type
))
21346 saved_regs
+= arm_save_coproc_regs ();
21348 if (frame_pointer_needed
&& TARGET_ARM
)
21350 /* Create the new frame pointer. */
21351 if (TARGET_APCS_FRAME
)
21353 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21354 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21355 RTX_FRAME_RELATED_P (insn
) = 1;
21357 if (IS_NESTED (func_type
))
21359 /* Recover the static chain register. */
21360 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21361 insn
= gen_rtx_REG (SImode
, 3);
21364 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21365 insn
= gen_frame_mem (SImode
, insn
);
21367 emit_set_insn (ip_rtx
, insn
);
21368 /* Add a USE to stop propagate_one_insn() from barfing. */
21369 emit_insn (gen_force_register_use (ip_rtx
));
21374 insn
= GEN_INT (saved_regs
- 4);
21375 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21376 stack_pointer_rtx
, insn
));
21377 RTX_FRAME_RELATED_P (insn
) = 1;
21381 if (flag_stack_usage_info
)
21382 current_function_static_stack_size
21383 = offsets
->outgoing_args
- offsets
->saved_args
;
21385 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21387 /* This add can produce multiple insns for a large constant, so we
21388 need to get tricky. */
21389 rtx_insn
*last
= get_last_insn ();
21391 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21392 - offsets
->outgoing_args
);
21394 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21398 last
= last
? NEXT_INSN (last
) : get_insns ();
21399 RTX_FRAME_RELATED_P (last
) = 1;
21401 while (last
!= insn
);
21403 /* If the frame pointer is needed, emit a special barrier that
21404 will prevent the scheduler from moving stores to the frame
21405 before the stack adjustment. */
21406 if (frame_pointer_needed
)
21407 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21408 hard_frame_pointer_rtx
));
21412 if (frame_pointer_needed
&& TARGET_THUMB2
)
21413 thumb_set_frame_pointer (offsets
);
21415 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21417 unsigned long mask
;
21419 mask
= live_regs_mask
;
21420 mask
&= THUMB2_WORK_REGS
;
21421 if (!IS_NESTED (func_type
))
21422 mask
|= (1 << IP_REGNUM
);
21423 arm_load_pic_register (mask
);
21426 /* If we are profiling, make sure no instructions are scheduled before
21427 the call to mcount. Similarly if the user has requested no
21428 scheduling in the prolog. Similarly if we want non-call exceptions
21429 using the EABI unwinder, to prevent faulting instructions from being
21430 swapped with a stack adjustment. */
21431 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21432 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21433 && cfun
->can_throw_non_call_exceptions
))
21434 emit_insn (gen_blockage ());
21436 /* If the link register is being kept alive, with the return address in it,
21437 then make sure that it does not get reused by the ce2 pass. */
21438 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21439 cfun
->machine
->lr_save_eliminated
= 1;
21442 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21444 arm_print_condition (FILE *stream
)
21446 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21448 /* Branch conversion is not implemented for Thumb-2. */
21451 output_operand_lossage ("predicated Thumb instruction");
21454 if (current_insn_predicate
!= NULL
)
21456 output_operand_lossage
21457 ("predicated instruction in conditional sequence");
21461 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21463 else if (current_insn_predicate
)
21465 enum arm_cond_code code
;
21469 output_operand_lossage ("predicated Thumb instruction");
21473 code
= get_arm_condition_code (current_insn_predicate
);
21474 fputs (arm_condition_codes
[code
], stream
);
21479 /* Globally reserved letters: acln
21480 Puncutation letters currently used: @_|?().!#
21481 Lower case letters currently used: bcdefhimpqtvwxyz
21482 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21483 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21485 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21487 If CODE is 'd', then the X is a condition operand and the instruction
21488 should only be executed if the condition is true.
21489 if CODE is 'D', then the X is a condition operand and the instruction
21490 should only be executed if the condition is false: however, if the mode
21491 of the comparison is CCFPEmode, then always execute the instruction -- we
21492 do this because in these circumstances !GE does not necessarily imply LT;
21493 in these cases the instruction pattern will take care to make sure that
21494 an instruction containing %d will follow, thereby undoing the effects of
21495 doing this instruction unconditionally.
21496 If CODE is 'N' then X is a floating point operand that must be negated
21498 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21499 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21501 arm_print_operand (FILE *stream
, rtx x
, int code
)
21506 fputs (ASM_COMMENT_START
, stream
);
21510 fputs (user_label_prefix
, stream
);
21514 fputs (REGISTER_PREFIX
, stream
);
21518 arm_print_condition (stream
);
21522 /* Nothing in unified syntax, otherwise the current condition code. */
21523 if (!TARGET_UNIFIED_ASM
)
21524 arm_print_condition (stream
);
21528 /* The current condition code in unified syntax, otherwise nothing. */
21529 if (TARGET_UNIFIED_ASM
)
21530 arm_print_condition (stream
);
21534 /* The current condition code for a condition code setting instruction.
21535 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21536 if (TARGET_UNIFIED_ASM
)
21538 fputc('s', stream
);
21539 arm_print_condition (stream
);
21543 arm_print_condition (stream
);
21544 fputc('s', stream
);
21549 /* If the instruction is conditionally executed then print
21550 the current condition code, otherwise print 's'. */
21551 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21552 if (current_insn_predicate
)
21553 arm_print_condition (stream
);
21555 fputc('s', stream
);
21558 /* %# is a "break" sequence. It doesn't output anything, but is used to
21559 separate e.g. operand numbers from following text, if that text consists
21560 of further digits which we don't want to be part of the operand
21568 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21569 r
= real_value_negate (&r
);
21570 fprintf (stream
, "%s", fp_const_from_val (&r
));
21574 /* An integer or symbol address without a preceding # sign. */
21576 switch (GET_CODE (x
))
21579 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21583 output_addr_const (stream
, x
);
21587 if (GET_CODE (XEXP (x
, 0)) == PLUS
21588 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21590 output_addr_const (stream
, x
);
21593 /* Fall through. */
21596 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21600 /* An integer that we want to print in HEX. */
21602 switch (GET_CODE (x
))
21605 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21609 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21614 if (CONST_INT_P (x
))
21617 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21618 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21622 putc ('~', stream
);
21623 output_addr_const (stream
, x
);
21628 /* Print the log2 of a CONST_INT. */
21632 if (!CONST_INT_P (x
)
21633 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21634 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21636 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21641 /* The low 16 bits of an immediate constant. */
21642 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21646 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21650 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21658 shift
= shift_op (x
, &val
);
21662 fprintf (stream
, ", %s ", shift
);
21664 arm_print_operand (stream
, XEXP (x
, 1), 0);
21666 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21671 /* An explanation of the 'Q', 'R' and 'H' register operands:
21673 In a pair of registers containing a DI or DF value the 'Q'
21674 operand returns the register number of the register containing
21675 the least significant part of the value. The 'R' operand returns
21676 the register number of the register containing the most
21677 significant part of the value.
21679 The 'H' operand returns the higher of the two register numbers.
21680 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21681 same as the 'Q' operand, since the most significant part of the
21682 value is held in the lower number register. The reverse is true
21683 on systems where WORDS_BIG_ENDIAN is false.
21685 The purpose of these operands is to distinguish between cases
21686 where the endian-ness of the values is important (for example
21687 when they are added together), and cases where the endian-ness
21688 is irrelevant, but the order of register operations is important.
21689 For example when loading a value from memory into a register
21690 pair, the endian-ness does not matter. Provided that the value
21691 from the lower memory address is put into the lower numbered
21692 register, and the value from the higher address is put into the
21693 higher numbered register, the load will work regardless of whether
21694 the value being loaded is big-wordian or little-wordian. The
21695 order of the two register loads can matter however, if the address
21696 of the memory location is actually held in one of the registers
21697 being overwritten by the load.
21699 The 'Q' and 'R' constraints are also available for 64-bit
21702 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21704 rtx part
= gen_lowpart (SImode
, x
);
21705 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21709 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21711 output_operand_lossage ("invalid operand for code '%c'", code
);
21715 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21719 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21721 machine_mode mode
= GET_MODE (x
);
21724 if (mode
== VOIDmode
)
21726 part
= gen_highpart_mode (SImode
, mode
, x
);
21727 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21731 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21733 output_operand_lossage ("invalid operand for code '%c'", code
);
21737 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21741 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21743 output_operand_lossage ("invalid operand for code '%c'", code
);
21747 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21751 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21753 output_operand_lossage ("invalid operand for code '%c'", code
);
21757 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21761 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21763 output_operand_lossage ("invalid operand for code '%c'", code
);
21767 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21771 asm_fprintf (stream
, "%r",
21772 REG_P (XEXP (x
, 0))
21773 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21777 asm_fprintf (stream
, "{%r-%r}",
21779 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21782 /* Like 'M', but writing doubleword vector registers, for use by Neon
21786 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21787 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21789 asm_fprintf (stream
, "{d%d}", regno
);
21791 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21796 /* CONST_TRUE_RTX means always -- that's the default. */
21797 if (x
== const_true_rtx
)
21800 if (!COMPARISON_P (x
))
21802 output_operand_lossage ("invalid operand for code '%c'", code
);
21806 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21811 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21812 want to do that. */
21813 if (x
== const_true_rtx
)
21815 output_operand_lossage ("instruction never executed");
21818 if (!COMPARISON_P (x
))
21820 output_operand_lossage ("invalid operand for code '%c'", code
);
21824 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21825 (get_arm_condition_code (x
))],
21835 /* Former Maverick support, removed after GCC-4.7. */
21836 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21841 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21842 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21843 /* Bad value for wCG register number. */
21845 output_operand_lossage ("invalid operand for code '%c'", code
);
21850 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21853 /* Print an iWMMXt control register name. */
21855 if (!CONST_INT_P (x
)
21857 || INTVAL (x
) >= 16)
21858 /* Bad value for wC register number. */
21860 output_operand_lossage ("invalid operand for code '%c'", code
);
21866 static const char * wc_reg_names
[16] =
21868 "wCID", "wCon", "wCSSF", "wCASF",
21869 "wC4", "wC5", "wC6", "wC7",
21870 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21871 "wC12", "wC13", "wC14", "wC15"
21874 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21878 /* Print the high single-precision register of a VFP double-precision
21882 machine_mode mode
= GET_MODE (x
);
21885 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21887 output_operand_lossage ("invalid operand for code '%c'", code
);
21892 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21894 output_operand_lossage ("invalid operand for code '%c'", code
);
21898 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21902 /* Print a VFP/Neon double precision or quad precision register name. */
21906 machine_mode mode
= GET_MODE (x
);
21907 int is_quad
= (code
== 'q');
21910 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21912 output_operand_lossage ("invalid operand for code '%c'", code
);
21917 || !IS_VFP_REGNUM (REGNO (x
)))
21919 output_operand_lossage ("invalid operand for code '%c'", code
);
21924 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21925 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21927 output_operand_lossage ("invalid operand for code '%c'", code
);
21931 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21932 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21936 /* These two codes print the low/high doubleword register of a Neon quad
21937 register, respectively. For pair-structure types, can also print
21938 low/high quadword registers. */
21942 machine_mode mode
= GET_MODE (x
);
21945 if ((GET_MODE_SIZE (mode
) != 16
21946 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21948 output_operand_lossage ("invalid operand for code '%c'", code
);
21953 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21955 output_operand_lossage ("invalid operand for code '%c'", code
);
21959 if (GET_MODE_SIZE (mode
) == 16)
21960 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21961 + (code
== 'f' ? 1 : 0));
21963 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21964 + (code
== 'f' ? 1 : 0));
21968 /* Print a VFPv3 floating-point constant, represented as an integer
21972 int index
= vfp3_const_double_index (x
);
21973 gcc_assert (index
!= -1);
21974 fprintf (stream
, "%d", index
);
21978 /* Print bits representing opcode features for Neon.
21980 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21981 and polynomials as unsigned.
21983 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21985 Bit 2 is 1 for rounding functions, 0 otherwise. */
21987 /* Identify the type as 's', 'u', 'p' or 'f'. */
21990 HOST_WIDE_INT bits
= INTVAL (x
);
21991 fputc ("uspf"[bits
& 3], stream
);
21995 /* Likewise, but signed and unsigned integers are both 'i'. */
21998 HOST_WIDE_INT bits
= INTVAL (x
);
21999 fputc ("iipf"[bits
& 3], stream
);
22003 /* As for 'T', but emit 'u' instead of 'p'. */
22006 HOST_WIDE_INT bits
= INTVAL (x
);
22007 fputc ("usuf"[bits
& 3], stream
);
22011 /* Bit 2: rounding (vs none). */
22014 HOST_WIDE_INT bits
= INTVAL (x
);
22015 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22019 /* Memory operand for vld1/vst1 instruction. */
22023 bool postinc
= FALSE
;
22024 rtx postinc_reg
= NULL
;
22025 unsigned align
, memsize
, align_bits
;
22027 gcc_assert (MEM_P (x
));
22028 addr
= XEXP (x
, 0);
22029 if (GET_CODE (addr
) == POST_INC
)
22032 addr
= XEXP (addr
, 0);
22034 if (GET_CODE (addr
) == POST_MODIFY
)
22036 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22037 addr
= XEXP (addr
, 0);
22039 asm_fprintf (stream
, "[%r", REGNO (addr
));
22041 /* We know the alignment of this access, so we can emit a hint in the
22042 instruction (for some alignments) as an aid to the memory subsystem
22044 align
= MEM_ALIGN (x
) >> 3;
22045 memsize
= MEM_SIZE (x
);
22047 /* Only certain alignment specifiers are supported by the hardware. */
22048 if (memsize
== 32 && (align
% 32) == 0)
22050 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22052 else if (memsize
>= 8 && (align
% 8) == 0)
22057 if (align_bits
!= 0)
22058 asm_fprintf (stream
, ":%d", align_bits
);
22060 asm_fprintf (stream
, "]");
22063 fputs("!", stream
);
22065 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22073 gcc_assert (MEM_P (x
));
22074 addr
= XEXP (x
, 0);
22075 gcc_assert (REG_P (addr
));
22076 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22080 /* Translate an S register number into a D register number and element index. */
22083 machine_mode mode
= GET_MODE (x
);
22086 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22088 output_operand_lossage ("invalid operand for code '%c'", code
);
22093 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22095 output_operand_lossage ("invalid operand for code '%c'", code
);
22099 regno
= regno
- FIRST_VFP_REGNUM
;
22100 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22105 gcc_assert (CONST_DOUBLE_P (x
));
22107 result
= vfp3_const_double_for_fract_bits (x
);
22109 result
= vfp3_const_double_for_bits (x
);
22110 fprintf (stream
, "#%d", result
);
22113 /* Register specifier for vld1.16/vst1.16. Translate the S register
22114 number into a D register number and element index. */
22117 machine_mode mode
= GET_MODE (x
);
22120 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22122 output_operand_lossage ("invalid operand for code '%c'", code
);
22127 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22129 output_operand_lossage ("invalid operand for code '%c'", code
);
22133 regno
= regno
- FIRST_VFP_REGNUM
;
22134 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22141 output_operand_lossage ("missing operand");
22145 switch (GET_CODE (x
))
22148 asm_fprintf (stream
, "%r", REGNO (x
));
22152 output_memory_reference_mode
= GET_MODE (x
);
22153 output_address (XEXP (x
, 0));
22159 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22160 sizeof (fpstr
), 0, 1);
22161 fprintf (stream
, "#%s", fpstr
);
22166 gcc_assert (GET_CODE (x
) != NEG
);
22167 fputc ('#', stream
);
22168 if (GET_CODE (x
) == HIGH
)
22170 fputs (":lower16:", stream
);
22174 output_addr_const (stream
, x
);
22180 /* Target hook for printing a memory address. */
22182 arm_print_operand_address (FILE *stream
, rtx x
)
22186 int is_minus
= GET_CODE (x
) == MINUS
;
22189 asm_fprintf (stream
, "[%r]", REGNO (x
));
22190 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22192 rtx base
= XEXP (x
, 0);
22193 rtx index
= XEXP (x
, 1);
22194 HOST_WIDE_INT offset
= 0;
22196 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22198 /* Ensure that BASE is a register. */
22199 /* (one of them must be). */
22200 /* Also ensure the SP is not used as in index register. */
22205 switch (GET_CODE (index
))
22208 offset
= INTVAL (index
);
22211 asm_fprintf (stream
, "[%r, #%wd]",
22212 REGNO (base
), offset
);
22216 asm_fprintf (stream
, "[%r, %s%r]",
22217 REGNO (base
), is_minus
? "-" : "",
22227 asm_fprintf (stream
, "[%r, %s%r",
22228 REGNO (base
), is_minus
? "-" : "",
22229 REGNO (XEXP (index
, 0)));
22230 arm_print_operand (stream
, index
, 'S');
22231 fputs ("]", stream
);
22236 gcc_unreachable ();
22239 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22240 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22242 extern machine_mode output_memory_reference_mode
;
22244 gcc_assert (REG_P (XEXP (x
, 0)));
22246 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22247 asm_fprintf (stream
, "[%r, #%s%d]!",
22248 REGNO (XEXP (x
, 0)),
22249 GET_CODE (x
) == PRE_DEC
? "-" : "",
22250 GET_MODE_SIZE (output_memory_reference_mode
));
22252 asm_fprintf (stream
, "[%r], #%s%d",
22253 REGNO (XEXP (x
, 0)),
22254 GET_CODE (x
) == POST_DEC
? "-" : "",
22255 GET_MODE_SIZE (output_memory_reference_mode
));
22257 else if (GET_CODE (x
) == PRE_MODIFY
)
22259 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22260 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22261 asm_fprintf (stream
, "#%wd]!",
22262 INTVAL (XEXP (XEXP (x
, 1), 1)));
22264 asm_fprintf (stream
, "%r]!",
22265 REGNO (XEXP (XEXP (x
, 1), 1)));
22267 else if (GET_CODE (x
) == POST_MODIFY
)
22269 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22270 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22271 asm_fprintf (stream
, "#%wd",
22272 INTVAL (XEXP (XEXP (x
, 1), 1)));
22274 asm_fprintf (stream
, "%r",
22275 REGNO (XEXP (XEXP (x
, 1), 1)));
22277 else output_addr_const (stream
, x
);
22282 asm_fprintf (stream
, "[%r]", REGNO (x
));
22283 else if (GET_CODE (x
) == POST_INC
)
22284 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22285 else if (GET_CODE (x
) == PLUS
)
22287 gcc_assert (REG_P (XEXP (x
, 0)));
22288 if (CONST_INT_P (XEXP (x
, 1)))
22289 asm_fprintf (stream
, "[%r, #%wd]",
22290 REGNO (XEXP (x
, 0)),
22291 INTVAL (XEXP (x
, 1)));
22293 asm_fprintf (stream
, "[%r, %r]",
22294 REGNO (XEXP (x
, 0)),
22295 REGNO (XEXP (x
, 1)));
22298 output_addr_const (stream
, x
);
22302 /* Target hook for indicating whether a punctuation character for
22303 TARGET_PRINT_OPERAND is valid. */
22305 arm_print_operand_punct_valid_p (unsigned char code
)
22307 return (code
== '@' || code
== '|' || code
== '.'
22308 || code
== '(' || code
== ')' || code
== '#'
22309 || (TARGET_32BIT
&& (code
== '?'))
22310 || (TARGET_THUMB2
&& (code
== '!'))
22311 || (TARGET_THUMB
&& (code
== '_')));
22314 /* Target hook for assembling integer objects. The ARM version needs to
22315 handle word-sized values specially. */
22317 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22321 if (size
== UNITS_PER_WORD
&& aligned_p
)
22323 fputs ("\t.word\t", asm_out_file
);
22324 output_addr_const (asm_out_file
, x
);
22326 /* Mark symbols as position independent. We only do this in the
22327 .text segment, not in the .data segment. */
22328 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22329 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22331 /* See legitimize_pic_address for an explanation of the
22332 TARGET_VXWORKS_RTP check. */
22333 if (!arm_pic_data_is_text_relative
22334 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22335 fputs ("(GOT)", asm_out_file
);
22337 fputs ("(GOTOFF)", asm_out_file
);
22339 fputc ('\n', asm_out_file
);
22343 mode
= GET_MODE (x
);
22345 if (arm_vector_mode_supported_p (mode
))
22349 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22351 units
= CONST_VECTOR_NUNITS (x
);
22352 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
22354 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22355 for (i
= 0; i
< units
; i
++)
22357 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22359 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22362 for (i
= 0; i
< units
; i
++)
22364 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22365 REAL_VALUE_TYPE rval
;
22367 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
22370 (rval
, GET_MODE_INNER (mode
),
22371 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22377 return default_assemble_integer (x
, size
, aligned_p
);
22381 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22385 if (!TARGET_AAPCS_BASED
)
22388 default_named_section_asm_out_constructor
22389 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22393 /* Put these in the .init_array section, using a special relocation. */
22394 if (priority
!= DEFAULT_INIT_PRIORITY
)
22397 sprintf (buf
, "%s.%.5u",
22398 is_ctor
? ".init_array" : ".fini_array",
22400 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22407 switch_to_section (s
);
22408 assemble_align (POINTER_SIZE
);
22409 fputs ("\t.word\t", asm_out_file
);
22410 output_addr_const (asm_out_file
, symbol
);
22411 fputs ("(target1)\n", asm_out_file
);
22414 /* Add a function to the list of static constructors. */
22417 arm_elf_asm_constructor (rtx symbol
, int priority
)
22419 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22422 /* Add a function to the list of static destructors. */
22425 arm_elf_asm_destructor (rtx symbol
, int priority
)
22427 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
22468 /* Returns the index of the ARM condition code string in
22469 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22470 COMPARISON should be an rtx like `(eq (...) (...))'. */
22473 maybe_get_arm_condition_code (rtx comparison
)
22475 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22476 enum arm_cond_code code
;
22477 enum rtx_code comp_code
= GET_CODE (comparison
);
22479 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22480 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22481 XEXP (comparison
, 1));
22485 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22486 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22487 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22488 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22489 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22490 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22491 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22492 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22493 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22494 case CC_DLTUmode
: code
= ARM_CC
;
22497 if (comp_code
== EQ
)
22498 return ARM_INVERSE_CONDITION_CODE (code
);
22499 if (comp_code
== NE
)
22506 case NE
: return ARM_NE
;
22507 case EQ
: return ARM_EQ
;
22508 case GE
: return ARM_PL
;
22509 case LT
: return ARM_MI
;
22510 default: return ARM_NV
;
22516 case NE
: return ARM_NE
;
22517 case EQ
: return ARM_EQ
;
22518 default: return ARM_NV
;
22524 case NE
: return ARM_MI
;
22525 case EQ
: return ARM_PL
;
22526 default: return ARM_NV
;
22531 /* We can handle all cases except UNEQ and LTGT. */
22534 case GE
: return ARM_GE
;
22535 case GT
: return ARM_GT
;
22536 case LE
: return ARM_LS
;
22537 case LT
: return ARM_MI
;
22538 case NE
: return ARM_NE
;
22539 case EQ
: return ARM_EQ
;
22540 case ORDERED
: return ARM_VC
;
22541 case UNORDERED
: return ARM_VS
;
22542 case UNLT
: return ARM_LT
;
22543 case UNLE
: return ARM_LE
;
22544 case UNGT
: return ARM_HI
;
22545 case UNGE
: return ARM_PL
;
22546 /* UNEQ and LTGT do not have a representation. */
22547 case UNEQ
: /* Fall through. */
22548 case LTGT
: /* Fall through. */
22549 default: return ARM_NV
;
22555 case NE
: return ARM_NE
;
22556 case EQ
: return ARM_EQ
;
22557 case GE
: return ARM_LE
;
22558 case GT
: return ARM_LT
;
22559 case LE
: return ARM_GE
;
22560 case LT
: return ARM_GT
;
22561 case GEU
: return ARM_LS
;
22562 case GTU
: return ARM_CC
;
22563 case LEU
: return ARM_CS
;
22564 case LTU
: return ARM_HI
;
22565 default: return ARM_NV
;
22571 case LTU
: return ARM_CS
;
22572 case GEU
: return ARM_CC
;
22573 default: return ARM_NV
;
22579 case NE
: return ARM_NE
;
22580 case EQ
: return ARM_EQ
;
22581 case GEU
: return ARM_CS
;
22582 case GTU
: return ARM_HI
;
22583 case LEU
: return ARM_LS
;
22584 case LTU
: return ARM_CC
;
22585 default: return ARM_NV
;
22591 case GE
: return ARM_GE
;
22592 case LT
: return ARM_LT
;
22593 case GEU
: return ARM_CS
;
22594 case LTU
: return ARM_CC
;
22595 default: return ARM_NV
;
22601 case NE
: return ARM_NE
;
22602 case EQ
: return ARM_EQ
;
22603 case GE
: return ARM_GE
;
22604 case GT
: return ARM_GT
;
22605 case LE
: return ARM_LE
;
22606 case LT
: return ARM_LT
;
22607 case GEU
: return ARM_CS
;
22608 case GTU
: return ARM_HI
;
22609 case LEU
: return ARM_LS
;
22610 case LTU
: return ARM_CC
;
22611 default: return ARM_NV
;
22614 default: gcc_unreachable ();
22618 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22619 static enum arm_cond_code
22620 get_arm_condition_code (rtx comparison
)
22622 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22623 gcc_assert (code
!= ARM_NV
);
22627 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22630 thumb2_final_prescan_insn (rtx_insn
*insn
)
22632 rtx_insn
*first_insn
= insn
;
22633 rtx body
= PATTERN (insn
);
22635 enum arm_cond_code code
;
22640 /* max_insns_skipped in the tune was already taken into account in the
22641 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22642 just emit the IT blocks as we can. It does not make sense to split
22644 max
= MAX_INSN_PER_IT_BLOCK
;
22646 /* Remove the previous insn from the count of insns to be output. */
22647 if (arm_condexec_count
)
22648 arm_condexec_count
--;
22650 /* Nothing to do if we are already inside a conditional block. */
22651 if (arm_condexec_count
)
22654 if (GET_CODE (body
) != COND_EXEC
)
22657 /* Conditional jumps are implemented directly. */
22661 predicate
= COND_EXEC_TEST (body
);
22662 arm_current_cc
= get_arm_condition_code (predicate
);
22664 n
= get_attr_ce_count (insn
);
22665 arm_condexec_count
= 1;
22666 arm_condexec_mask
= (1 << n
) - 1;
22667 arm_condexec_masklen
= n
;
22668 /* See if subsequent instructions can be combined into the same block. */
22671 insn
= next_nonnote_insn (insn
);
22673 /* Jumping into the middle of an IT block is illegal, so a label or
22674 barrier terminates the block. */
22675 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22678 body
= PATTERN (insn
);
22679 /* USE and CLOBBER aren't really insns, so just skip them. */
22680 if (GET_CODE (body
) == USE
22681 || GET_CODE (body
) == CLOBBER
)
22684 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22685 if (GET_CODE (body
) != COND_EXEC
)
22687 /* Maximum number of conditionally executed instructions in a block. */
22688 n
= get_attr_ce_count (insn
);
22689 if (arm_condexec_masklen
+ n
> max
)
22692 predicate
= COND_EXEC_TEST (body
);
22693 code
= get_arm_condition_code (predicate
);
22694 mask
= (1 << n
) - 1;
22695 if (arm_current_cc
== code
)
22696 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22697 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22700 arm_condexec_count
++;
22701 arm_condexec_masklen
+= n
;
22703 /* A jump must be the last instruction in a conditional block. */
22707 /* Restore recog_data (getting the attributes of other insns can
22708 destroy this array, but final.c assumes that it remains intact
22709 across this call). */
22710 extract_constrain_insn_cached (first_insn
);
22714 arm_final_prescan_insn (rtx_insn
*insn
)
22716 /* BODY will hold the body of INSN. */
22717 rtx body
= PATTERN (insn
);
22719 /* This will be 1 if trying to repeat the trick, and things need to be
22720 reversed if it appears to fail. */
22723 /* If we start with a return insn, we only succeed if we find another one. */
22724 int seeking_return
= 0;
22725 enum rtx_code return_code
= UNKNOWN
;
22727 /* START_INSN will hold the insn from where we start looking. This is the
22728 first insn after the following code_label if REVERSE is true. */
22729 rtx_insn
*start_insn
= insn
;
22731 /* If in state 4, check if the target branch is reached, in order to
22732 change back to state 0. */
22733 if (arm_ccfsm_state
== 4)
22735 if (insn
== arm_target_insn
)
22737 arm_target_insn
= NULL
;
22738 arm_ccfsm_state
= 0;
22743 /* If in state 3, it is possible to repeat the trick, if this insn is an
22744 unconditional branch to a label, and immediately following this branch
22745 is the previous target label which is only used once, and the label this
22746 branch jumps to is not too far off. */
22747 if (arm_ccfsm_state
== 3)
22749 if (simplejump_p (insn
))
22751 start_insn
= next_nonnote_insn (start_insn
);
22752 if (BARRIER_P (start_insn
))
22754 /* XXX Isn't this always a barrier? */
22755 start_insn
= next_nonnote_insn (start_insn
);
22757 if (LABEL_P (start_insn
)
22758 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22759 && LABEL_NUSES (start_insn
) == 1)
22764 else if (ANY_RETURN_P (body
))
22766 start_insn
= next_nonnote_insn (start_insn
);
22767 if (BARRIER_P (start_insn
))
22768 start_insn
= next_nonnote_insn (start_insn
);
22769 if (LABEL_P (start_insn
)
22770 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22771 && LABEL_NUSES (start_insn
) == 1)
22774 seeking_return
= 1;
22775 return_code
= GET_CODE (body
);
22784 gcc_assert (!arm_ccfsm_state
|| reverse
);
22785 if (!JUMP_P (insn
))
22788 /* This jump might be paralleled with a clobber of the condition codes
22789 the jump should always come first */
22790 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22791 body
= XVECEXP (body
, 0, 0);
22794 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22795 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22798 int fail
= FALSE
, succeed
= FALSE
;
22799 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22800 int then_not_else
= TRUE
;
22801 rtx_insn
*this_insn
= start_insn
;
22804 /* Register the insn jumped to. */
22807 if (!seeking_return
)
22808 label
= XEXP (SET_SRC (body
), 0);
22810 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22811 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22812 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22814 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22815 then_not_else
= FALSE
;
22817 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22819 seeking_return
= 1;
22820 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22822 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22824 seeking_return
= 1;
22825 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22826 then_not_else
= FALSE
;
22829 gcc_unreachable ();
22831 /* See how many insns this branch skips, and what kind of insns. If all
22832 insns are okay, and the label or unconditional branch to the same
22833 label is not too far away, succeed. */
22834 for (insns_skipped
= 0;
22835 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22839 this_insn
= next_nonnote_insn (this_insn
);
22843 switch (GET_CODE (this_insn
))
22846 /* Succeed if it is the target label, otherwise fail since
22847 control falls in from somewhere else. */
22848 if (this_insn
== label
)
22850 arm_ccfsm_state
= 1;
22858 /* Succeed if the following insn is the target label.
22860 If return insns are used then the last insn in a function
22861 will be a barrier. */
22862 this_insn
= next_nonnote_insn (this_insn
);
22863 if (this_insn
&& this_insn
== label
)
22865 arm_ccfsm_state
= 1;
22873 /* The AAPCS says that conditional calls should not be
22874 used since they make interworking inefficient (the
22875 linker can't transform BL<cond> into BLX). That's
22876 only a problem if the machine has BLX. */
22883 /* Succeed if the following insn is the target label, or
22884 if the following two insns are a barrier and the
22886 this_insn
= next_nonnote_insn (this_insn
);
22887 if (this_insn
&& BARRIER_P (this_insn
))
22888 this_insn
= next_nonnote_insn (this_insn
);
22890 if (this_insn
&& this_insn
== label
22891 && insns_skipped
< max_insns_skipped
)
22893 arm_ccfsm_state
= 1;
22901 /* If this is an unconditional branch to the same label, succeed.
22902 If it is to another label, do nothing. If it is conditional,
22904 /* XXX Probably, the tests for SET and the PC are
22907 scanbody
= PATTERN (this_insn
);
22908 if (GET_CODE (scanbody
) == SET
22909 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22911 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22912 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22914 arm_ccfsm_state
= 2;
22917 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22920 /* Fail if a conditional return is undesirable (e.g. on a
22921 StrongARM), but still allow this if optimizing for size. */
22922 else if (GET_CODE (scanbody
) == return_code
22923 && !use_return_insn (TRUE
, NULL
)
22926 else if (GET_CODE (scanbody
) == return_code
)
22928 arm_ccfsm_state
= 2;
22931 else if (GET_CODE (scanbody
) == PARALLEL
)
22933 switch (get_attr_conds (this_insn
))
22943 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22948 /* Instructions using or affecting the condition codes make it
22950 scanbody
= PATTERN (this_insn
);
22951 if (!(GET_CODE (scanbody
) == SET
22952 || GET_CODE (scanbody
) == PARALLEL
)
22953 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
22963 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22964 arm_target_label
= CODE_LABEL_NUMBER (label
);
22967 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22969 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22971 this_insn
= next_nonnote_insn (this_insn
);
22972 gcc_assert (!this_insn
22973 || (!BARRIER_P (this_insn
)
22974 && !LABEL_P (this_insn
)));
22978 /* Oh, dear! we ran off the end.. give up. */
22979 extract_constrain_insn_cached (insn
);
22980 arm_ccfsm_state
= 0;
22981 arm_target_insn
= NULL
;
22984 arm_target_insn
= this_insn
;
22987 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22990 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22992 if (reverse
|| then_not_else
)
22993 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
22996 /* Restore recog_data (getting the attributes of other insns can
22997 destroy this array, but final.c assumes that it remains intact
22998 across this call. */
22999 extract_constrain_insn_cached (insn
);
23003 /* Output IT instructions. */
23005 thumb2_asm_output_opcode (FILE * stream
)
23010 if (arm_condexec_mask
)
23012 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23013 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23015 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23016 arm_condition_codes
[arm_current_cc
]);
23017 arm_condexec_mask
= 0;
23021 /* Returns true if REGNO is a valid register
23022 for holding a quantity of type MODE. */
23024 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23026 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23027 return (regno
== CC_REGNUM
23028 || (TARGET_HARD_FLOAT
&& TARGET_VFP
23029 && regno
== VFPCC_REGNUM
));
23031 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23035 /* For the Thumb we only allow values bigger than SImode in
23036 registers 0 - 6, so that there is always a second low
23037 register available to hold the upper part of the value.
23038 We probably we ought to ensure that the register is the
23039 start of an even numbered register pair. */
23040 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23042 if (TARGET_HARD_FLOAT
&& TARGET_VFP
23043 && IS_VFP_REGNUM (regno
))
23045 if (mode
== SFmode
|| mode
== SImode
)
23046 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23048 if (mode
== DFmode
)
23049 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23051 /* VFP registers can hold HFmode values, but there is no point in
23052 putting them there unless we have hardware conversion insns. */
23053 if (mode
== HFmode
)
23054 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
23057 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23058 || (VALID_NEON_QREG_MODE (mode
)
23059 && NEON_REGNO_OK_FOR_QUAD (regno
))
23060 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23061 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23062 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23063 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23064 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23069 if (TARGET_REALLY_IWMMXT
)
23071 if (IS_IWMMXT_GR_REGNUM (regno
))
23072 return mode
== SImode
;
23074 if (IS_IWMMXT_REGNUM (regno
))
23075 return VALID_IWMMXT_REG_MODE (mode
);
23078 /* We allow almost any value to be stored in the general registers.
23079 Restrict doubleword quantities to even register pairs in ARM state
23080 so that we can use ldrd. Do not allow very large Neon structure
23081 opaque modes in general registers; they would use too many. */
23082 if (regno
<= LAST_ARM_REGNUM
)
23084 if (ARM_NUM_REGS (mode
) > 4)
23090 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23093 if (regno
== FRAME_POINTER_REGNUM
23094 || regno
== ARG_POINTER_REGNUM
)
23095 /* We only allow integers in the fake hard registers. */
23096 return GET_MODE_CLASS (mode
) == MODE_INT
;
23101 /* Implement MODES_TIEABLE_P. */
23104 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23106 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23109 /* We specifically want to allow elements of "structure" modes to
23110 be tieable to the structure. This more general condition allows
23111 other rarer situations too. */
23113 && (VALID_NEON_DREG_MODE (mode1
)
23114 || VALID_NEON_QREG_MODE (mode1
)
23115 || VALID_NEON_STRUCT_MODE (mode1
))
23116 && (VALID_NEON_DREG_MODE (mode2
)
23117 || VALID_NEON_QREG_MODE (mode2
)
23118 || VALID_NEON_STRUCT_MODE (mode2
)))
23124 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23125 not used in arm mode. */
23128 arm_regno_class (int regno
)
23130 if (regno
== PC_REGNUM
)
23135 if (regno
== STACK_POINTER_REGNUM
)
23137 if (regno
== CC_REGNUM
)
23144 if (TARGET_THUMB2
&& regno
< 8)
23147 if ( regno
<= LAST_ARM_REGNUM
23148 || regno
== FRAME_POINTER_REGNUM
23149 || regno
== ARG_POINTER_REGNUM
)
23150 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23152 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23153 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23155 if (IS_VFP_REGNUM (regno
))
23157 if (regno
<= D7_VFP_REGNUM
)
23158 return VFP_D0_D7_REGS
;
23159 else if (regno
<= LAST_LO_VFP_REGNUM
)
23160 return VFP_LO_REGS
;
23162 return VFP_HI_REGS
;
23165 if (IS_IWMMXT_REGNUM (regno
))
23166 return IWMMXT_REGS
;
23168 if (IS_IWMMXT_GR_REGNUM (regno
))
23169 return IWMMXT_GR_REGS
;
23174 /* Handle a special case when computing the offset
23175 of an argument from the frame pointer. */
23177 arm_debugger_arg_offset (int value
, rtx addr
)
23181 /* We are only interested if dbxout_parms() failed to compute the offset. */
23185 /* We can only cope with the case where the address is held in a register. */
23189 /* If we are using the frame pointer to point at the argument, then
23190 an offset of 0 is correct. */
23191 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23194 /* If we are using the stack pointer to point at the
23195 argument, then an offset of 0 is correct. */
23196 /* ??? Check this is consistent with thumb2 frame layout. */
23197 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23198 && REGNO (addr
) == SP_REGNUM
)
23201 /* Oh dear. The argument is pointed to by a register rather
23202 than being held in a register, or being stored at a known
23203 offset from the frame pointer. Since GDB only understands
23204 those two kinds of argument we must translate the address
23205 held in the register into an offset from the frame pointer.
23206 We do this by searching through the insns for the function
23207 looking to see where this register gets its value. If the
23208 register is initialized from the frame pointer plus an offset
23209 then we are in luck and we can continue, otherwise we give up.
23211 This code is exercised by producing debugging information
23212 for a function with arguments like this:
23214 double func (double a, double b, int c, double d) {return d;}
23216 Without this code the stab for parameter 'd' will be set to
23217 an offset of 0 from the frame pointer, rather than 8. */
23219 /* The if() statement says:
23221 If the insn is a normal instruction
23222 and if the insn is setting the value in a register
23223 and if the register being set is the register holding the address of the argument
23224 and if the address is computing by an addition
23225 that involves adding to a register
23226 which is the frame pointer
23231 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23233 if ( NONJUMP_INSN_P (insn
)
23234 && GET_CODE (PATTERN (insn
)) == SET
23235 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23236 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23237 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23238 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23239 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23242 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23251 warning (0, "unable to compute real location of stacked parameter");
23252 value
= 8; /* XXX magic hack */
23273 T_MAX
/* Size of enum. Keep last. */
23274 } neon_builtin_type_mode
;
23276 #define TYPE_MODE_BIT(X) (1 << (X))
23278 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23279 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23280 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23281 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23282 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23283 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23285 #define v8qi_UP T_V8QI
23286 #define v4hi_UP T_V4HI
23287 #define v4hf_UP T_V4HF
23288 #define v2si_UP T_V2SI
23289 #define v2sf_UP T_V2SF
23291 #define v16qi_UP T_V16QI
23292 #define v8hi_UP T_V8HI
23293 #define v4si_UP T_V4SI
23294 #define v4sf_UP T_V4SF
23295 #define v2di_UP T_V2DI
23300 #define UP(X) X##_UP
23337 NEON_LOADSTRUCTLANE
,
23339 NEON_STORESTRUCTLANE
,
23348 const neon_itype itype
;
23349 const neon_builtin_type_mode mode
;
23350 const enum insn_code code
;
23351 unsigned int fcode
;
23352 } neon_builtin_datum
;
23354 #define CF(N,X) CODE_FOR_neon_##N##X
23356 #define VAR1(T, N, A) \
23357 {#N, NEON_##T, UP (A), CF (N, A), 0}
23358 #define VAR2(T, N, A, B) \
23360 {#N, NEON_##T, UP (B), CF (N, B), 0}
23361 #define VAR3(T, N, A, B, C) \
23362 VAR2 (T, N, A, B), \
23363 {#N, NEON_##T, UP (C), CF (N, C), 0}
23364 #define VAR4(T, N, A, B, C, D) \
23365 VAR3 (T, N, A, B, C), \
23366 {#N, NEON_##T, UP (D), CF (N, D), 0}
23367 #define VAR5(T, N, A, B, C, D, E) \
23368 VAR4 (T, N, A, B, C, D), \
23369 {#N, NEON_##T, UP (E), CF (N, E), 0}
23370 #define VAR6(T, N, A, B, C, D, E, F) \
23371 VAR5 (T, N, A, B, C, D, E), \
23372 {#N, NEON_##T, UP (F), CF (N, F), 0}
23373 #define VAR7(T, N, A, B, C, D, E, F, G) \
23374 VAR6 (T, N, A, B, C, D, E, F), \
23375 {#N, NEON_##T, UP (G), CF (N, G), 0}
23376 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23377 VAR7 (T, N, A, B, C, D, E, F, G), \
23378 {#N, NEON_##T, UP (H), CF (N, H), 0}
23379 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23380 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23381 {#N, NEON_##T, UP (I), CF (N, I), 0}
23382 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23383 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23384 {#N, NEON_##T, UP (J), CF (N, J), 0}
/* The NEON builtin data can be found in arm_neon_builtins.def.
   The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.  */
23395 static neon_builtin_datum neon_builtin_data
[] =
23397 #include "arm_neon_builtins.def"
23412 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23413 #define VAR1(T, N, A) \
23415 #define VAR2(T, N, A, B) \
23418 #define VAR3(T, N, A, B, C) \
23419 VAR2 (T, N, A, B), \
23421 #define VAR4(T, N, A, B, C, D) \
23422 VAR3 (T, N, A, B, C), \
23424 #define VAR5(T, N, A, B, C, D, E) \
23425 VAR4 (T, N, A, B, C, D), \
23427 #define VAR6(T, N, A, B, C, D, E, F) \
23428 VAR5 (T, N, A, B, C, D, E), \
23430 #define VAR7(T, N, A, B, C, D, E, F, G) \
23431 VAR6 (T, N, A, B, C, D, E, F), \
23433 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23434 VAR7 (T, N, A, B, C, D, E, F, G), \
23436 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23437 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23439 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23440 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23444 ARM_BUILTIN_GETWCGR0
,
23445 ARM_BUILTIN_GETWCGR1
,
23446 ARM_BUILTIN_GETWCGR2
,
23447 ARM_BUILTIN_GETWCGR3
,
23449 ARM_BUILTIN_SETWCGR0
,
23450 ARM_BUILTIN_SETWCGR1
,
23451 ARM_BUILTIN_SETWCGR2
,
23452 ARM_BUILTIN_SETWCGR3
,
23456 ARM_BUILTIN_WAVG2BR
,
23457 ARM_BUILTIN_WAVG2HR
,
23458 ARM_BUILTIN_WAVG2B
,
23459 ARM_BUILTIN_WAVG2H
,
23466 ARM_BUILTIN_WMACSZ
,
23468 ARM_BUILTIN_WMACUZ
,
23471 ARM_BUILTIN_WSADBZ
,
23473 ARM_BUILTIN_WSADHZ
,
23475 ARM_BUILTIN_WALIGNI
,
23476 ARM_BUILTIN_WALIGNR0
,
23477 ARM_BUILTIN_WALIGNR1
,
23478 ARM_BUILTIN_WALIGNR2
,
23479 ARM_BUILTIN_WALIGNR3
,
23482 ARM_BUILTIN_TMIAPH
,
23483 ARM_BUILTIN_TMIABB
,
23484 ARM_BUILTIN_TMIABT
,
23485 ARM_BUILTIN_TMIATB
,
23486 ARM_BUILTIN_TMIATT
,
23488 ARM_BUILTIN_TMOVMSKB
,
23489 ARM_BUILTIN_TMOVMSKH
,
23490 ARM_BUILTIN_TMOVMSKW
,
23492 ARM_BUILTIN_TBCSTB
,
23493 ARM_BUILTIN_TBCSTH
,
23494 ARM_BUILTIN_TBCSTW
,
23496 ARM_BUILTIN_WMADDS
,
23497 ARM_BUILTIN_WMADDU
,
23499 ARM_BUILTIN_WPACKHSS
,
23500 ARM_BUILTIN_WPACKWSS
,
23501 ARM_BUILTIN_WPACKDSS
,
23502 ARM_BUILTIN_WPACKHUS
,
23503 ARM_BUILTIN_WPACKWUS
,
23504 ARM_BUILTIN_WPACKDUS
,
23509 ARM_BUILTIN_WADDSSB
,
23510 ARM_BUILTIN_WADDSSH
,
23511 ARM_BUILTIN_WADDSSW
,
23512 ARM_BUILTIN_WADDUSB
,
23513 ARM_BUILTIN_WADDUSH
,
23514 ARM_BUILTIN_WADDUSW
,
23518 ARM_BUILTIN_WSUBSSB
,
23519 ARM_BUILTIN_WSUBSSH
,
23520 ARM_BUILTIN_WSUBSSW
,
23521 ARM_BUILTIN_WSUBUSB
,
23522 ARM_BUILTIN_WSUBUSH
,
23523 ARM_BUILTIN_WSUBUSW
,
23530 ARM_BUILTIN_WCMPEQB
,
23531 ARM_BUILTIN_WCMPEQH
,
23532 ARM_BUILTIN_WCMPEQW
,
23533 ARM_BUILTIN_WCMPGTUB
,
23534 ARM_BUILTIN_WCMPGTUH
,
23535 ARM_BUILTIN_WCMPGTUW
,
23536 ARM_BUILTIN_WCMPGTSB
,
23537 ARM_BUILTIN_WCMPGTSH
,
23538 ARM_BUILTIN_WCMPGTSW
,
23540 ARM_BUILTIN_TEXTRMSB
,
23541 ARM_BUILTIN_TEXTRMSH
,
23542 ARM_BUILTIN_TEXTRMSW
,
23543 ARM_BUILTIN_TEXTRMUB
,
23544 ARM_BUILTIN_TEXTRMUH
,
23545 ARM_BUILTIN_TEXTRMUW
,
23546 ARM_BUILTIN_TINSRB
,
23547 ARM_BUILTIN_TINSRH
,
23548 ARM_BUILTIN_TINSRW
,
23550 ARM_BUILTIN_WMAXSW
,
23551 ARM_BUILTIN_WMAXSH
,
23552 ARM_BUILTIN_WMAXSB
,
23553 ARM_BUILTIN_WMAXUW
,
23554 ARM_BUILTIN_WMAXUH
,
23555 ARM_BUILTIN_WMAXUB
,
23556 ARM_BUILTIN_WMINSW
,
23557 ARM_BUILTIN_WMINSH
,
23558 ARM_BUILTIN_WMINSB
,
23559 ARM_BUILTIN_WMINUW
,
23560 ARM_BUILTIN_WMINUH
,
23561 ARM_BUILTIN_WMINUB
,
23563 ARM_BUILTIN_WMULUM
,
23564 ARM_BUILTIN_WMULSM
,
23565 ARM_BUILTIN_WMULUL
,
23567 ARM_BUILTIN_PSADBH
,
23568 ARM_BUILTIN_WSHUFH
,
23582 ARM_BUILTIN_WSLLHI
,
23583 ARM_BUILTIN_WSLLWI
,
23584 ARM_BUILTIN_WSLLDI
,
23585 ARM_BUILTIN_WSRAHI
,
23586 ARM_BUILTIN_WSRAWI
,
23587 ARM_BUILTIN_WSRADI
,
23588 ARM_BUILTIN_WSRLHI
,
23589 ARM_BUILTIN_WSRLWI
,
23590 ARM_BUILTIN_WSRLDI
,
23591 ARM_BUILTIN_WRORHI
,
23592 ARM_BUILTIN_WRORWI
,
23593 ARM_BUILTIN_WRORDI
,
23595 ARM_BUILTIN_WUNPCKIHB
,
23596 ARM_BUILTIN_WUNPCKIHH
,
23597 ARM_BUILTIN_WUNPCKIHW
,
23598 ARM_BUILTIN_WUNPCKILB
,
23599 ARM_BUILTIN_WUNPCKILH
,
23600 ARM_BUILTIN_WUNPCKILW
,
23602 ARM_BUILTIN_WUNPCKEHSB
,
23603 ARM_BUILTIN_WUNPCKEHSH
,
23604 ARM_BUILTIN_WUNPCKEHSW
,
23605 ARM_BUILTIN_WUNPCKEHUB
,
23606 ARM_BUILTIN_WUNPCKEHUH
,
23607 ARM_BUILTIN_WUNPCKEHUW
,
23608 ARM_BUILTIN_WUNPCKELSB
,
23609 ARM_BUILTIN_WUNPCKELSH
,
23610 ARM_BUILTIN_WUNPCKELSW
,
23611 ARM_BUILTIN_WUNPCKELUB
,
23612 ARM_BUILTIN_WUNPCKELUH
,
23613 ARM_BUILTIN_WUNPCKELUW
,
23619 ARM_BUILTIN_WADDSUBHX
,
23620 ARM_BUILTIN_WSUBADDHX
,
23622 ARM_BUILTIN_WABSDIFFB
,
23623 ARM_BUILTIN_WABSDIFFH
,
23624 ARM_BUILTIN_WABSDIFFW
,
23626 ARM_BUILTIN_WADDCH
,
23627 ARM_BUILTIN_WADDCW
,
23630 ARM_BUILTIN_WAVG4R
,
23632 ARM_BUILTIN_WMADDSX
,
23633 ARM_BUILTIN_WMADDUX
,
23635 ARM_BUILTIN_WMADDSN
,
23636 ARM_BUILTIN_WMADDUN
,
23638 ARM_BUILTIN_WMULWSM
,
23639 ARM_BUILTIN_WMULWUM
,
23641 ARM_BUILTIN_WMULWSMR
,
23642 ARM_BUILTIN_WMULWUMR
,
23644 ARM_BUILTIN_WMULWL
,
23646 ARM_BUILTIN_WMULSMR
,
23647 ARM_BUILTIN_WMULUMR
,
23649 ARM_BUILTIN_WQMULM
,
23650 ARM_BUILTIN_WQMULMR
,
23652 ARM_BUILTIN_WQMULWM
,
23653 ARM_BUILTIN_WQMULWMR
,
23655 ARM_BUILTIN_WADDBHUSM
,
23656 ARM_BUILTIN_WADDBHUSL
,
23658 ARM_BUILTIN_WQMIABB
,
23659 ARM_BUILTIN_WQMIABT
,
23660 ARM_BUILTIN_WQMIATB
,
23661 ARM_BUILTIN_WQMIATT
,
23663 ARM_BUILTIN_WQMIABBN
,
23664 ARM_BUILTIN_WQMIABTN
,
23665 ARM_BUILTIN_WQMIATBN
,
23666 ARM_BUILTIN_WQMIATTN
,
23668 ARM_BUILTIN_WMIABB
,
23669 ARM_BUILTIN_WMIABT
,
23670 ARM_BUILTIN_WMIATB
,
23671 ARM_BUILTIN_WMIATT
,
23673 ARM_BUILTIN_WMIABBN
,
23674 ARM_BUILTIN_WMIABTN
,
23675 ARM_BUILTIN_WMIATBN
,
23676 ARM_BUILTIN_WMIATTN
,
23678 ARM_BUILTIN_WMIAWBB
,
23679 ARM_BUILTIN_WMIAWBT
,
23680 ARM_BUILTIN_WMIAWTB
,
23681 ARM_BUILTIN_WMIAWTT
,
23683 ARM_BUILTIN_WMIAWBBN
,
23684 ARM_BUILTIN_WMIAWBTN
,
23685 ARM_BUILTIN_WMIAWTBN
,
23686 ARM_BUILTIN_WMIAWTTN
,
23688 ARM_BUILTIN_WMERGE
,
23690 ARM_BUILTIN_CRC32B
,
23691 ARM_BUILTIN_CRC32H
,
23692 ARM_BUILTIN_CRC32W
,
23693 ARM_BUILTIN_CRC32CB
,
23694 ARM_BUILTIN_CRC32CH
,
23695 ARM_BUILTIN_CRC32CW
,
23697 ARM_BUILTIN_GET_FPSCR
,
23698 ARM_BUILTIN_SET_FPSCR
,
23704 #define CRYPTO1(L, U, M1, M2) \
23705 ARM_BUILTIN_CRYPTO_##U,
23706 #define CRYPTO2(L, U, M1, M2, M3) \
23707 ARM_BUILTIN_CRYPTO_##U,
23708 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23709 ARM_BUILTIN_CRYPTO_##U,
23711 #include "crypto.def"
23717 #include "arm_neon_builtins.def"
23722 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23736 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
23738 #define NUM_DREG_TYPES 5
23739 #define NUM_QREG_TYPES 6
23742 arm_init_neon_builtins (void)
23744 unsigned int i
, fcode
;
23747 tree neon_intQI_type_node
;
23748 tree neon_intHI_type_node
;
23749 tree neon_floatHF_type_node
;
23750 tree neon_polyQI_type_node
;
23751 tree neon_polyHI_type_node
;
23752 tree neon_intSI_type_node
;
23753 tree neon_intDI_type_node
;
23754 tree neon_intUTI_type_node
;
23755 tree neon_float_type_node
;
23757 tree intQI_pointer_node
;
23758 tree intHI_pointer_node
;
23759 tree intSI_pointer_node
;
23760 tree intDI_pointer_node
;
23761 tree float_pointer_node
;
23763 tree const_intQI_node
;
23764 tree const_intHI_node
;
23765 tree const_intSI_node
;
23766 tree const_intDI_node
;
23767 tree const_float_node
;
23769 tree const_intQI_pointer_node
;
23770 tree const_intHI_pointer_node
;
23771 tree const_intSI_pointer_node
;
23772 tree const_intDI_pointer_node
;
23773 tree const_float_pointer_node
;
23775 tree V8QI_type_node
;
23776 tree V4HI_type_node
;
23777 tree V4UHI_type_node
;
23778 tree V4HF_type_node
;
23779 tree V2SI_type_node
;
23780 tree V2USI_type_node
;
23781 tree V2SF_type_node
;
23782 tree V16QI_type_node
;
23783 tree V8HI_type_node
;
23784 tree V8UHI_type_node
;
23785 tree V4SI_type_node
;
23786 tree V4USI_type_node
;
23787 tree V4SF_type_node
;
23788 tree V2DI_type_node
;
23789 tree V2UDI_type_node
;
23791 tree intUQI_type_node
;
23792 tree intUHI_type_node
;
23793 tree intUSI_type_node
;
23794 tree intUDI_type_node
;
23796 tree intEI_type_node
;
23797 tree intOI_type_node
;
23798 tree intCI_type_node
;
23799 tree intXI_type_node
;
23801 tree reinterp_ftype_dreg
[NUM_DREG_TYPES
][NUM_DREG_TYPES
];
23802 tree reinterp_ftype_qreg
[NUM_QREG_TYPES
][NUM_QREG_TYPES
];
23803 tree dreg_types
[NUM_DREG_TYPES
], qreg_types
[NUM_QREG_TYPES
];
23805 /* Create distinguished type nodes for NEON vector element types,
23806 and pointers to values of such types, so we can detect them later. */
23807 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23808 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23809 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23810 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23811 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
23812 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
23813 neon_float_type_node
= make_node (REAL_TYPE
);
23814 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
23815 layout_type (neon_float_type_node
);
23816 neon_floatHF_type_node
= make_node (REAL_TYPE
);
23817 TYPE_PRECISION (neon_floatHF_type_node
) = GET_MODE_PRECISION (HFmode
);
23818 layout_type (neon_floatHF_type_node
);
23820 /* Define typedefs which exactly correspond to the modes we are basing vector
23821 types on. If you change these names you'll need to change
23822 the table used by arm_mangle_type too. */
23823 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
23824 "__builtin_neon_qi");
23825 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
23826 "__builtin_neon_hi");
23827 (*lang_hooks
.types
.register_builtin_type
) (neon_floatHF_type_node
,
23828 "__builtin_neon_hf");
23829 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
23830 "__builtin_neon_si");
23831 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
23832 "__builtin_neon_sf");
23833 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
23834 "__builtin_neon_di");
23835 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
23836 "__builtin_neon_poly8");
23837 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
23838 "__builtin_neon_poly16");
23840 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
23841 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
23842 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
23843 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
23844 float_pointer_node
= build_pointer_type (neon_float_type_node
);
23846 /* Next create constant-qualified versions of the above types. */
23847 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
23849 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
23851 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
23853 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
23855 const_float_node
= build_qualified_type (neon_float_type_node
,
23858 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
23859 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
23860 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
23861 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
23862 const_float_pointer_node
= build_pointer_type (const_float_node
);
23864 /* Unsigned integer types for various mode sizes. */
23865 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
23866 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
23867 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
23868 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
23869 neon_intUTI_type_node
= make_unsigned_type (GET_MODE_PRECISION (TImode
));
23870 /* Now create vector types based on our NEON element types. */
23871 /* 64-bit vectors. */
23873 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
23875 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
23877 build_vector_type_for_mode (intUHI_type_node
, V4HImode
);
23879 build_vector_type_for_mode (neon_floatHF_type_node
, V4HFmode
);
23881 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
23883 build_vector_type_for_mode (intUSI_type_node
, V2SImode
);
23885 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
23886 /* 128-bit vectors. */
23888 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
23890 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
23892 build_vector_type_for_mode (intUHI_type_node
, V8HImode
);
23894 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
23896 build_vector_type_for_mode (intUSI_type_node
, V4SImode
);
23898 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
23900 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
23902 build_vector_type_for_mode (intUDI_type_node
, V2DImode
);
23905 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
23906 "__builtin_neon_uqi");
23907 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
23908 "__builtin_neon_uhi");
23909 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
23910 "__builtin_neon_usi");
23911 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23912 "__builtin_neon_udi");
23913 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23914 "__builtin_neon_poly64");
23915 (*lang_hooks
.types
.register_builtin_type
) (neon_intUTI_type_node
,
23916 "__builtin_neon_poly128");
23918 /* Opaque integer types for structures of vectors. */
23919 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
23920 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
23921 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
23922 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
23924 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
23925 "__builtin_neon_ti");
23926 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
23927 "__builtin_neon_ei");
23928 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
23929 "__builtin_neon_oi");
23930 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
23931 "__builtin_neon_ci");
23932 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
23933 "__builtin_neon_xi");
23935 if (TARGET_CRYPTO
&& TARGET_HARD_FLOAT
)
23938 tree V16UQI_type_node
=
23939 build_vector_type_for_mode (intUQI_type_node
, V16QImode
);
23941 tree v16uqi_ftype_v16uqi
23942 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
, NULL_TREE
);
23944 tree v16uqi_ftype_v16uqi_v16uqi
23945 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
,
23946 V16UQI_type_node
, NULL_TREE
);
23948 tree v4usi_ftype_v4usi
23949 = build_function_type_list (V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23951 tree v4usi_ftype_v4usi_v4usi
23952 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23953 V4USI_type_node
, NULL_TREE
);
23955 tree v4usi_ftype_v4usi_v4usi_v4usi
23956 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23957 V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23959 tree uti_ftype_udi_udi
23960 = build_function_type_list (neon_intUTI_type_node
, intUDI_type_node
,
23961 intUDI_type_node
, NULL_TREE
);
23974 ARM_BUILTIN_CRYPTO_##U
23976 "__builtin_arm_crypto_"#L
23977 #define FT1(R, A) \
23979 #define FT2(R, A1, A2) \
23980 R##_ftype_##A1##_##A2
23981 #define FT3(R, A1, A2, A3) \
23982 R##_ftype_##A1##_##A2##_##A3
23983 #define CRYPTO1(L, U, R, A) \
23984 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23985 C (U), BUILT_IN_MD, \
23987 #define CRYPTO2(L, U, R, A1, A2) \
23988 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23989 C (U), BUILT_IN_MD, \
23992 #define CRYPTO3(L, U, R, A1, A2, A3) \
23993 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23994 C (U), BUILT_IN_MD, \
23996 #include "crypto.def"
24007 dreg_types
[0] = V8QI_type_node
;
24008 dreg_types
[1] = V4HI_type_node
;
24009 dreg_types
[2] = V2SI_type_node
;
24010 dreg_types
[3] = V2SF_type_node
;
24011 dreg_types
[4] = neon_intDI_type_node
;
24013 qreg_types
[0] = V16QI_type_node
;
24014 qreg_types
[1] = V8HI_type_node
;
24015 qreg_types
[2] = V4SI_type_node
;
24016 qreg_types
[3] = V4SF_type_node
;
24017 qreg_types
[4] = V2DI_type_node
;
24018 qreg_types
[5] = neon_intUTI_type_node
;
24020 for (i
= 0; i
< NUM_QREG_TYPES
; i
++)
24023 for (j
= 0; j
< NUM_QREG_TYPES
; j
++)
24025 if (i
< NUM_DREG_TYPES
&& j
< NUM_DREG_TYPES
)
24026 reinterp_ftype_dreg
[i
][j
]
24027 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
24029 reinterp_ftype_qreg
[i
][j
]
24030 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
24034 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
24035 i
< ARRAY_SIZE (neon_builtin_data
);
24038 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
24040 const char* const modenames
[] = {
24041 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24042 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24047 int is_load
= 0, is_store
= 0;
24049 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
24056 case NEON_LOAD1LANE
:
24057 case NEON_LOADSTRUCT
:
24058 case NEON_LOADSTRUCTLANE
:
24060 /* Fall through. */
24062 case NEON_STORE1LANE
:
24063 case NEON_STORESTRUCT
:
24064 case NEON_STORESTRUCTLANE
:
24067 /* Fall through. */
24071 case NEON_LOGICBINOP
:
24072 case NEON_SHIFTINSERT
:
24079 case NEON_SHIFTIMM
:
24080 case NEON_SHIFTACC
:
24086 case NEON_LANEMULL
:
24087 case NEON_LANEMULH
:
24089 case NEON_SCALARMUL
:
24090 case NEON_SCALARMULL
:
24091 case NEON_SCALARMULH
:
24092 case NEON_SCALARMAC
:
24098 tree return_type
= void_type_node
, args
= void_list_node
;
24100 /* Build a function type directly from the insn_data for
24101 this builtin. The build_function_type() function takes
24102 care of removing duplicates for us. */
24103 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
24107 if (is_load
&& k
== 1)
24109 /* Neon load patterns always have the memory
24110 operand in the operand 1 position. */
24111 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
24112 == neon_struct_operand
);
24118 eltype
= const_intQI_pointer_node
;
24123 eltype
= const_intHI_pointer_node
;
24128 eltype
= const_intSI_pointer_node
;
24133 eltype
= const_float_pointer_node
;
24138 eltype
= const_intDI_pointer_node
;
24141 default: gcc_unreachable ();
24144 else if (is_store
&& k
== 0)
24146 /* Similarly, Neon store patterns use operand 0 as
24147 the memory location to store to. */
24148 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
24149 == neon_struct_operand
);
24155 eltype
= intQI_pointer_node
;
24160 eltype
= intHI_pointer_node
;
24165 eltype
= intSI_pointer_node
;
24170 eltype
= float_pointer_node
;
24175 eltype
= intDI_pointer_node
;
24178 default: gcc_unreachable ();
24183 switch (insn_data
[d
->code
].operand
[k
].mode
)
24185 case VOIDmode
: eltype
= void_type_node
; break;
24187 case QImode
: eltype
= neon_intQI_type_node
; break;
24188 case HImode
: eltype
= neon_intHI_type_node
; break;
24189 case SImode
: eltype
= neon_intSI_type_node
; break;
24190 case SFmode
: eltype
= neon_float_type_node
; break;
24191 case DImode
: eltype
= neon_intDI_type_node
; break;
24192 case TImode
: eltype
= intTI_type_node
; break;
24193 case EImode
: eltype
= intEI_type_node
; break;
24194 case OImode
: eltype
= intOI_type_node
; break;
24195 case CImode
: eltype
= intCI_type_node
; break;
24196 case XImode
: eltype
= intXI_type_node
; break;
24197 /* 64-bit vectors. */
24198 case V8QImode
: eltype
= V8QI_type_node
; break;
24199 case V4HImode
: eltype
= V4HI_type_node
; break;
24200 case V2SImode
: eltype
= V2SI_type_node
; break;
24201 case V2SFmode
: eltype
= V2SF_type_node
; break;
24202 /* 128-bit vectors. */
24203 case V16QImode
: eltype
= V16QI_type_node
; break;
24204 case V8HImode
: eltype
= V8HI_type_node
; break;
24205 case V4SImode
: eltype
= V4SI_type_node
; break;
24206 case V4SFmode
: eltype
= V4SF_type_node
; break;
24207 case V2DImode
: eltype
= V2DI_type_node
; break;
24208 default: gcc_unreachable ();
24212 if (k
== 0 && !is_store
)
24213 return_type
= eltype
;
24215 args
= tree_cons (NULL_TREE
, eltype
, args
);
24218 ftype
= build_function_type (return_type
, args
);
24222 case NEON_REINTERP
:
24224 /* We iterate over NUM_DREG_TYPES doubleword types,
24225 then NUM_QREG_TYPES quadword types.
24226 V4HF is not a type used in reinterpret, so we translate
24227 d->mode to the correct index in reinterp_ftype_dreg. */
24229 = GET_MODE_SIZE (insn_data
[d
->code
].operand
[0].mode
) > 8;
24230 int rhs
= (d
->mode
- ((!qreg_p
&& (d
->mode
> T_V4HF
)) ? 1 : 0))
24232 switch (insn_data
[d
->code
].operand
[0].mode
)
24234 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
24235 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
24236 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
24237 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
24238 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
24239 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
24240 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
24241 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
24242 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
24243 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
24244 case TImode
: ftype
= reinterp_ftype_qreg
[5][rhs
]; break;
24245 default: gcc_unreachable ();
24249 case NEON_FLOAT_WIDEN
:
24251 tree eltype
= NULL_TREE
;
24252 tree return_type
= NULL_TREE
;
24254 switch (insn_data
[d
->code
].operand
[1].mode
)
24257 eltype
= V4HF_type_node
;
24258 return_type
= V4SF_type_node
;
24260 default: gcc_unreachable ();
24262 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
24265 case NEON_FLOAT_NARROW
:
24267 tree eltype
= NULL_TREE
;
24268 tree return_type
= NULL_TREE
;
24270 switch (insn_data
[d
->code
].operand
[1].mode
)
24273 eltype
= V4SF_type_node
;
24274 return_type
= V4HF_type_node
;
24276 default: gcc_unreachable ();
24278 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
24283 tree eltype
= NULL_TREE
;
24284 switch (insn_data
[d
->code
].operand
[1].mode
)
24287 eltype
= V4UHI_type_node
;
24290 eltype
= V8UHI_type_node
;
24293 eltype
= V2USI_type_node
;
24296 eltype
= V4USI_type_node
;
24299 eltype
= V2UDI_type_node
;
24301 default: gcc_unreachable ();
24303 ftype
= build_function_type_list (eltype
, eltype
, NULL
);
24306 case NEON_COPYSIGNF
:
24308 tree eltype
= NULL_TREE
;
24309 switch (insn_data
[d
->code
].operand
[1].mode
)
24312 eltype
= V2SF_type_node
;
24315 eltype
= V4SF_type_node
;
24317 default: gcc_unreachable ();
24319 ftype
= build_function_type_list (eltype
, eltype
, NULL
);
24323 gcc_unreachable ();
24326 gcc_assert (ftype
!= NULL
);
24328 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
24330 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
24332 arm_builtin_decls
[fcode
] = decl
;
24336 #undef NUM_DREG_TYPES
24337 #undef NUM_QREG_TYPES
24339 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24342 if ((MASK) & insn_flags) \
24345 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24346 BUILT_IN_MD, NULL, NULL_TREE); \
24347 arm_builtin_decls[CODE] = bdecl; \
24352 struct builtin_description
24354 const unsigned int mask
;
24355 const enum insn_code icode
;
24356 const char * const name
;
24357 const enum arm_builtins code
;
24358 const enum rtx_code comparison
;
24359 const unsigned int flag
;
24362 static const struct builtin_description bdesc_2arg
[] =
24364 #define IWMMXT_BUILTIN(code, string, builtin) \
24365 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24366 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24368 #define IWMMXT2_BUILTIN(code, string, builtin) \
24369 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24370 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24372 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
24373 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
24374 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
24375 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
24376 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
24377 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
24378 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
24379 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
24380 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
24381 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
24382 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
24383 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
24384 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
24385 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
24386 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
24387 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
24388 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
24389 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
24390 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
24391 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
24392 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
24393 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
24394 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
24395 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
24396 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
24397 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
24398 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
24399 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
24400 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
24401 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
24402 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
24403 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
24404 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
24405 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
24406 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
24407 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
24408 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
24409 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
24410 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
24411 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
24412 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
24413 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
24414 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
24415 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
24416 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
24417 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
24418 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
24419 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
24420 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
24421 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
24422 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
24423 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
24424 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
24425 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
24426 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
24427 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
24428 IWMMXT2_BUILTIN (iwmmxt_waddsubhx
, "waddsubhx", WADDSUBHX
)
24429 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx
, "wsubaddhx", WSUBADDHX
)
24430 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb
, "wabsdiffb", WABSDIFFB
)
24431 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh
, "wabsdiffh", WABSDIFFH
)
24432 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw
, "wabsdiffw", WABSDIFFW
)
24433 IWMMXT2_BUILTIN (iwmmxt_avg4
, "wavg4", WAVG4
)
24434 IWMMXT2_BUILTIN (iwmmxt_avg4r
, "wavg4r", WAVG4R
)
24435 IWMMXT2_BUILTIN (iwmmxt_wmulwsm
, "wmulwsm", WMULWSM
)
24436 IWMMXT2_BUILTIN (iwmmxt_wmulwum
, "wmulwum", WMULWUM
)
24437 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr
, "wmulwsmr", WMULWSMR
)
24438 IWMMXT2_BUILTIN (iwmmxt_wmulwumr
, "wmulwumr", WMULWUMR
)
24439 IWMMXT2_BUILTIN (iwmmxt_wmulwl
, "wmulwl", WMULWL
)
24440 IWMMXT2_BUILTIN (iwmmxt_wmulsmr
, "wmulsmr", WMULSMR
)
24441 IWMMXT2_BUILTIN (iwmmxt_wmulumr
, "wmulumr", WMULUMR
)
24442 IWMMXT2_BUILTIN (iwmmxt_wqmulm
, "wqmulm", WQMULM
)
24443 IWMMXT2_BUILTIN (iwmmxt_wqmulmr
, "wqmulmr", WQMULMR
)
24444 IWMMXT2_BUILTIN (iwmmxt_wqmulwm
, "wqmulwm", WQMULWM
)
24445 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr
, "wqmulwmr", WQMULWMR
)
24446 IWMMXT_BUILTIN (iwmmxt_walignr0
, "walignr0", WALIGNR0
)
24447 IWMMXT_BUILTIN (iwmmxt_walignr1
, "walignr1", WALIGNR1
)
24448 IWMMXT_BUILTIN (iwmmxt_walignr2
, "walignr2", WALIGNR2
)
24449 IWMMXT_BUILTIN (iwmmxt_walignr3
, "walignr3", WALIGNR3
)
24451 #define IWMMXT_BUILTIN2(code, builtin) \
24452 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24454 #define IWMMXT2_BUILTIN2(code, builtin) \
24455 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24457 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm
, WADDBHUSM
)
24458 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl
, WADDBHUSL
)
24459 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
24460 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
24461 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
24462 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
24463 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
24464 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
24465 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
24466 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
24469 #define FP_BUILTIN(L, U) \
24470 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24473 FP_BUILTIN (get_fpscr
, GET_FPSCR
)
24474 FP_BUILTIN (set_fpscr
, SET_FPSCR
)
24477 #define CRC32_BUILTIN(L, U) \
24478 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24480 CRC32_BUILTIN (crc32b
, CRC32B
)
24481 CRC32_BUILTIN (crc32h
, CRC32H
)
24482 CRC32_BUILTIN (crc32w
, CRC32W
)
24483 CRC32_BUILTIN (crc32cb
, CRC32CB
)
24484 CRC32_BUILTIN (crc32ch
, CRC32CH
)
24485 CRC32_BUILTIN (crc32cw
, CRC32CW
)
24486 #undef CRC32_BUILTIN
24489 #define CRYPTO_BUILTIN(L, U) \
24490 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24495 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24496 #define CRYPTO1(L, U, R, A)
24497 #define CRYPTO3(L, U, R, A1, A2, A3)
24498 #include "crypto.def"
24505 static const struct builtin_description bdesc_1arg
[] =
24507 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
24508 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
24509 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
24510 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
24511 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
24512 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
24513 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
24514 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
24515 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
24516 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
24517 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
24518 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
24519 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
24520 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
24521 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
24522 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
24523 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
24524 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
24525 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3
, "wabsb", WABSB
)
24526 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3
, "wabsh", WABSH
)
24527 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3
, "wabsw", WABSW
)
24528 IWMMXT_BUILTIN (tbcstv8qi
, "tbcstb", TBCSTB
)
24529 IWMMXT_BUILTIN (tbcstv4hi
, "tbcsth", TBCSTH
)
24530 IWMMXT_BUILTIN (tbcstv2si
, "tbcstw", TBCSTW
)
24532 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24533 #define CRYPTO2(L, U, R, A1, A2)
24534 #define CRYPTO3(L, U, R, A1, A2, A3)
24535 #include "crypto.def"
24541 static const struct builtin_description bdesc_3arg
[] =
24543 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24544 #define CRYPTO1(L, U, R, A)
24545 #define CRYPTO2(L, U, R, A1, A2)
24546 #include "crypto.def"
24551 #undef CRYPTO_BUILTIN
24553 /* Set up all the iWMMXt builtins. This is not called if
24554 TARGET_IWMMXT is zero. */
24557 arm_init_iwmmxt_builtins (void)
24559 const struct builtin_description
* d
;
24562 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
24563 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
24564 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
24566 tree v8qi_ftype_v8qi_v8qi_int
24567 = build_function_type_list (V8QI_type_node
,
24568 V8QI_type_node
, V8QI_type_node
,
24569 integer_type_node
, NULL_TREE
);
24570 tree v4hi_ftype_v4hi_int
24571 = build_function_type_list (V4HI_type_node
,
24572 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24573 tree v2si_ftype_v2si_int
24574 = build_function_type_list (V2SI_type_node
,
24575 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24576 tree v2si_ftype_di_di
24577 = build_function_type_list (V2SI_type_node
,
24578 long_long_integer_type_node
,
24579 long_long_integer_type_node
,
24581 tree di_ftype_di_int
24582 = build_function_type_list (long_long_integer_type_node
,
24583 long_long_integer_type_node
,
24584 integer_type_node
, NULL_TREE
);
24585 tree di_ftype_di_int_int
24586 = build_function_type_list (long_long_integer_type_node
,
24587 long_long_integer_type_node
,
24589 integer_type_node
, NULL_TREE
);
24590 tree int_ftype_v8qi
24591 = build_function_type_list (integer_type_node
,
24592 V8QI_type_node
, NULL_TREE
);
24593 tree int_ftype_v4hi
24594 = build_function_type_list (integer_type_node
,
24595 V4HI_type_node
, NULL_TREE
);
24596 tree int_ftype_v2si
24597 = build_function_type_list (integer_type_node
,
24598 V2SI_type_node
, NULL_TREE
);
24599 tree int_ftype_v8qi_int
24600 = build_function_type_list (integer_type_node
,
24601 V8QI_type_node
, integer_type_node
, NULL_TREE
);
24602 tree int_ftype_v4hi_int
24603 = build_function_type_list (integer_type_node
,
24604 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24605 tree int_ftype_v2si_int
24606 = build_function_type_list (integer_type_node
,
24607 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24608 tree v8qi_ftype_v8qi_int_int
24609 = build_function_type_list (V8QI_type_node
,
24610 V8QI_type_node
, integer_type_node
,
24611 integer_type_node
, NULL_TREE
);
24612 tree v4hi_ftype_v4hi_int_int
24613 = build_function_type_list (V4HI_type_node
,
24614 V4HI_type_node
, integer_type_node
,
24615 integer_type_node
, NULL_TREE
);
24616 tree v2si_ftype_v2si_int_int
24617 = build_function_type_list (V2SI_type_node
,
24618 V2SI_type_node
, integer_type_node
,
24619 integer_type_node
, NULL_TREE
);
24620 /* Miscellaneous. */
24621 tree v8qi_ftype_v4hi_v4hi
24622 = build_function_type_list (V8QI_type_node
,
24623 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24624 tree v4hi_ftype_v2si_v2si
24625 = build_function_type_list (V4HI_type_node
,
24626 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24627 tree v8qi_ftype_v4hi_v8qi
24628 = build_function_type_list (V8QI_type_node
,
24629 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
24630 tree v2si_ftype_v4hi_v4hi
24631 = build_function_type_list (V2SI_type_node
,
24632 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24633 tree v2si_ftype_v8qi_v8qi
24634 = build_function_type_list (V2SI_type_node
,
24635 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24636 tree v4hi_ftype_v4hi_di
24637 = build_function_type_list (V4HI_type_node
,
24638 V4HI_type_node
, long_long_integer_type_node
,
24640 tree v2si_ftype_v2si_di
24641 = build_function_type_list (V2SI_type_node
,
24642 V2SI_type_node
, long_long_integer_type_node
,
24645 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
24646 tree int_ftype_void
24647 = build_function_type_list (integer_type_node
, NULL_TREE
);
24649 = build_function_type_list (long_long_integer_type_node
,
24650 V8QI_type_node
, NULL_TREE
);
24652 = build_function_type_list (long_long_integer_type_node
,
24653 V4HI_type_node
, NULL_TREE
);
24655 = build_function_type_list (long_long_integer_type_node
,
24656 V2SI_type_node
, NULL_TREE
);
24657 tree v2si_ftype_v4hi
24658 = build_function_type_list (V2SI_type_node
,
24659 V4HI_type_node
, NULL_TREE
);
24660 tree v4hi_ftype_v8qi
24661 = build_function_type_list (V4HI_type_node
,
24662 V8QI_type_node
, NULL_TREE
);
24663 tree v8qi_ftype_v8qi
24664 = build_function_type_list (V8QI_type_node
,
24665 V8QI_type_node
, NULL_TREE
);
24666 tree v4hi_ftype_v4hi
24667 = build_function_type_list (V4HI_type_node
,
24668 V4HI_type_node
, NULL_TREE
);
24669 tree v2si_ftype_v2si
24670 = build_function_type_list (V2SI_type_node
,
24671 V2SI_type_node
, NULL_TREE
);
24673 tree di_ftype_di_v4hi_v4hi
24674 = build_function_type_list (long_long_unsigned_type_node
,
24675 long_long_unsigned_type_node
,
24676 V4HI_type_node
, V4HI_type_node
,
24679 tree di_ftype_v4hi_v4hi
24680 = build_function_type_list (long_long_unsigned_type_node
,
24681 V4HI_type_node
,V4HI_type_node
,
24684 tree v2si_ftype_v2si_v4hi_v4hi
24685 = build_function_type_list (V2SI_type_node
,
24686 V2SI_type_node
, V4HI_type_node
,
24687 V4HI_type_node
, NULL_TREE
);
24689 tree v2si_ftype_v2si_v8qi_v8qi
24690 = build_function_type_list (V2SI_type_node
,
24691 V2SI_type_node
, V8QI_type_node
,
24692 V8QI_type_node
, NULL_TREE
);
24694 tree di_ftype_di_v2si_v2si
24695 = build_function_type_list (long_long_unsigned_type_node
,
24696 long_long_unsigned_type_node
,
24697 V2SI_type_node
, V2SI_type_node
,
24700 tree di_ftype_di_di_int
24701 = build_function_type_list (long_long_unsigned_type_node
,
24702 long_long_unsigned_type_node
,
24703 long_long_unsigned_type_node
,
24704 integer_type_node
, NULL_TREE
);
24706 tree void_ftype_int
24707 = build_function_type_list (void_type_node
,
24708 integer_type_node
, NULL_TREE
);
24710 tree v8qi_ftype_char
24711 = build_function_type_list (V8QI_type_node
,
24712 signed_char_type_node
, NULL_TREE
);
24714 tree v4hi_ftype_short
24715 = build_function_type_list (V4HI_type_node
,
24716 short_integer_type_node
, NULL_TREE
);
24718 tree v2si_ftype_int
24719 = build_function_type_list (V2SI_type_node
,
24720 integer_type_node
, NULL_TREE
);
24722 /* Normal vector binops. */
24723 tree v8qi_ftype_v8qi_v8qi
24724 = build_function_type_list (V8QI_type_node
,
24725 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24726 tree v4hi_ftype_v4hi_v4hi
24727 = build_function_type_list (V4HI_type_node
,
24728 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
24729 tree v2si_ftype_v2si_v2si
24730 = build_function_type_list (V2SI_type_node
,
24731 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24732 tree di_ftype_di_di
24733 = build_function_type_list (long_long_unsigned_type_node
,
24734 long_long_unsigned_type_node
,
24735 long_long_unsigned_type_node
,
24738 /* Add all builtins that are more or less simple operations on two
24740 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
24742 /* Use one of the operands; the target can have a different mode for
24743 mask-generating compares. */
24747 if (d
->name
== 0 || !(d
->mask
== FL_IWMMXT
|| d
->mask
== FL_IWMMXT2
))
24750 mode
= insn_data
[d
->icode
].operand
[1].mode
;
24755 type
= v8qi_ftype_v8qi_v8qi
;
24758 type
= v4hi_ftype_v4hi_v4hi
;
24761 type
= v2si_ftype_v2si_v2si
;
24764 type
= di_ftype_di_di
;
24768 gcc_unreachable ();
24771 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
24774 /* Add the remaining MMX insns with somewhat more complicated types. */
24775 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24776 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24777 ARM_BUILTIN_ ## CODE)
24779 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24780 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24781 ARM_BUILTIN_ ## CODE)
24783 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
24784 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
24785 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
24786 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
24787 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
24788 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
24789 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
24790 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
24791 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
24793 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
24794 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
24795 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
24796 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
24797 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
24798 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
24800 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
24801 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
24802 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
24803 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
24804 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
24805 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
24807 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
24808 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
24809 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
24810 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
24811 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
24812 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
24814 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
24815 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
24816 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
24817 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
24818 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
24819 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
24821 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
24823 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
24824 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
24825 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
24826 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
24827 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
24828 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
24829 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
24830 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
24831 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
24832 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
24834 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
24835 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
24836 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
24837 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
24838 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
24839 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
24840 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
24841 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
24842 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
24844 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
24845 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
24846 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
24848 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
24849 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
24850 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
24852 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
24853 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
24855 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
24856 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
24857 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
24858 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
24859 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
24860 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
24862 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
24863 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
24864 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
24865 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
24866 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
24867 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
24868 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
24869 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
24870 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
24871 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
24872 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
24873 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
24875 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
24876 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
24877 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
24878 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
24880 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
24881 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
24882 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
24883 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
24884 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
24885 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
24886 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
24888 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
24889 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
24890 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
24892 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
24893 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
24894 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
24895 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
24897 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
24898 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
24899 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
24900 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
24902 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
24903 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
24904 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
24905 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
24907 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
24908 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
24909 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
24910 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
24912 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
24913 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
24914 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
24915 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
24917 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
24918 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
24919 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
24920 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
24922 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
24924 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
24925 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
24926 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
24928 #undef iwmmx_mbuiltin
24929 #undef iwmmx2_mbuiltin
/* Create and register the ARM half-precision floating-point type
   "__fp16" with the front end.  Only the type is registered here;
   arithmetic on it is handled through conversions (see
   arm_convert_to_type / arm_promoted_type).  */
static void
arm_init_fp16_builtins (void)
{
  /* Build a fresh 16-bit REAL_TYPE node rather than reusing an
     existing float type, so it is distinct from "float"/"double".  */
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  /* Make the type visible to the language front end as "__fp16".  */
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
24942 arm_init_crc32_builtins ()
24944 tree si_ftype_si_qi
24945 = build_function_type_list (unsigned_intSI_type_node
,
24946 unsigned_intSI_type_node
,
24947 unsigned_intQI_type_node
, NULL_TREE
);
24948 tree si_ftype_si_hi
24949 = build_function_type_list (unsigned_intSI_type_node
,
24950 unsigned_intSI_type_node
,
24951 unsigned_intHI_type_node
, NULL_TREE
);
24952 tree si_ftype_si_si
24953 = build_function_type_list (unsigned_intSI_type_node
,
24954 unsigned_intSI_type_node
,
24955 unsigned_intSI_type_node
, NULL_TREE
);
24957 arm_builtin_decls
[ARM_BUILTIN_CRC32B
]
24958 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi
,
24959 ARM_BUILTIN_CRC32B
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24960 arm_builtin_decls
[ARM_BUILTIN_CRC32H
]
24961 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi
,
24962 ARM_BUILTIN_CRC32H
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24963 arm_builtin_decls
[ARM_BUILTIN_CRC32W
]
24964 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si
,
24965 ARM_BUILTIN_CRC32W
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24966 arm_builtin_decls
[ARM_BUILTIN_CRC32CB
]
24967 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi
,
24968 ARM_BUILTIN_CRC32CB
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24969 arm_builtin_decls
[ARM_BUILTIN_CRC32CH
]
24970 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi
,
24971 ARM_BUILTIN_CRC32CH
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24972 arm_builtin_decls
[ARM_BUILTIN_CRC32CW
]
24973 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si
,
24974 ARM_BUILTIN_CRC32CW
, BUILT_IN_MD
, NULL
, NULL_TREE
);
/* Implement TARGET_INIT_BUILTINS.  Register every ARM machine-specific
   builtin family that applies to the current target configuration.
   NOTE(review): the guard conditions on the neon/crc32 calls were
   mangled in this copy and have been restored from the canonical
   source — confirm against the repository.  */
static void
arm_init_builtins (void)
{
  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  /* __fp16 support is only registered when a half-precision format
     has been selected (-mfp16-format / ABI default).  */
  if (arm_fp16_format)
    arm_init_fp16_builtins ();

  if (TARGET_CRC32)
    arm_init_crc32_builtins ();

  if (TARGET_VFP && TARGET_HARD_FLOAT)
    {
      /* void f (unsigned int) -- store FPSCR.  */
      tree ftype_set_fpscr
	= build_function_type_list (void_type_node, unsigned_type_node, NULL);
      /* unsigned int f (void) -- load FPSCR.  */
      tree ftype_get_fpscr
	= build_function_type_list (unsigned_type_node, NULL);

      arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
	= add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
				ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
      arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
	= add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
				ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
    }
}
/* Return the ARM builtin decl for CODE (implements TARGET_BUILTIN_DECL).
   CODE indexes the arm_builtin_decls table; out-of-range codes yield
   error_mark_node rather than crashing.  INITIALIZE_P is unused.  */

static tree
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARM_BUILTIN_MAX)
    return error_mark_node;

  return arm_builtin_decls[code];
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  Return an error message if
   T is not a valid parameter type (__fp16 values may not be passed as
   function parameters directly), or NULL if T is acceptable.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  /* A 16-bit scalar float can only be the __fp16 type here.  */
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}
/* Implement TARGET_INVALID_RETURN_TYPE.  (The previous comment said
   TARGET_INVALID_PARAMETER_TYPE — a copy-paste slip.)  Return an error
   message if T is not a valid return type (__fp16 cannot be returned
   directly), or NULL if T is acceptable.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  /* A 16-bit scalar float can only be the __fp16 type here.  */
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  __fp16 values promote to float in
   expressions (like the default float->double promotion); any other
   type is left alone by returning NULL_TREE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 to or from double to do an intermediate conversion to
   float.  Returns the rewritten conversion expression, or NULL_TREE to
   let the generic code handle it.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  /* Only scalar float <-> scalar float conversions are special.  */
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  /* __fp16 (precision 16) to/from a type wider than float
     (precision > 32) must round through float first.  */
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  Fixed-point modes are also
   accepted here (iWMMXt).  */

static bool
arm_scalar_mode_supported_p (machine_mode mode)
{
  if (mode == HFmode)
    /* HFmode only works when a __fp16 format has been chosen.  */
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions: return X unchanged when it is already a proper
   operand, otherwise substitute a freshly cleared register of MODE.  */

static rtx
safe_vector_operand (rtx x, machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  /* Clear the new register via iwmmxt_clrdi; non-DImode registers are
     cleared through a DImode subreg view of the same register.  */
  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Function to expand ternary builtins.  Expands the three-operand
   builtin call EXP using insn pattern ICODE, placing the result in
   TARGET (or a fresh register if TARGET is unsuitable).  Returns the
   result rtx, or 0 if the pattern could not be generated.  */

static rtx
arm_expand_ternop_builtin (enum insn_code icode,
			   tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);

  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = NULL_RTX;

  /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
     lane operand depending on endianness.  */
  bool builtin_sha1cpm_p = false;

  if (insn_data[icode].n_operands == 5)
    {
      /* Only the SHA1 patterns take a fifth (lane) operand.  */
      gcc_assert (icode == CODE_FOR_crypto_sha1c
		  || icode == CODE_FOR_crypto_sha1p
		  || icode == CODE_FOR_crypto_sha1m);
      builtin_sha1cpm_p = true;
    }
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;
  machine_mode mode2 = insn_data[icode].operand[3].mode;

  /* Guard against const0_rtx from erroneous source (see
     safe_vector_operand).  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);
  if (VECTOR_MODE_P (mode2))
    op2 = safe_vector_operand (op2, mode2);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
	      && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));

  /* Force each operand into a register if the pattern's predicate
     rejects it in place.  */
  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
    op2 = copy_to_mode_reg (mode2, op2);
  if (builtin_sha1cpm_p)
    op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);

  if (builtin_sha1cpm_p)
    pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
  else
    pat = GEN_FCN (icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.
   Expands the two-operand builtin call EXP using insn pattern ICODE,
   placing the result in TARGET (or a fresh register if TARGET is
   unsuitable).  Returns the result rtx, or 0 on pattern failure.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Guard against const0_rtx from erroneous source (see
     safe_vector_operand).  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  /* Force operands into registers when the pattern's predicates
     reject them in place.  */
  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of unop insns.
   Expands the one-operand builtin call EXP using insn pattern ICODE.
   If DO_LOAD is nonzero the argument is a pointer and the operand is
   formed by dereferencing it.  Returns the result rtx, or 0 on
   pattern failure.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  rtx op1 = NULL_RTX;
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  bool builtin_sha1h_p = false;

  if (insn_data[icode].n_operands == 3)
    {
      /* Only the SHA1H pattern takes an extra (lane) operand.  */
      gcc_assert (icode == CODE_FOR_crypto_sha1h);
      builtin_sha1h_p = true;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    /* The argument is an address; build a MEM in the operand mode.  */
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  if (builtin_sha1h_p)
    /* Endianness selects the vec_select lane for sha1h.  */
    op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);

  if (builtin_sha1h_p)
    pat = GEN_FCN (icode) (target, op0, op1);
  else
    pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
25254 NEON_ARG_COPY_TO_REG
,
25260 #define NEON_MAX_BUILTIN_ARGS 5
/* EXP is a pointer argument to a Neon load or store intrinsic.  Derive
   and return an expression for the accessed memory.

   The intrinsic function operates on a block of registers that has
   mode REG_MODE.  This block contains vectors of type TYPE_MODE.  The
   function references the memory at EXP of type TYPE and in mode
   MEM_MODE; this mode may be BLKmode if no more suitable mode is
   available.  */

static tree
neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
			  machine_mode reg_mode,
			  neon_builtin_type_mode type_mode)
{
  HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
  tree elem_type, upper_bound, array_type;

  /* Work out the size of the register block in bytes.  */
  reg_size = GET_MODE_SIZE (reg_mode);

  /* Work out the size of each vector in bytes.  Q registers are 16
     bytes, D registers 8.  */
  gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
  vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);

  /* Work out how many vectors there are.  */
  gcc_assert (reg_size % vector_size == 0);
  nvectors = reg_size / vector_size;

  /* Work out the type of each element.  */
  gcc_assert (POINTER_TYPE_P (type));
  elem_type = TREE_TYPE (type);

  /* Work out how many elements are being loaded or stored.
     MEM_MODE == REG_MODE implies a one-to-one mapping between register
     and memory elements; anything else implies a lane load or store.  */
  if (mem_mode == reg_mode)
    nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
  else
    nelems = nvectors;

  /* Create a type that describes the full access.  */
  upper_bound = build_int_cst (size_type_node, nelems - 1);
  array_type = build_array_type (elem_type, build_index_type (upper_bound));

  /* Dereference EXP using that type.  */
  return fold_build2 (MEM_REF, array_type, exp,
		      build_int_cst (build_pointer_type (array_type), 0));
}
/* Expand a Neon builtin.  ICODE is the insn pattern; HAVE_RETVAL says
   whether operand 0 of the pattern is a result.  The variadic tail is
   a NEON_ARG_STOP-terminated list of builtin_arg codes describing how
   each call argument must be prepared (register copy, constant check,
   or memory dereference).  Returns the result rtx (TARGET or a fresh
   register), or 0 on failure.
   NOTE(review): this copy of the function was mangled; the control
   structure (switch labels/breaks, va_end, argument loop) has been
   restored from the canonical source — confirm against the repo.  */

static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      neon_builtin_type_mode type_mode,
		      tree exp, int fcode, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  tree arg_type;
  tree formals;
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  machine_mode other_mode;
  int argc = 0;
  int opno;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, fcode);

  formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
	  /* When the pattern has a result, call argument N maps to
	     pattern operand N+1.  */
	  opno = argc + have_retval;
	  mode[argc] = insn_data[icode].operand[opno].mode;
	  arg[argc] = CALL_EXPR_ARG (exp, argc);
	  arg_type = TREE_VALUE (formals);
	  if (thisarg == NEON_ARG_MEMORY)
	    {
	      other_mode = insn_data[icode].operand[1 - opno].mode;
	      arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
						    mode[argc], other_mode,
						    type_mode);
	    }

	  /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
	     is returned.  */
	  op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
				  (thisarg == NEON_ARG_MEMORY
				   ? EXPAND_MEMORY : EXPAND_NORMAL));

	  switch (thisarg)
	    {
	    case NEON_ARG_COPY_TO_REG:
	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	      if (!(*insn_data[icode].operand[opno].predicate)
		     (op[argc], mode[argc]))
		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	      break;

	    case NEON_ARG_CONSTANT:
	      /* FIXME: This error message is somewhat unhelpful.  */
	      if (!(*insn_data[icode].operand[opno].predicate)
		    (op[argc], mode[argc]))
		error ("argument must be a constant");
	      break;

	    case NEON_ARG_MEMORY:
	      /* Check if expand failed.  */
	      if (op[argc] == const0_rtx)
		return 0;
	      gcc_assert (MEM_P (op[argc]));
	      PUT_MODE (op[argc], mode[argc]);
	      /* ??? arm_neon.h uses the same built-in functions for signed
		 and unsigned accesses, casting where necessary.  This isn't
		 alias safe.  */
	      set_mem_alias_set (op[argc], 0);
	      if (!(*insn_data[icode].operand[opno].predicate)
		    (op[argc], mode[argc]))
		op[argc] = (replace_equiv_address
			    (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
	      break;

	    case NEON_ARG_STOP:
	      gcc_unreachable ();
	    }

	  argc++;
	  formals = TREE_CHAIN (formals);
	}
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (target, op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (target, op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Expand a Neon builtin.  These are "special" because they don't have
   symbolic constants defined per-instruction or per instruction-variant.
   Instead, the required info is looked up in the table neon_builtin_data.
   Dispatches on the builtin's itype to arm_expand_neon_args with the
   appropriate argument-preparation codes.
   NOTE(review): several case labels in this copy were mangled and have
   been restored from the canonical source — confirm against the repo.  */

static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
  neon_itype itype = d->itype;
  enum insn_code icode = d->code;
  neon_builtin_type_mode type_mode = d->mode;

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_RINT:
    case NEON_SPLIT:
    case NEON_FLOAT_WIDEN:
    case NEON_FLOAT_NARROW:
    case NEON_BSWAP:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COPYSIGNF:
    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      /* Stores have no result: have_retval == 0.  */
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);
    }

  gcc_unreachable ();
}
/* Emit code to reinterpret one Neon type as another, without altering
   bits: a simple register move viewing SRC in DEST's mode.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src,
			unsigned int count)
{
  unsigned int i;

  /* Forward order is safe when the ranges don't overlap, or when the
     destination starts below the source.  Otherwise copy the
     components in reverse so later sources aren't clobbered first.  */
  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
/* Split operands into moves from op[1] + op[2] into op[0]: decompose a
   vcombine into two half-width register moves, ordered so neither move
   clobbers a source the other still needs.  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      /* operands[2] overlaps destlo: move the high half first.  */
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
25673 /* Expand an expression EXP that calls a built-in function,
25674 with result going to TARGET if that's convenient
25675 (and in mode MODE if that's convenient).
25676 SUBTARGET may be used as the target for computing one of EXP's operands.
25677 IGNORE is nonzero if the value is to be ignored. */
25680 arm_expand_builtin (tree exp
,
25682 rtx subtarget ATTRIBUTE_UNUSED
,
25683 machine_mode mode ATTRIBUTE_UNUSED
,
25684 int ignore ATTRIBUTE_UNUSED
)
25686 const struct builtin_description
* d
;
25687 enum insn_code icode
;
25688 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
25696 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
25698 machine_mode tmode
;
25699 machine_mode mode0
;
25700 machine_mode mode1
;
25701 machine_mode mode2
;
25707 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
25708 return arm_expand_neon_builtin (fcode
, exp
, target
);
25712 case ARM_BUILTIN_GET_FPSCR
:
25713 case ARM_BUILTIN_SET_FPSCR
:
25714 if (fcode
== ARM_BUILTIN_GET_FPSCR
)
25716 icode
= CODE_FOR_get_fpscr
;
25717 target
= gen_reg_rtx (SImode
);
25718 pat
= GEN_FCN (icode
) (target
);
25723 icode
= CODE_FOR_set_fpscr
;
25724 arg0
= CALL_EXPR_ARG (exp
, 0);
25725 op0
= expand_normal (arg0
);
25726 pat
= GEN_FCN (icode
) (op0
);
25731 case ARM_BUILTIN_TEXTRMSB
:
25732 case ARM_BUILTIN_TEXTRMUB
:
25733 case ARM_BUILTIN_TEXTRMSH
:
25734 case ARM_BUILTIN_TEXTRMUH
:
25735 case ARM_BUILTIN_TEXTRMSW
:
25736 case ARM_BUILTIN_TEXTRMUW
:
25737 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
25738 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
25739 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
25740 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
25741 : CODE_FOR_iwmmxt_textrmw
);
25743 arg0
= CALL_EXPR_ARG (exp
, 0);
25744 arg1
= CALL_EXPR_ARG (exp
, 1);
25745 op0
= expand_normal (arg0
);
25746 op1
= expand_normal (arg1
);
25747 tmode
= insn_data
[icode
].operand
[0].mode
;
25748 mode0
= insn_data
[icode
].operand
[1].mode
;
25749 mode1
= insn_data
[icode
].operand
[2].mode
;
25751 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25752 op0
= copy_to_mode_reg (mode0
, op0
);
25753 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25755 /* @@@ better error message */
25756 error ("selector must be an immediate");
25757 return gen_reg_rtx (tmode
);
25760 opint
= INTVAL (op1
);
25761 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
25763 if (opint
> 7 || opint
< 0)
25764 error ("the range of selector should be in 0 to 7");
25766 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
25768 if (opint
> 3 || opint
< 0)
25769 error ("the range of selector should be in 0 to 3");
25771 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25773 if (opint
> 1 || opint
< 0)
25774 error ("the range of selector should be in 0 to 1");
25778 || GET_MODE (target
) != tmode
25779 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25780 target
= gen_reg_rtx (tmode
);
25781 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25787 case ARM_BUILTIN_WALIGNI
:
25788 /* If op2 is immediate, call walighi, else call walighr. */
25789 arg0
= CALL_EXPR_ARG (exp
, 0);
25790 arg1
= CALL_EXPR_ARG (exp
, 1);
25791 arg2
= CALL_EXPR_ARG (exp
, 2);
25792 op0
= expand_normal (arg0
);
25793 op1
= expand_normal (arg1
);
25794 op2
= expand_normal (arg2
);
25795 if (CONST_INT_P (op2
))
25797 icode
= CODE_FOR_iwmmxt_waligni
;
25798 tmode
= insn_data
[icode
].operand
[0].mode
;
25799 mode0
= insn_data
[icode
].operand
[1].mode
;
25800 mode1
= insn_data
[icode
].operand
[2].mode
;
25801 mode2
= insn_data
[icode
].operand
[3].mode
;
25802 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25803 op0
= copy_to_mode_reg (mode0
, op0
);
25804 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25805 op1
= copy_to_mode_reg (mode1
, op1
);
25806 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
25807 selector
= INTVAL (op2
);
25808 if (selector
> 7 || selector
< 0)
25809 error ("the range of selector should be in 0 to 7");
25813 icode
= CODE_FOR_iwmmxt_walignr
;
25814 tmode
= insn_data
[icode
].operand
[0].mode
;
25815 mode0
= insn_data
[icode
].operand
[1].mode
;
25816 mode1
= insn_data
[icode
].operand
[2].mode
;
25817 mode2
= insn_data
[icode
].operand
[3].mode
;
25818 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25819 op0
= copy_to_mode_reg (mode0
, op0
);
25820 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25821 op1
= copy_to_mode_reg (mode1
, op1
);
25822 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25823 op2
= copy_to_mode_reg (mode2
, op2
);
25826 || GET_MODE (target
) != tmode
25827 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25828 target
= gen_reg_rtx (tmode
);
25829 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25835 case ARM_BUILTIN_TINSRB
:
25836 case ARM_BUILTIN_TINSRH
:
25837 case ARM_BUILTIN_TINSRW
:
25838 case ARM_BUILTIN_WMERGE
:
25839 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
25840 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
25841 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
25842 : CODE_FOR_iwmmxt_tinsrw
);
25843 arg0
= CALL_EXPR_ARG (exp
, 0);
25844 arg1
= CALL_EXPR_ARG (exp
, 1);
25845 arg2
= CALL_EXPR_ARG (exp
, 2);
25846 op0
= expand_normal (arg0
);
25847 op1
= expand_normal (arg1
);
25848 op2
= expand_normal (arg2
);
25849 tmode
= insn_data
[icode
].operand
[0].mode
;
25850 mode0
= insn_data
[icode
].operand
[1].mode
;
25851 mode1
= insn_data
[icode
].operand
[2].mode
;
25852 mode2
= insn_data
[icode
].operand
[3].mode
;
25854 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25855 op0
= copy_to_mode_reg (mode0
, op0
);
25856 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25857 op1
= copy_to_mode_reg (mode1
, op1
);
25858 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25860 error ("selector must be an immediate");
25863 if (icode
== CODE_FOR_iwmmxt_wmerge
)
25865 selector
= INTVAL (op2
);
25866 if (selector
> 7 || selector
< 0)
25867 error ("the range of selector should be in 0 to 7");
25869 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
25870 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
25871 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
25874 selector
= INTVAL (op2
);
25875 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
25876 error ("the range of selector should be in 0 to 7");
25877 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
25878 error ("the range of selector should be in 0 to 3");
25879 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
25880 error ("the range of selector should be in 0 to 1");
25882 op2
= GEN_INT (mask
);
25885 || GET_MODE (target
) != tmode
25886 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25887 target
= gen_reg_rtx (tmode
);
25888 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25894 case ARM_BUILTIN_SETWCGR0
:
25895 case ARM_BUILTIN_SETWCGR1
:
25896 case ARM_BUILTIN_SETWCGR2
:
25897 case ARM_BUILTIN_SETWCGR3
:
25898 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
25899 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
25900 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
25901 : CODE_FOR_iwmmxt_setwcgr3
);
25902 arg0
= CALL_EXPR_ARG (exp
, 0);
25903 op0
= expand_normal (arg0
);
25904 mode0
= insn_data
[icode
].operand
[0].mode
;
25905 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
25906 op0
= copy_to_mode_reg (mode0
, op0
);
25907 pat
= GEN_FCN (icode
) (op0
);
25913 case ARM_BUILTIN_GETWCGR0
:
25914 case ARM_BUILTIN_GETWCGR1
:
25915 case ARM_BUILTIN_GETWCGR2
:
25916 case ARM_BUILTIN_GETWCGR3
:
25917 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
25918 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
25919 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
25920 : CODE_FOR_iwmmxt_getwcgr3
);
25921 tmode
= insn_data
[icode
].operand
[0].mode
;
25923 || GET_MODE (target
) != tmode
25924 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25925 target
= gen_reg_rtx (tmode
);
25926 pat
= GEN_FCN (icode
) (target
);
25932 case ARM_BUILTIN_WSHUFH
:
25933 icode
= CODE_FOR_iwmmxt_wshufh
;
25934 arg0
= CALL_EXPR_ARG (exp
, 0);
25935 arg1
= CALL_EXPR_ARG (exp
, 1);
25936 op0
= expand_normal (arg0
);
25937 op1
= expand_normal (arg1
);
25938 tmode
= insn_data
[icode
].operand
[0].mode
;
25939 mode1
= insn_data
[icode
].operand
[1].mode
;
25940 mode2
= insn_data
[icode
].operand
[2].mode
;
25942 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
25943 op0
= copy_to_mode_reg (mode1
, op0
);
25944 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
25946 error ("mask must be an immediate");
25949 selector
= INTVAL (op1
);
25950 if (selector
< 0 || selector
> 255)
25951 error ("the range of mask should be in 0 to 255");
25953 || GET_MODE (target
) != tmode
25954 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25955 target
= gen_reg_rtx (tmode
);
25956 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25962 case ARM_BUILTIN_WMADDS
:
25963 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
25964 case ARM_BUILTIN_WMADDSX
:
25965 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
25966 case ARM_BUILTIN_WMADDSN
:
25967 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
25968 case ARM_BUILTIN_WMADDU
:
25969 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
25970 case ARM_BUILTIN_WMADDUX
:
25971 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
25972 case ARM_BUILTIN_WMADDUN
:
25973 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
25974 case ARM_BUILTIN_WSADBZ
:
25975 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
25976 case ARM_BUILTIN_WSADHZ
:
25977 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
25979 /* Several three-argument builtins. */
25980 case ARM_BUILTIN_WMACS
:
25981 case ARM_BUILTIN_WMACU
:
25982 case ARM_BUILTIN_TMIA
:
25983 case ARM_BUILTIN_TMIAPH
:
25984 case ARM_BUILTIN_TMIATT
:
25985 case ARM_BUILTIN_TMIATB
:
25986 case ARM_BUILTIN_TMIABT
:
25987 case ARM_BUILTIN_TMIABB
:
25988 case ARM_BUILTIN_WQMIABB
:
25989 case ARM_BUILTIN_WQMIABT
:
25990 case ARM_BUILTIN_WQMIATB
:
25991 case ARM_BUILTIN_WQMIATT
:
25992 case ARM_BUILTIN_WQMIABBN
:
25993 case ARM_BUILTIN_WQMIABTN
:
25994 case ARM_BUILTIN_WQMIATBN
:
25995 case ARM_BUILTIN_WQMIATTN
:
25996 case ARM_BUILTIN_WMIABB
:
25997 case ARM_BUILTIN_WMIABT
:
25998 case ARM_BUILTIN_WMIATB
:
25999 case ARM_BUILTIN_WMIATT
:
26000 case ARM_BUILTIN_WMIABBN
:
26001 case ARM_BUILTIN_WMIABTN
:
26002 case ARM_BUILTIN_WMIATBN
:
26003 case ARM_BUILTIN_WMIATTN
:
26004 case ARM_BUILTIN_WMIAWBB
:
26005 case ARM_BUILTIN_WMIAWBT
:
26006 case ARM_BUILTIN_WMIAWTB
:
26007 case ARM_BUILTIN_WMIAWTT
:
26008 case ARM_BUILTIN_WMIAWBBN
:
26009 case ARM_BUILTIN_WMIAWBTN
:
26010 case ARM_BUILTIN_WMIAWTBN
:
26011 case ARM_BUILTIN_WMIAWTTN
:
26012 case ARM_BUILTIN_WSADB
:
26013 case ARM_BUILTIN_WSADH
:
26014 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
26015 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
26016 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
26017 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
26018 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
26019 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
26020 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
26021 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
26022 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
26023 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
26024 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
26025 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
26026 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
26027 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
26028 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
26029 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
26030 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
26031 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
26032 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
26033 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
26034 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
26035 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
26036 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
26037 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
26038 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
26039 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
26040 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
26041 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
26042 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
26043 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
26044 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
26045 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
26046 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
26047 : CODE_FOR_iwmmxt_wsadh
);
26048 arg0
= CALL_EXPR_ARG (exp
, 0);
26049 arg1
= CALL_EXPR_ARG (exp
, 1);
26050 arg2
= CALL_EXPR_ARG (exp
, 2);
26051 op0
= expand_normal (arg0
);
26052 op1
= expand_normal (arg1
);
26053 op2
= expand_normal (arg2
);
26054 tmode
= insn_data
[icode
].operand
[0].mode
;
26055 mode0
= insn_data
[icode
].operand
[1].mode
;
26056 mode1
= insn_data
[icode
].operand
[2].mode
;
26057 mode2
= insn_data
[icode
].operand
[3].mode
;
26059 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
26060 op0
= copy_to_mode_reg (mode0
, op0
);
26061 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
26062 op1
= copy_to_mode_reg (mode1
, op1
);
26063 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
26064 op2
= copy_to_mode_reg (mode2
, op2
);
26066 || GET_MODE (target
) != tmode
26067 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
26068 target
= gen_reg_rtx (tmode
);
26069 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
26075 case ARM_BUILTIN_WZERO
:
26076 target
= gen_reg_rtx (DImode
);
26077 emit_insn (gen_iwmmxt_clrdi (target
));
26080 case ARM_BUILTIN_WSRLHI
:
26081 case ARM_BUILTIN_WSRLWI
:
26082 case ARM_BUILTIN_WSRLDI
:
26083 case ARM_BUILTIN_WSLLHI
:
26084 case ARM_BUILTIN_WSLLWI
:
26085 case ARM_BUILTIN_WSLLDI
:
26086 case ARM_BUILTIN_WSRAHI
:
26087 case ARM_BUILTIN_WSRAWI
:
26088 case ARM_BUILTIN_WSRADI
:
26089 case ARM_BUILTIN_WRORHI
:
26090 case ARM_BUILTIN_WRORWI
:
26091 case ARM_BUILTIN_WRORDI
:
26092 case ARM_BUILTIN_WSRLH
:
26093 case ARM_BUILTIN_WSRLW
:
26094 case ARM_BUILTIN_WSRLD
:
26095 case ARM_BUILTIN_WSLLH
:
26096 case ARM_BUILTIN_WSLLW
:
26097 case ARM_BUILTIN_WSLLD
:
26098 case ARM_BUILTIN_WSRAH
:
26099 case ARM_BUILTIN_WSRAW
:
26100 case ARM_BUILTIN_WSRAD
:
26101 case ARM_BUILTIN_WRORH
:
26102 case ARM_BUILTIN_WRORW
:
26103 case ARM_BUILTIN_WRORD
:
26104 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
26105 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
26106 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
26107 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
26108 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
26109 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
26110 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
26111 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
26112 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
26113 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
26114 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
26115 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
26116 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
26117 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
26118 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
26119 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
26120 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
26121 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
26122 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
26123 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
26124 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
26125 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
26126 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
26127 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
26128 : CODE_FOR_nothing
);
26129 arg1
= CALL_EXPR_ARG (exp
, 1);
26130 op1
= expand_normal (arg1
);
26131 if (GET_MODE (op1
) == VOIDmode
)
26133 imm
= INTVAL (op1
);
26134 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
26135 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
26136 && (imm
< 0 || imm
> 32))
26138 if (fcode
== ARM_BUILTIN_WRORHI
)
26139 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26140 else if (fcode
== ARM_BUILTIN_WRORWI
)
26141 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26142 else if (fcode
== ARM_BUILTIN_WRORH
)
26143 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26145 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26147 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
26148 && (imm
< 0 || imm
> 64))
26150 if (fcode
== ARM_BUILTIN_WRORDI
)
26151 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26153 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26157 if (fcode
== ARM_BUILTIN_WSRLHI
)
26158 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26159 else if (fcode
== ARM_BUILTIN_WSRLWI
)
26160 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26161 else if (fcode
== ARM_BUILTIN_WSRLDI
)
26162 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26163 else if (fcode
== ARM_BUILTIN_WSLLHI
)
26164 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26165 else if (fcode
== ARM_BUILTIN_WSLLWI
)
26166 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26167 else if (fcode
== ARM_BUILTIN_WSLLDI
)
26168 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26169 else if (fcode
== ARM_BUILTIN_WSRAHI
)
26170 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26171 else if (fcode
== ARM_BUILTIN_WSRAWI
)
26172 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26173 else if (fcode
== ARM_BUILTIN_WSRADI
)
26174 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26175 else if (fcode
== ARM_BUILTIN_WSRLH
)
26176 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26177 else if (fcode
== ARM_BUILTIN_WSRLW
)
26178 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26179 else if (fcode
== ARM_BUILTIN_WSRLD
)
26180 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26181 else if (fcode
== ARM_BUILTIN_WSLLH
)
26182 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26183 else if (fcode
== ARM_BUILTIN_WSLLW
)
26184 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26185 else if (fcode
== ARM_BUILTIN_WSLLD
)
26186 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26187 else if (fcode
== ARM_BUILTIN_WSRAH
)
26188 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26189 else if (fcode
== ARM_BUILTIN_WSRAW
)
26190 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26192 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26195 return arm_expand_binop_builtin (icode
, exp
, target
);
26201 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
26202 if (d
->code
== (const enum arm_builtins
) fcode
)
26203 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
26205 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
26206 if (d
->code
== (const enum arm_builtins
) fcode
)
26207 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
26209 for (i
= 0, d
= bdesc_3arg
; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
26210 if (d
->code
== (const enum arm_builtins
) fcode
)
26211 return arm_expand_ternop_builtin (d
->icode
, exp
, target
);
26213 /* @@@ Should really do something sensible here. */
26217 /* Return the number (counting from 0) of
26218 the least significant set bit in MASK. */
/* NOTE(review): this chunk is extraction-garbled — statements are split
   across physical lines and the original file's line numbers are embedded
   in the text.  Code lines below are left byte-identical.  */
26221 number_of_first_bit_set (unsigned mask
)
/* Delegates to ctz_hwi: the count of trailing zero bits of MASK is
   exactly the index of its lowest set bit.  */
26223 return ctz_hwi (mask
);
26226 /* Like emit_multi_reg_push, but allowing for a different set of
26227 registers to be described as saved. MASK is the set of registers
26228 to be saved; REAL_REGS is the set of registers to be described as
26229 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26232 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
26234 unsigned long regno
;
26235 rtx par
[10], tmp
, reg
;
26239 /* Build the parallel of the registers actually being stored. */
/* Iterate over set bits of MASK, clearing the lowest one each pass
   (mask &= mask - 1); regno = ctz_hwi (mask) is that bit's index.  */
26240 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
26242 regno
= ctz_hwi (mask
);
26243 reg
= gen_rtx_REG (SImode
, regno
);
/* The push is represented with an UNSPEC_PUSH_MULT wrapper for one
   element and plain USEs for the rest.  NOTE(review): the conditional
   choosing between the two forms was dropped by extraction (gap in the
   embedded line numbers 26244-26247) — confirm against original arm.c.  */
26246 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
26248 tmp
= gen_rtx_USE (VOIDmode
, reg
);
/* Store target: sp pre-decremented by 4 bytes per pushed register.  */
26253 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26254 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
26255 tmp
= gen_frame_mem (BLKmode
, tmp
);
26256 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
26259 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
26260 insn
= emit_insn (tmp
);
26262 /* Always build the stack adjustment note for unwind info. */
26263 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26264 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
26267 /* Build the parallel of the registers recorded as saved for unwind. */
26268 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
26270 regno
= ctz_hwi (real_regs
);
26271 reg
= gen_rtx_REG (SImode
, regno
);
26273 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
26274 tmp
= gen_frame_mem (SImode
, tmp
);
26275 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
26276 RTX_FRAME_RELATED_P (tmp
) = 1;
26284 RTX_FRAME_RELATED_P (par
[0]) = 1;
26285 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
/* Attach the unwinder's description of the push to the emitted insn.  */
26288 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
26293 /* Emit code to push or pop registers to or from the stack. F is the
26294 assembly file. MASK is the registers to pop. */
26296 thumb_pop (FILE *f
, unsigned long mask
)
/* Thumb-1 POP lists can only name the low registers (r0-r7), hence the
   0xFF mask below.  */
26299 int lo_mask
= mask
& 0xFF;
26300 int pushed_words
= 0;
26304 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
26306 /* Special case. Do not generate a POP PC statement here, do it in
26308 thumb_exit (f
, -1);
26312 fprintf (f
, "\tpop\t{");
26314 /* Look at the low registers first. */
26315 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
26319 asm_fprintf (f
, "%r", regno
);
/* More low registers still to print after this one?  (lo_mask bit 0
   is the current register; ~1 masks it out.)  */
26321 if ((lo_mask
& ~1) != 0)
26328 if (mask
& (1 << PC_REGNUM
))
26330 /* Catch popping the PC. */
26331 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
26332 || crtl
->calls_eh_return
)
26334 /* The PC is never popped directly, instead
26335 it is popped into r3 and then BX is used. */
26336 fprintf (f
, "}\n");
/* Hand off to thumb_exit, which knows the return address is on the
   stack (-1).  */
26338 thumb_exit (f
, -1);
26347 asm_fprintf (f
, "%r", PC_REGNUM
);
26351 fprintf (f
, "}\n");
26354 /* Generate code to return from a thumb function.
26355 If 'reg_containing_return_addr' is -1, then the return address is
26356 actually on the stack, at the stack pointer. */
26358 thumb_exit (FILE *f
, int reg_containing_return_addr
)
/* NOTE(review): extraction dropped several declarations here (gaps at
   orig. lines 26362, 26364-26366) — regs_to_pop's companions such as
   pops_needed, size, mode are used below but their declarations are not
   visible.  Code lines are left byte-identical.  */
26360 unsigned regs_available_for_popping
;
26361 unsigned regs_to_pop
;
26363 unsigned available
;
26367 int restore_a4
= FALSE
;
26369 /* Compute the registers we need to pop. */
26373 if (reg_containing_return_addr
== -1)
26375 regs_to_pop
|= 1 << LR_REGNUM
;
26379 if (TARGET_BACKTRACE
)
26381 /* Restore the (ARM) frame pointer and stack pointer. */
26382 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
26386 /* If there is nothing to pop then just emit the BX instruction and
26388 if (pops_needed
== 0)
26390 if (crtl
->calls_eh_return
)
26391 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26393 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26396 /* Otherwise if we are not supporting interworking and we have not created
26397 a backtrace structure and the function was not entered in ARM mode then
26398 just pop the return address straight into the PC. */
26399 else if (!TARGET_INTERWORK
26400 && !TARGET_BACKTRACE
26401 && !is_called_in_ARM_mode (current_function_decl
)
26402 && !crtl
->calls_eh_return
)
26404 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
26408 /* Find out how many of the (return) argument registers we can corrupt. */
26409 regs_available_for_popping
= 0;
26411 /* If returning via __builtin_eh_return, the bottom three registers
26412 all contain information needed for the return. */
26413 if (crtl
->calls_eh_return
)
26417 /* If we can deduce the registers used from the function's
26418 return value. This is more reliable than examining
26419 df_regs_ever_live_p () because that will be set if the register is
26420 ever used in the function, not just if the register is used
26421 to hold a return value. */
26423 if (crtl
->return_rtx
!= 0)
26424 mode
= GET_MODE (crtl
->return_rtx
);
26426 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
26428 size
= GET_MODE_SIZE (mode
);
26432 /* In a void function we can use any argument register.
26433 In a function that returns a structure on the stack
26434 we can use the second and third argument registers. */
26435 if (mode
== VOIDmode
)
26436 regs_available_for_popping
=
26437 (1 << ARG_REGISTER (1))
26438 | (1 << ARG_REGISTER (2))
26439 | (1 << ARG_REGISTER (3));
26441 regs_available_for_popping
=
26442 (1 << ARG_REGISTER (2))
26443 | (1 << ARG_REGISTER (3));
26445 else if (size
<= 4)
26446 regs_available_for_popping
=
26447 (1 << ARG_REGISTER (2))
26448 | (1 << ARG_REGISTER (3));
26449 else if (size
<= 8)
26450 regs_available_for_popping
=
26451 (1 << ARG_REGISTER (3));
26454 /* Match registers to be popped with registers into which we pop them. */
/* Each iteration strips the lowest set bit from both masks
   (x &= ~(x & -x)), pairing one needed pop with one scratch register.  */
26455 for (available
= regs_available_for_popping
,
26456 required
= regs_to_pop
;
26457 required
!= 0 && available
!= 0;
26458 available
&= ~(available
& - available
),
26459 required
&= ~(required
& - required
))
26462 /* If we have any popping registers left over, remove them. */
26464 regs_available_for_popping
&= ~available
;
26466 /* Otherwise if we need another popping register we can use
26467 the fourth argument register. */
26468 else if (pops_needed
)
26470 /* If we have not found any free argument registers and
26471 reg a4 contains the return address, we must move it. */
26472 if (regs_available_for_popping
== 0
26473 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
26475 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26476 reg_containing_return_addr
= LR_REGNUM
;
26478 else if (size
> 12)
26480 /* Register a4 is being used to hold part of the return value,
26481 but we have dire need of a free, low register. */
26484 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
26487 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
26489 /* The fourth argument register is available. */
26490 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
26496 /* Pop as many registers as we can. */
26497 thumb_pop (f
, regs_available_for_popping
);
26499 /* Process the registers we popped. */
26500 if (reg_containing_return_addr
== -1)
26502 /* The return address was popped into the lowest numbered register. */
26503 regs_to_pop
&= ~(1 << LR_REGNUM
);
26505 reg_containing_return_addr
=
26506 number_of_first_bit_set (regs_available_for_popping
);
26508 /* Remove this register from the mask of available registers, so that
26509 the return address will not be corrupted by further pops. */
26510 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
26513 /* If we popped other registers then handle them here. */
26514 if (regs_available_for_popping
)
26518 /* Work out which register currently contains the frame pointer. */
26519 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26521 /* Move it into the correct place. */
26522 asm_fprintf (f
, "\tmov\t%r, %r\n",
26523 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
26525 /* (Temporarily) remove it from the mask of popped registers. */
26526 regs_available_for_popping
&= ~(1 << frame_pointer
);
26527 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
26529 if (regs_available_for_popping
)
26533 /* We popped the stack pointer as well,
26534 find the register that contains it. */
26535 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26537 /* Move it into the stack register. */
26538 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
26540 /* At this point we have popped all necessary registers, so
26541 do not worry about restoring regs_available_for_popping
26542 to its correct value:
26544 assert (pops_needed == 0)
26545 assert (regs_available_for_popping == (1 << frame_pointer))
26546 assert (regs_to_pop == (1 << STACK_POINTER)) */
26550 /* Since we have just moved the popped value into the frame
26551 pointer, the popping register is available for reuse, and
26552 we know that we still have the stack pointer left to pop. */
26553 regs_available_for_popping
|= (1 << frame_pointer
);
26557 /* If we still have registers left on the stack, but we no longer have
26558 any registers into which we can pop them, then we must move the return
26559 address into the link register and make available the register that
26561 if (regs_available_for_popping
== 0 && pops_needed
> 0)
26563 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
26565 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
26566 reg_containing_return_addr
);
26568 reg_containing_return_addr
= LR_REGNUM
;
26571 /* If we have registers left on the stack then pop some more.
26572 We know that at most we will want to pop FP and SP. */
26573 if (pops_needed
> 0)
26578 thumb_pop (f
, regs_available_for_popping
);
26580 /* We have popped either FP or SP.
26581 Move whichever one it is into the correct register. */
26582 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26583 move_to
= number_of_first_bit_set (regs_to_pop
);
26585 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
26587 regs_to_pop
&= ~(1 << move_to
);
26592 /* If we still have not popped everything then we must have only
26593 had one register available to us and we are now popping the SP. */
26594 if (pops_needed
> 0)
26598 thumb_pop (f
, regs_available_for_popping
);
26600 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26602 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
26604 assert (regs_to_pop == (1 << STACK_POINTER))
26605 assert (pops_needed == 1)
26609 /* If necessary restore the a4 register. */
26612 if (reg_containing_return_addr
!= LR_REGNUM
)
26614 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26615 reg_containing_return_addr
= LR_REGNUM
;
/* a4 was parked in ip earlier (the size > 12 path above); restore it.  */
26618 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
26621 if (crtl
->calls_eh_return
)
26622 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26624 /* Return to caller. */
26625 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26628 /* Scan INSN just before assembler is output for it.
26629 For Thumb-1, we track the status of the condition codes; this
26630 information is used in the cbranchsi4_insn pattern. */
26632 thumb1_final_prescan_insn (rtx_insn
*insn
)
26634 if (flag_print_asm_name
)
26635 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
26636 INSN_ADDRESSES (INSN_UID (insn
)));
26637 /* Don't overwrite the previous setter when we get to a cbranch. */
26638 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
26640 enum attr_conds conds
;
/* Invalidate the remembered CC-setting insn if this insn modifies
   either of the operands it compared.  */
26642 if (cfun
->machine
->thumb1_cc_insn
)
26644 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
26645 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
26648 conds
= get_attr_conds (insn
);
26649 if (conds
== CONDS_SET
)
/* This insn sets the condition codes: record it as the current
   setter, comparing its destination against zero.  */
26651 rtx set
= single_set (insn
);
26652 cfun
->machine
->thumb1_cc_insn
= insn
;
26653 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
26654 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
26655 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
26656 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
/* A subtract of zero sets the full flags, so full CCmode is safe.  */
26658 rtx src1
= XEXP (SET_SRC (set
), 1);
26659 if (src1
== const0_rtx
)
26660 cfun
->machine
->thumb1_cc_mode
= CCmode
;
26662 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
26664 /* Record the src register operand instead of dest because
26665 cprop_hardreg pass propagates src. */
26666 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
/* Any other flag-clobbering insn invalidates the tracked setter.  */
26669 else if (conds
!= CONDS_NOCOND
)
26670 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
26673 /* Check if unexpected far jump is used. */
26674 if (cfun
->machine
->lr_save_eliminated
26675 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26676 internal_error("Unexpected thumb1 far jump");
26680 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26682 unsigned HOST_WIDE_INT mask
= 0xff;
26685 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26686 if (val
== 0) /* XXX */
26689 for (i
= 0; i
< 25; i
++)
26690 if ((val
& (mask
<< i
)) == val
)
26696 /* Returns nonzero if the current function contains,
26697 or might contain a far jump. */
26699 thumb_far_jump_used_p (void)
26702 bool far_jump
= false;
26703 unsigned int func_size
= 0;
26705 /* This test is only important for leaf functions. */
26706 /* assert (!leaf_function_p ()); */
26708 /* If we have already decided that far jumps may be used,
26709 do not bother checking again, and always return true even if
26710 it turns out that they are not being used. Once we have made
26711 the decision that far jumps are present (and that hence the link
26712 register will be pushed onto the stack) we cannot go back on it. */
26713 if (cfun
->machine
->far_jump_used
)
26716 /* If this function is not being called from the prologue/epilogue
26717 generation code then it must be being called from the
26718 INITIAL_ELIMINATION_OFFSET macro. */
26719 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
26721 /* In this case we know that we are being asked about the elimination
26722 of the arg pointer register. If that register is not being used,
26723 then there are no arguments on the stack, and we do not have to
26724 worry that a far jump might force the prologue to push the link
26725 register, changing the stack offsets. In this case we can just
26726 return false, since the presence of far jumps in the function will
26727 not affect stack offsets.
26729 If the arg pointer is live (or if it was live, but has now been
26730 eliminated and so set to dead) then we do have to test to see if
26731 the function might contain a far jump. This test can lead to some
26732 false negatives, since before reload is completed, then length of
26733 branch instructions is not known, so gcc defaults to returning their
26734 longest length, which in turn sets the far jump attribute to true.
26736 A false negative will not result in bad code being generated, but it
26737 will result in a needless push and pop of the link register. We
26738 hope that this does not occur too often.
26740 If we need doubleword stack alignment this could affect the other
26741 elimination offsets so we can't risk getting it wrong. */
26742 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
26743 cfun
->machine
->arg_pointer_live
= 1;
26744 else if (!cfun
->machine
->arg_pointer_live
)
26748 /* We should not change far_jump_used during or after reload, as there is
26749 no chance to change stack frame layout. */
26750 if (reload_in_progress
|| reload_completed
)
26753 /* Check to see if the function contains a branch
26754 insn with the far jump attribute set. */
26755 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
26757 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26761 func_size
+= get_attr_length (insn
);
26764 /* Attribute far_jump will always be true for thumb1 before
26765 shorten_branch pass. So checking far_jump attribute before
26766 shorten_branch isn't much useful.
26768 Following heuristic tries to estimate more accurately if a far jump
26769 may finally be used. The heuristic is very conservative as there is
26770 no chance to roll-back the decision of not to use far jump.
26772 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26773 2-byte insn is associated with a 4 byte constant pool. Using
26774 function size 2048/3 as the threshold is conservative enough. */
26777 if ((func_size
* 3) >= 2048)
26779 /* Record the fact that we have decided that
26780 the function does use far jumps. */
26781 cfun
->machine
->far_jump_used
= 1;
26789 /* Return nonzero if FUNC must be entered in ARM mode. */
26791 is_called_in_ARM_mode (tree func
)
26793 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
26795 /* Ignore the problem about functions whose address is taken. */
26796 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
26800 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
26806 /* Given the stack offsets and register mask in OFFSETS, decide how
26807 many additional registers to push instead of subtracting a constant
26808 from SP. For epilogues the principle is the same except we use pop.
26809 FOR_PROLOGUE indicates which we're generating. */
26811 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26813 HOST_WIDE_INT amount
;
26814 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26815 /* Extract a mask of the ones we can give to the Thumb's push/pop
26817 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26818 /* Then count how many other high registers will need to be pushed. */
26819 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26820 int n_free
, reg_base
, size
;
26822 if (!for_prologue
&& frame_pointer_needed
)
26823 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26825 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26827 /* If the stack frame size is 512 exactly, we can save one load
26828 instruction, which should make this a win even when optimizing
26830 if (!optimize_size
&& amount
!= 512)
26833 /* Can't do this if there are high registers to push. */
26834 if (high_regs_pushed
!= 0)
26837 /* Shouldn't do it in the prologue if no registers would normally
26838 be pushed at all. In the epilogue, also allow it if we'll have
26839 a pop insn for the PC. */
26842 || TARGET_BACKTRACE
26843 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26844 || TARGET_INTERWORK
26845 || crtl
->args
.pretend_args_size
!= 0))
26848 /* Don't do this if thumb_expand_prologue wants to emit instructions
26849 between the push and the stack frame allocation. */
26851 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26852 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
26859 size
= arm_size_return_regs ();
26860 reg_base
= ARM_NUM_INTS (size
);
26861 live_regs_mask
>>= reg_base
;
26864 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26865 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
26867 live_regs_mask
>>= 1;
26873 gcc_assert (amount
/ 4 * 4 == amount
);
26875 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26876 return (amount
- 508) / 4;
26877 if (amount
<= n_free
* 4)
26882 /* The bits which aren't usefully expanded as rtl. */
26884 thumb1_unexpanded_epilogue (void)
26886 arm_stack_offsets
*offsets
;
26888 unsigned long live_regs_mask
= 0;
26889 int high_regs_pushed
= 0;
26891 int had_to_push_lr
;
26894 if (cfun
->machine
->return_used_this_function
!= 0)
26897 if (IS_NAKED (arm_current_func_type ()))
26900 offsets
= arm_get_frame_offsets ();
26901 live_regs_mask
= offsets
->saved_regs_mask
;
26902 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26904 /* If we can deduce the registers used from the function's return value.
26905 This is more reliable that examining df_regs_ever_live_p () because that
26906 will be set if the register is ever used in the function, not just if
26907 the register is used to hold a return value. */
26908 size
= arm_size_return_regs ();
26910 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26913 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26914 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26917 /* The prolog may have pushed some high registers to use as
26918 work registers. e.g. the testsuite file:
26919 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26920 compiles to produce:
26921 push {r4, r5, r6, r7, lr}
26925 as part of the prolog. We have to undo that pushing here. */
26927 if (high_regs_pushed
)
26929 unsigned long mask
= live_regs_mask
& 0xff;
26932 /* The available low registers depend on the size of the value we are
26940 /* Oh dear! We have no low registers into which we can pop
26943 ("no low registers available for popping high registers");
26945 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
26946 if (live_regs_mask
& (1 << next_hi_reg
))
26949 while (high_regs_pushed
)
26951 /* Find lo register(s) into which the high register(s) can
26953 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26955 if (mask
& (1 << regno
))
26956 high_regs_pushed
--;
26957 if (high_regs_pushed
== 0)
26961 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
26963 /* Pop the values into the low register(s). */
26964 thumb_pop (asm_out_file
, mask
);
26966 /* Move the value(s) into the high registers. */
26967 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26969 if (mask
& (1 << regno
))
26971 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
26974 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
26975 if (live_regs_mask
& (1 << next_hi_reg
))
26980 live_regs_mask
&= ~0x0f00;
26983 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
26984 live_regs_mask
&= 0xff;
26986 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
26988 /* Pop the return address into the PC. */
26989 if (had_to_push_lr
)
26990 live_regs_mask
|= 1 << PC_REGNUM
;
26992 /* Either no argument registers were pushed or a backtrace
26993 structure was created which includes an adjusted stack
26994 pointer, so just pop everything. */
26995 if (live_regs_mask
)
26996 thumb_pop (asm_out_file
, live_regs_mask
);
26998 /* We have either just popped the return address into the
26999 PC or it is was kept in LR for the entire function.
27000 Note that thumb_pop has already called thumb_exit if the
27001 PC was in the list. */
27002 if (!had_to_push_lr
)
27003 thumb_exit (asm_out_file
, LR_REGNUM
);
27007 /* Pop everything but the return address. */
27008 if (live_regs_mask
)
27009 thumb_pop (asm_out_file
, live_regs_mask
);
27011 if (had_to_push_lr
)
27015 /* We have no free low regs, so save one. */
27016 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
27020 /* Get the return address into a temporary register. */
27021 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
27025 /* Move the return address to lr. */
27026 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
27028 /* Restore the low register. */
27029 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
27034 regno
= LAST_ARG_REGNUM
;
27039 /* Remove the argument registers that were pushed onto the stack. */
27040 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
27041 SP_REGNUM
, SP_REGNUM
,
27042 crtl
->args
.pretend_args_size
);
27044 thumb_exit (asm_out_file
, regno
);
27050 /* Functions to save and restore machine-specific function data. */
27051 static struct machine_function
*
27052 arm_init_machine_status (void)
27054 struct machine_function
*machine
;
27055 machine
= ggc_cleared_alloc
<machine_function
> ();
27057 #if ARM_FT_UNKNOWN != 0
27058 machine
->func_type
= ARM_FT_UNKNOWN
;
27063 /* Return an RTX indicating where the return address to the
27064 calling function can be found. */
27066 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
27071 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
27074 /* Do anything needed before RTL is emitted for each function. */
27076 arm_init_expanders (void)
27078 /* Arrange to initialize and mark the machine per-function status. */
27079 init_machine_status
= arm_init_machine_status
;
27081 /* This is to stop the combine pass optimizing away the alignment
27082 adjustment of va_arg. */
27083 /* ??? It is claimed that this should not be necessary. */
27085 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
27089 /* Like arm_compute_initial_elimination offset. Simpler because there
27090 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27091 to point at the base of the local variables after static stack
27092 space for a function has been allocated. */
27095 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
27097 arm_stack_offsets
*offsets
;
27099 offsets
= arm_get_frame_offsets ();
27103 case ARG_POINTER_REGNUM
:
27106 case STACK_POINTER_REGNUM
:
27107 return offsets
->outgoing_args
- offsets
->saved_args
;
27109 case FRAME_POINTER_REGNUM
:
27110 return offsets
->soft_frame
- offsets
->saved_args
;
27112 case ARM_HARD_FRAME_POINTER_REGNUM
:
27113 return offsets
->saved_regs
- offsets
->saved_args
;
27115 case THUMB_HARD_FRAME_POINTER_REGNUM
:
27116 return offsets
->locals_base
- offsets
->saved_args
;
27119 gcc_unreachable ();
27123 case FRAME_POINTER_REGNUM
:
27126 case STACK_POINTER_REGNUM
:
27127 return offsets
->outgoing_args
- offsets
->soft_frame
;
27129 case ARM_HARD_FRAME_POINTER_REGNUM
:
27130 return offsets
->saved_regs
- offsets
->soft_frame
;
27132 case THUMB_HARD_FRAME_POINTER_REGNUM
:
27133 return offsets
->locals_base
- offsets
->soft_frame
;
27136 gcc_unreachable ();
27141 gcc_unreachable ();
27145 /* Generate the function's prologue. */
27148 thumb1_expand_prologue (void)
27152 HOST_WIDE_INT amount
;
27153 arm_stack_offsets
*offsets
;
27154 unsigned long func_type
;
27156 unsigned long live_regs_mask
;
27157 unsigned long l_mask
;
27158 unsigned high_regs_pushed
= 0;
27160 func_type
= arm_current_func_type ();
27162 /* Naked functions don't have prologues. */
27163 if (IS_NAKED (func_type
))
27166 if (IS_INTERRUPT (func_type
))
27168 error ("interrupt Service Routines cannot be coded in Thumb mode");
27172 if (is_called_in_ARM_mode (current_function_decl
))
27173 emit_insn (gen_prologue_thumb1_interwork ());
27175 offsets
= arm_get_frame_offsets ();
27176 live_regs_mask
= offsets
->saved_regs_mask
;
27178 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27179 l_mask
= live_regs_mask
& 0x40ff;
27180 /* Then count how many other high registers will need to be pushed. */
27181 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
27183 if (crtl
->args
.pretend_args_size
)
27185 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
27187 if (cfun
->machine
->uses_anonymous_args
)
27189 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
27190 unsigned long mask
;
27192 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
27193 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
27195 insn
= thumb1_emit_multi_reg_push (mask
, 0);
27199 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27200 stack_pointer_rtx
, x
));
27202 RTX_FRAME_RELATED_P (insn
) = 1;
27205 if (TARGET_BACKTRACE
)
27207 HOST_WIDE_INT offset
= 0;
27208 unsigned work_register
;
27209 rtx work_reg
, x
, arm_hfp_rtx
;
27211 /* We have been asked to create a stack backtrace structure.
27212 The code looks like this:
27216 0 sub SP, #16 Reserve space for 4 registers.
27217 2 push {R7} Push low registers.
27218 4 add R7, SP, #20 Get the stack pointer before the push.
27219 6 str R7, [SP, #8] Store the stack pointer
27220 (before reserving the space).
27221 8 mov R7, PC Get hold of the start of this code + 12.
27222 10 str R7, [SP, #16] Store it.
27223 12 mov R7, FP Get hold of the current frame pointer.
27224 14 str R7, [SP, #4] Store it.
27225 16 mov R7, LR Get hold of the current return address.
27226 18 str R7, [SP, #12] Store it.
27227 20 add R7, SP, #16 Point at the start of the
27228 backtrace structure.
27229 22 mov FP, R7 Put this value into the frame pointer. */
27231 work_register
= thumb_find_work_register (live_regs_mask
);
27232 work_reg
= gen_rtx_REG (SImode
, work_register
);
27233 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
27235 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27236 stack_pointer_rtx
, GEN_INT (-16)));
27237 RTX_FRAME_RELATED_P (insn
) = 1;
27241 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
27242 RTX_FRAME_RELATED_P (insn
) = 1;
27244 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
27247 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
27248 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27250 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
27251 x
= gen_frame_mem (SImode
, x
);
27252 emit_move_insn (x
, work_reg
);
27254 /* Make sure that the instruction fetching the PC is in the right place
27255 to calculate "start of backtrace creation code + 12". */
27256 /* ??? The stores using the common WORK_REG ought to be enough to
27257 prevent the scheduler from doing anything weird. Failing that
27258 we could always move all of the following into an UNSPEC_VOLATILE. */
27261 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27262 emit_move_insn (work_reg
, x
);
27264 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27265 x
= gen_frame_mem (SImode
, x
);
27266 emit_move_insn (x
, work_reg
);
27268 emit_move_insn (work_reg
, arm_hfp_rtx
);
27270 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27271 x
= gen_frame_mem (SImode
, x
);
27272 emit_move_insn (x
, work_reg
);
27276 emit_move_insn (work_reg
, arm_hfp_rtx
);
27278 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27279 x
= gen_frame_mem (SImode
, x
);
27280 emit_move_insn (x
, work_reg
);
27282 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27283 emit_move_insn (work_reg
, x
);
27285 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27286 x
= gen_frame_mem (SImode
, x
);
27287 emit_move_insn (x
, work_reg
);
27290 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
27291 emit_move_insn (work_reg
, x
);
27293 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
27294 x
= gen_frame_mem (SImode
, x
);
27295 emit_move_insn (x
, work_reg
);
27297 x
= GEN_INT (offset
+ 12);
27298 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27300 emit_move_insn (arm_hfp_rtx
, work_reg
);
27302 /* Optimization: If we are not pushing any low registers but we are going
27303 to push some high registers then delay our first push. This will just
27304 be a push of LR and we can combine it with the push of the first high
27306 else if ((l_mask
& 0xff) != 0
27307 || (high_regs_pushed
== 0 && l_mask
))
27309 unsigned long mask
= l_mask
;
27310 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
27311 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
27312 RTX_FRAME_RELATED_P (insn
) = 1;
27315 if (high_regs_pushed
)
27317 unsigned pushable_regs
;
27318 unsigned next_hi_reg
;
27319 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
27320 : crtl
->args
.info
.nregs
;
27321 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
27323 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
27324 if (live_regs_mask
& (1 << next_hi_reg
))
27327 /* Here we need to mask out registers used for passing arguments
27328 even if they can be pushed. This is to avoid using them to stash the high
27329 registers. Such kind of stash may clobber the use of arguments. */
27330 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
27332 if (pushable_regs
== 0)
27333 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
27335 while (high_regs_pushed
> 0)
27337 unsigned long real_regs_mask
= 0;
27339 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
27341 if (pushable_regs
& (1 << regno
))
27343 emit_move_insn (gen_rtx_REG (SImode
, regno
),
27344 gen_rtx_REG (SImode
, next_hi_reg
));
27346 high_regs_pushed
--;
27347 real_regs_mask
|= (1 << next_hi_reg
);
27349 if (high_regs_pushed
)
27351 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
27353 if (live_regs_mask
& (1 << next_hi_reg
))
27358 pushable_regs
&= ~((1 << regno
) - 1);
27364 /* If we had to find a work register and we have not yet
27365 saved the LR then add it to the list of regs to push. */
27366 if (l_mask
== (1 << LR_REGNUM
))
27368 pushable_regs
|= l_mask
;
27369 real_regs_mask
|= l_mask
;
27373 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
27374 RTX_FRAME_RELATED_P (insn
) = 1;
27378 /* Load the pic register before setting the frame pointer,
27379 so we can use r7 as a temporary work register. */
27380 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
27381 arm_load_pic_register (live_regs_mask
);
27383 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
27384 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
27385 stack_pointer_rtx
);
27387 if (flag_stack_usage_info
)
27388 current_function_static_stack_size
27389 = offsets
->outgoing_args
- offsets
->saved_args
;
27391 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27392 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
27397 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27398 GEN_INT (- amount
)));
27399 RTX_FRAME_RELATED_P (insn
) = 1;
27405 /* The stack decrement is too big for an immediate value in a single
27406 insn. In theory we could issue multiple subtracts, but after
27407 three of them it becomes more space efficient to place the full
27408 value in the constant pool and load into a register. (Also the
27409 ARM debugger really likes to see only one stack decrement per
27410 function). So instead we look for a scratch register into which
27411 we can load the decrement, and then we subtract this from the
27412 stack pointer. Unfortunately on the thumb the only available
27413 scratch registers are the argument registers, and we cannot use
27414 these as they may hold arguments to the function. Instead we
27415 attempt to locate a call preserved register which is used by this
27416 function. If we can find one, then we know that it will have
27417 been pushed at the start of the prologue and so we can corrupt
27419 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
27420 if (live_regs_mask
& (1 << regno
))
27423 gcc_assert(regno
<= LAST_LO_REGNUM
);
27425 reg
= gen_rtx_REG (SImode
, regno
);
27427 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
27429 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27430 stack_pointer_rtx
, reg
));
27432 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
27433 plus_constant (Pmode
, stack_pointer_rtx
,
27435 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27436 RTX_FRAME_RELATED_P (insn
) = 1;
27440 if (frame_pointer_needed
)
27441 thumb_set_frame_pointer (offsets
);
27443 /* If we are profiling, make sure no instructions are scheduled before
27444 the call to mcount. Similarly if the user has requested no
27445 scheduling in the prolog. Similarly if we want non-call exceptions
27446 using the EABI unwinder, to prevent faulting instructions from being
27447 swapped with a stack adjustment. */
27448 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
27449 || (arm_except_unwind_info (&global_options
) == UI_TARGET
27450 && cfun
->can_throw_non_call_exceptions
))
27451 emit_insn (gen_blockage ());
27453 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
27454 if (live_regs_mask
& 0xff)
27455 cfun
->machine
->lr_save_eliminated
= 0;
27458 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27459 POP instruction can be generated. LR should be replaced by PC. All
27460 the checks required are already done by USE_RETURN_INSN (). Hence,
27461 all we really need to check here is if single register is to be
27462 returned, or multiple register return. */
27464 thumb2_expand_return (bool simple_return
)
27467 unsigned long saved_regs_mask
;
27468 arm_stack_offsets
*offsets
;
27470 offsets
= arm_get_frame_offsets ();
27471 saved_regs_mask
= offsets
->saved_regs_mask
;
27473 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27474 if (saved_regs_mask
& (1 << i
))
27477 if (!simple_return
&& saved_regs_mask
)
27481 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27482 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27483 rtx addr
= gen_rtx_MEM (SImode
,
27484 gen_rtx_POST_INC (SImode
,
27485 stack_pointer_rtx
));
27486 set_mem_alias_set (addr
, get_frame_alias_set ());
27487 XVECEXP (par
, 0, 0) = ret_rtx
;
27488 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
27489 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27490 emit_jump_insn (par
);
27494 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27495 saved_regs_mask
|= (1 << PC_REGNUM
);
27496 arm_emit_multi_reg_pop (saved_regs_mask
);
27501 emit_jump_insn (simple_return_rtx
);
27506 thumb1_expand_epilogue (void)
27508 HOST_WIDE_INT amount
;
27509 arm_stack_offsets
*offsets
;
27512 /* Naked functions don't have prologues. */
27513 if (IS_NAKED (arm_current_func_type ()))
27516 offsets
= arm_get_frame_offsets ();
27517 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27519 if (frame_pointer_needed
)
27521 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27522 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27524 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27526 gcc_assert (amount
>= 0);
27529 emit_insn (gen_blockage ());
27532 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27533 GEN_INT (amount
)));
27536 /* r3 is always free in the epilogue. */
27537 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27539 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27540 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27544 /* Emit a USE (stack_pointer_rtx), so that
27545 the stack adjustment will not be deleted. */
27546 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27548 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27549 emit_insn (gen_blockage ());
27551 /* Emit a clobber for each insn that will be restored in the epilogue,
27552 so that flow2 will get register lifetimes correct. */
27553 for (regno
= 0; regno
< 13; regno
++)
27554 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
27555 emit_clobber (gen_rtx_REG (SImode
, regno
));
27557 if (! df_regs_ever_live_p (LR_REGNUM
))
27558 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27561 /* Epilogue code for APCS frame. */
27563 arm_expand_epilogue_apcs_frame (bool really_return
)
27565 unsigned long func_type
;
27566 unsigned long saved_regs_mask
;
27569 int floats_from_frame
= 0;
27570 arm_stack_offsets
*offsets
;
27572 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27573 func_type
= arm_current_func_type ();
27575 /* Get frame offsets for ARM. */
27576 offsets
= arm_get_frame_offsets ();
27577 saved_regs_mask
= offsets
->saved_regs_mask
;
27579 /* Find the offset of the floating-point save area in the frame. */
27581 = (offsets
->saved_args
27582 + arm_compute_static_chain_stack_bytes ()
27585 /* Compute how many core registers saved and how far away the floats are. */
27586 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27587 if (saved_regs_mask
& (1 << i
))
27590 floats_from_frame
+= 4;
27593 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27596 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27598 /* The offset is from IP_REGNUM. */
27599 int saved_size
= arm_get_vfp_saved_size ();
27600 if (saved_size
> 0)
27603 floats_from_frame
+= saved_size
;
27604 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27605 hard_frame_pointer_rtx
,
27606 GEN_INT (-floats_from_frame
)));
27607 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27608 ip_rtx
, hard_frame_pointer_rtx
);
27611 /* Generate VFP register multi-pop. */
27612 start_reg
= FIRST_VFP_REGNUM
;
27614 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27615 /* Look for a case where a reg does not need restoring. */
27616 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27617 && (!df_regs_ever_live_p (i
+ 1)
27618 || call_used_regs
[i
+ 1]))
27620 if (start_reg
!= i
)
27621 arm_emit_vfp_multi_reg_pop (start_reg
,
27622 (i
- start_reg
) / 2,
27623 gen_rtx_REG (SImode
,
27628 /* Restore the remaining regs that we have discovered (or possibly
27629 even all of them, if the conditional in the for loop never
27631 if (start_reg
!= i
)
27632 arm_emit_vfp_multi_reg_pop (start_reg
,
27633 (i
- start_reg
) / 2,
27634 gen_rtx_REG (SImode
, IP_REGNUM
));
27639 /* The frame pointer is guaranteed to be non-double-word aligned, as
27640 it is set to double-word-aligned old_stack_pointer - 4. */
27642 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27644 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27645 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27647 rtx addr
= gen_frame_mem (V2SImode
,
27648 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27650 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27651 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27652 gen_rtx_REG (V2SImode
, i
),
27658 /* saved_regs_mask should contain IP which contains old stack pointer
27659 at the time of activation creation. Since SP and IP are adjacent registers,
27660 we can restore the value directly into SP. */
27661 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27662 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27663 saved_regs_mask
|= (1 << SP_REGNUM
);
27665 /* There are two registers left in saved_regs_mask - LR and PC. We
27666 only need to restore LR (the return address), but to
27667 save time we can load it directly into PC, unless we need a
27668 special function exit sequence, or we are not really returning. */
27670 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27671 && !crtl
->calls_eh_return
)
27672 /* Delete LR from the register mask, so that LR on
27673 the stack is loaded into the PC in the register mask. */
27674 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27676 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27678 num_regs
= bit_count (saved_regs_mask
);
27679 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27682 emit_insn (gen_blockage ());
27683 /* Unwind the stack to just below the saved registers. */
27684 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27685 hard_frame_pointer_rtx
,
27686 GEN_INT (- 4 * num_regs
)));
27688 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27689 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27692 arm_emit_multi_reg_pop (saved_regs_mask
);
27694 if (IS_INTERRUPT (func_type
))
27696 /* Interrupt handlers will have pushed the
27697 IP onto the stack, so restore it now. */
27699 rtx addr
= gen_rtx_MEM (SImode
,
27700 gen_rtx_POST_INC (SImode
,
27701 stack_pointer_rtx
));
27702 set_mem_alias_set (addr
, get_frame_alias_set ());
27703 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27704 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27705 gen_rtx_REG (SImode
, IP_REGNUM
),
27709 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27712 if (crtl
->calls_eh_return
)
27713 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27715 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27717 if (IS_STACKALIGN (func_type
))
27718 /* Restore the original stack pointer. Before prologue, the stack was
27719 realigned and the original stack pointer saved in r0. For details,
27720 see comment in arm_expand_prologue. */
27721 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27723 emit_jump_insn (simple_return_rtx
);
27726 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27727 function is not a sibcall. */
27729 arm_expand_epilogue (bool really_return
)
27731 unsigned long func_type
;
27732 unsigned long saved_regs_mask
;
27736 arm_stack_offsets
*offsets
;
27738 func_type
= arm_current_func_type ();
27740 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27741 let output_return_instruction take care of instruction emission if any. */
27742 if (IS_NAKED (func_type
)
27743 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27746 emit_jump_insn (simple_return_rtx
);
27750 /* If we are throwing an exception, then we really must be doing a
27751 return, so we can't tail-call. */
27752 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27754 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27756 arm_expand_epilogue_apcs_frame (really_return
);
27760 /* Get frame offsets for ARM. */
27761 offsets
= arm_get_frame_offsets ();
27762 saved_regs_mask
= offsets
->saved_regs_mask
;
27763 num_regs
= bit_count (saved_regs_mask
);
27765 if (frame_pointer_needed
)
27768 /* Restore stack pointer if necessary. */
27771 /* In ARM mode, frame pointer points to first saved register.
27772 Restore stack pointer to last saved register. */
27773 amount
= offsets
->frame
- offsets
->saved_regs
;
27775 /* Force out any pending memory operations that reference stacked data
27776 before stack de-allocation occurs. */
27777 emit_insn (gen_blockage ());
27778 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27779 hard_frame_pointer_rtx
,
27780 GEN_INT (amount
)));
27781 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27783 hard_frame_pointer_rtx
);
27785 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27787 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27791 /* In Thumb-2 mode, the frame pointer points to the last saved
27793 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27796 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27797 hard_frame_pointer_rtx
,
27798 GEN_INT (amount
)));
27799 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27800 hard_frame_pointer_rtx
,
27801 hard_frame_pointer_rtx
);
27804 /* Force out any pending memory operations that reference stacked data
27805 before stack de-allocation occurs. */
27806 emit_insn (gen_blockage ());
27807 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27808 hard_frame_pointer_rtx
));
27809 arm_add_cfa_adjust_cfa_note (insn
, 0,
27811 hard_frame_pointer_rtx
);
27812 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27814 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27819 /* Pop off outgoing args and local frame to adjust stack pointer to
27820 last saved register. */
27821 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27825 /* Force out any pending memory operations that reference stacked data
27826 before stack de-allocation occurs. */
27827 emit_insn (gen_blockage ());
27828 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27830 GEN_INT (amount
)));
27831 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27832 stack_pointer_rtx
, stack_pointer_rtx
);
27833 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27835 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27839 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27841 /* Generate VFP register multi-pop. */
27842 int end_reg
= LAST_VFP_REGNUM
+ 1;
27844 /* Scan the registers in reverse order. We need to match
27845 any groupings made in the prologue and generate matching
27846 vldm operations. The need to match groups is because,
27847 unlike pop, vldm can only do consecutive regs. */
27848 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27849 /* Look for a case where a reg does not need restoring. */
27850 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27851 && (!df_regs_ever_live_p (i
+ 1)
27852 || call_used_regs
[i
+ 1]))
27854 /* Restore the regs discovered so far (from reg+2 to
27856 if (end_reg
> i
+ 2)
27857 arm_emit_vfp_multi_reg_pop (i
+ 2,
27858 (end_reg
- (i
+ 2)) / 2,
27859 stack_pointer_rtx
);
27863 /* Restore the remaining regs that we have discovered (or possibly
27864 even all of them, if the conditional in the for loop never
27866 if (end_reg
> i
+ 2)
27867 arm_emit_vfp_multi_reg_pop (i
+ 2,
27868 (end_reg
- (i
+ 2)) / 2,
27869 stack_pointer_rtx
);
27873 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27874 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27877 rtx addr
= gen_rtx_MEM (V2SImode
,
27878 gen_rtx_POST_INC (SImode
,
27879 stack_pointer_rtx
));
27880 set_mem_alias_set (addr
, get_frame_alias_set ());
27881 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27882 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27883 gen_rtx_REG (V2SImode
, i
),
27885 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27886 stack_pointer_rtx
, stack_pointer_rtx
);
27889 if (saved_regs_mask
)
27892 bool return_in_pc
= false;
27894 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27895 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27896 && !IS_STACKALIGN (func_type
)
27898 && crtl
->args
.pretend_args_size
== 0
27899 && saved_regs_mask
& (1 << LR_REGNUM
)
27900 && !crtl
->calls_eh_return
)
27902 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27903 saved_regs_mask
|= (1 << PC_REGNUM
);
27904 return_in_pc
= true;
27907 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
27909 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27910 if (saved_regs_mask
& (1 << i
))
27912 rtx addr
= gen_rtx_MEM (SImode
,
27913 gen_rtx_POST_INC (SImode
,
27914 stack_pointer_rtx
));
27915 set_mem_alias_set (addr
, get_frame_alias_set ());
27917 if (i
== PC_REGNUM
)
27919 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27920 XVECEXP (insn
, 0, 0) = ret_rtx
;
27921 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
27922 gen_rtx_REG (SImode
, i
),
27924 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
27925 insn
= emit_jump_insn (insn
);
27929 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
27931 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27932 gen_rtx_REG (SImode
, i
),
27934 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27936 stack_pointer_rtx
);
27943 && current_tune
->prefer_ldrd_strd
27944 && !optimize_function_for_size_p (cfun
))
27947 thumb2_emit_ldrd_pop (saved_regs_mask
);
27948 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
27949 arm_emit_ldrd_pop (saved_regs_mask
);
27951 arm_emit_multi_reg_pop (saved_regs_mask
);
27954 arm_emit_multi_reg_pop (saved_regs_mask
);
27957 if (return_in_pc
== true)
27961 if (crtl
->args
.pretend_args_size
)
27964 rtx dwarf
= NULL_RTX
;
27966 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27968 GEN_INT (crtl
->args
.pretend_args_size
)));
27970 RTX_FRAME_RELATED_P (tmp
) = 1;
27972 if (cfun
->machine
->uses_anonymous_args
)
27974 /* Restore pretend args. Refer arm_expand_prologue on how to save
27975 pretend_args in stack. */
27976 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
27977 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
27978 for (j
= 0, i
= 0; j
< num_regs
; i
++)
27979 if (saved_regs_mask
& (1 << i
))
27981 rtx reg
= gen_rtx_REG (SImode
, i
);
27982 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
27985 REG_NOTES (tmp
) = dwarf
;
27987 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
27988 stack_pointer_rtx
, stack_pointer_rtx
);
27991 if (!really_return
)
27994 if (crtl
->calls_eh_return
)
27995 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27997 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27999 if (IS_STACKALIGN (func_type
))
28000 /* Restore the original stack pointer. Before prologue, the stack was
28001 realigned and the original stack pointer saved in r0. For details,
28002 see comment in arm_expand_prologue. */
28003 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
28005 emit_jump_insn (simple_return_rtx
);
28008 /* Implementation of insn prologue_thumb1_interwork. This is the first
28009 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28012 thumb1_output_interwork (void)
28015 FILE *f
= asm_out_file
;
28017 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
28018 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
28020 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
28022 /* Generate code sequence to switch us into Thumb mode. */
28023 /* The .code 32 directive has already been emitted by
28024 ASM_DECLARE_FUNCTION_NAME. */
28025 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
28026 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
28028 /* Generate a label, so that the debugger will notice the
28029 change in instruction sets. This label is also used by
28030 the assembler to bypass the ARM code when this function
28031 is called from a Thumb encoded function elsewhere in the
28032 same file. Hence the definition of STUB_NAME here must
28033 agree with the definition in gas/config/tc-arm.c. */
28035 #define STUB_NAME ".real_start_of"
28037 fprintf (f
, "\t.code\t16\n");
28039 if (arm_dllexport_name_p (name
))
28040 name
= arm_strip_name_encoding (name
);
28042 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
28043 fprintf (f
, "\t.thumb_func\n");
28044 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
28049 /* Handle the case of a double word load into a low register from
28050 a computed memory address. The computed address may involve a
28051 register which is overwritten by the load. */
28053 thumb_load_double_from_address (rtx
*operands
)
28061 gcc_assert (REG_P (operands
[0]));
28062 gcc_assert (MEM_P (operands
[1]));
28064 /* Get the memory address. */
28065 addr
= XEXP (operands
[1], 0);
28067 /* Work out how the memory address is computed. */
28068 switch (GET_CODE (addr
))
28071 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28073 if (REGNO (operands
[0]) == REGNO (addr
))
28075 output_asm_insn ("ldr\t%H0, %2", operands
);
28076 output_asm_insn ("ldr\t%0, %1", operands
);
28080 output_asm_insn ("ldr\t%0, %1", operands
);
28081 output_asm_insn ("ldr\t%H0, %2", operands
);
28086 /* Compute <address> + 4 for the high order load. */
28087 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28089 output_asm_insn ("ldr\t%0, %1", operands
);
28090 output_asm_insn ("ldr\t%H0, %2", operands
);
28094 arg1
= XEXP (addr
, 0);
28095 arg2
= XEXP (addr
, 1);
28097 if (CONSTANT_P (arg1
))
28098 base
= arg2
, offset
= arg1
;
28100 base
= arg1
, offset
= arg2
;
28102 gcc_assert (REG_P (base
));
28104 /* Catch the case of <address> = <reg> + <reg> */
28105 if (REG_P (offset
))
28107 int reg_offset
= REGNO (offset
);
28108 int reg_base
= REGNO (base
);
28109 int reg_dest
= REGNO (operands
[0]);
28111 /* Add the base and offset registers together into the
28112 higher destination register. */
28113 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
28114 reg_dest
+ 1, reg_base
, reg_offset
);
28116 /* Load the lower destination register from the address in
28117 the higher destination register. */
28118 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
28119 reg_dest
, reg_dest
+ 1);
28121 /* Load the higher destination register from its own address
28123 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
28124 reg_dest
+ 1, reg_dest
+ 1);
28128 /* Compute <address> + 4 for the high order load. */
28129 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28131 /* If the computed address is held in the low order register
28132 then load the high order register first, otherwise always
28133 load the low order register first. */
28134 if (REGNO (operands
[0]) == REGNO (base
))
28136 output_asm_insn ("ldr\t%H0, %2", operands
);
28137 output_asm_insn ("ldr\t%0, %1", operands
);
28141 output_asm_insn ("ldr\t%0, %1", operands
);
28142 output_asm_insn ("ldr\t%H0, %2", operands
);
28148 /* With no registers to worry about we can just load the value
28150 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28152 output_asm_insn ("ldr\t%H0, %2", operands
);
28153 output_asm_insn ("ldr\t%0, %1", operands
);
28157 gcc_unreachable ();
28164 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
28171 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28174 operands
[4] = operands
[5];
28177 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
28178 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
28182 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28185 operands
[4] = operands
[5];
28188 if (REGNO (operands
[5]) > REGNO (operands
[6]))
28191 operands
[5] = operands
[6];
28194 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28197 operands
[4] = operands
[5];
28201 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
28202 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
28206 gcc_unreachable ();
28212 /* Output a call-via instruction for thumb state. */
28214 thumb_call_via_reg (rtx reg
)
28216 int regno
= REGNO (reg
);
28219 gcc_assert (regno
< LR_REGNUM
);
28221 /* If we are in the normal text section we can use a single instance
28222 per compilation unit. If we are doing function sections, then we need
28223 an entry per section, since we can't rely on reachability. */
28224 if (in_section
== text_section
)
28226 thumb_call_reg_needed
= 1;
28228 if (thumb_call_via_label
[regno
] == NULL
)
28229 thumb_call_via_label
[regno
] = gen_label_rtx ();
28230 labelp
= thumb_call_via_label
+ regno
;
28234 if (cfun
->machine
->call_via
[regno
] == NULL
)
28235 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
28236 labelp
= cfun
->machine
->call_via
+ regno
;
28239 output_asm_insn ("bl\t%a0", labelp
);
28243 /* Routines for generating rtl. */
28245 thumb_expand_movmemqi (rtx
*operands
)
28247 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
28248 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
28249 HOST_WIDE_INT len
= INTVAL (operands
[2]);
28250 HOST_WIDE_INT offset
= 0;
28254 emit_insn (gen_movmem12b (out
, in
, out
, in
));
28260 emit_insn (gen_movmem8b (out
, in
, out
, in
));
28266 rtx reg
= gen_reg_rtx (SImode
);
28267 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
28268 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
28275 rtx reg
= gen_reg_rtx (HImode
);
28276 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
28277 plus_constant (Pmode
, in
,
28279 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
28288 rtx reg
= gen_reg_rtx (QImode
);
28289 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
28290 plus_constant (Pmode
, in
,
28292 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
28299 thumb_reload_out_hi (rtx
*operands
)
28301 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
28304 /* Handle reading a half-word from memory during reload. */
28306 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
28308 gcc_unreachable ();
28311 /* Return the length of a function name prefix
28312 that starts with the character 'c'. */
28314 arm_get_strip_length (int c
)
28318 ARM_NAME_ENCODING_LENGTHS
/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
28357 /* This function is used to emit an EABI tag and its associated value.
28358 We emit the numerical value of the tag in case the assembler does not
28359 support textual tags. (Eg gas prior to 2.20). If requested we include
28360 the tag name in a comment so that anyone reading the assembler output
28361 will know which tag is being set.
28363 This function is not static because arm-c.c needs it too. */
28366 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
28368 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
28369 if (flag_verbose_asm
|| flag_debug_asm
)
28370 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
28371 asm_fprintf (asm_out_file
, "\n");
28375 arm_file_start (void)
28379 if (TARGET_UNIFIED_ASM
)
28380 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
28384 const char *fpu_name
;
28385 if (arm_selected_arch
)
28387 /* armv7ve doesn't support any extensions. */
28388 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
28390 /* Keep backward compatability for assemblers
28391 which don't support armv7ve. */
28392 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
28393 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
28394 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
28395 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
28396 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
28400 const char* pos
= strchr (arm_selected_arch
->name
, '+');
28404 gcc_assert (strlen (arm_selected_arch
->name
)
28405 <= sizeof (buf
) / sizeof (*pos
));
28406 strncpy (buf
, arm_selected_arch
->name
,
28407 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
28408 buf
[pos
- arm_selected_arch
->name
] = '\0';
28409 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
28410 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
28413 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
28416 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
28417 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
28420 const char* truncated_name
28421 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
28422 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
28425 if (TARGET_SOFT_FLOAT
)
28427 fpu_name
= "softvfp";
28431 fpu_name
= arm_fpu_desc
->name
;
28432 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
28434 if (TARGET_HARD_FLOAT
)
28435 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28436 if (TARGET_HARD_FLOAT_ABI
)
28437 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28440 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
28442 /* Some of these attributes only apply when the corresponding features
28443 are used. However we don't have any easy way of figuring this out.
28444 Conservatively record the setting that would have been used. */
28446 if (flag_rounding_math
)
28447 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28449 if (!flag_unsafe_math_optimizations
)
28451 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28452 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28454 if (flag_signaling_nans
)
28455 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28457 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28458 flag_finite_math_only
? 1 : 3);
28460 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28461 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28462 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28463 flag_short_enums
? 1 : 2);
28465 /* Tag_ABI_optimization_goals. */
28468 else if (optimize
>= 2)
28474 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
28476 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28479 if (arm_fp16_format
)
28480 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28481 (int) arm_fp16_format
);
28483 if (arm_lang_output_object_attributes_hook
)
28484 arm_lang_output_object_attributes_hook();
28487 default_file_start ();
28491 arm_file_end (void)
28495 if (NEED_INDICATE_EXEC_STACK
)
28496 /* Add .note.GNU-stack. */
28497 file_end_indicate_exec_stack ();
28499 if (! thumb_call_reg_needed
)
28502 switch_to_section (text_section
);
28503 asm_fprintf (asm_out_file
, "\t.code 16\n");
28504 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28506 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28508 rtx label
= thumb_call_via_label
[regno
];
28512 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28513 CODE_LABEL_NUMBER (label
));
28514 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28520 /* Symbols in the text segment can be accessed without indirecting via the
28521 constant pool; it may take an extra binary operation, but this is still
28522 faster than indirecting via memory. Don't do this when not optimizing,
28523 since we won't be calculating al of the offsets necessary to do this
28527 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28529 if (optimize
> 0 && TREE_CONSTANT (decl
))
28530 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28532 default_encode_section_info (decl
, rtl
, first
);
28534 #endif /* !ARM_PE */
28537 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28539 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28540 && !strcmp (prefix
, "L"))
28542 arm_ccfsm_state
= 0;
28543 arm_target_insn
= NULL
;
28545 default_internal_label (stream
, prefix
, labelno
);
28548 /* Output code to add DELTA to the first argument, and then jump
28549 to FUNCTION. Used for C++ multiple inheritance. */
28551 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
28552 HOST_WIDE_INT delta
,
28553 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
28556 static int thunk_label
= 0;
28559 int mi_delta
= delta
;
28560 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
28562 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
28565 mi_delta
= - mi_delta
;
28567 final_start_function (emit_barrier (), file
, 1);
28571 int labelno
= thunk_label
++;
28572 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
28573 /* Thunks are entered in arm mode when avaiable. */
28574 if (TARGET_THUMB1_ONLY
)
28576 /* push r3 so we can use it as a temporary. */
28577 /* TODO: Omit this save if r3 is not used. */
28578 fputs ("\tpush {r3}\n", file
);
28579 fputs ("\tldr\tr3, ", file
);
28583 fputs ("\tldr\tr12, ", file
);
28585 assemble_name (file
, label
);
28586 fputc ('\n', file
);
28589 /* If we are generating PIC, the ldr instruction below loads
28590 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28591 the address of the add + 8, so we have:
28593 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28596 Note that we have "+ 1" because some versions of GNU ld
28597 don't set the low bit of the result for R_ARM_REL32
28598 relocations against thumb function symbols.
28599 On ARMv6M this is +4, not +8. */
28600 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
28601 assemble_name (file
, labelpc
);
28602 fputs (":\n", file
);
28603 if (TARGET_THUMB1_ONLY
)
28605 /* This is 2 insns after the start of the thunk, so we know it
28606 is 4-byte aligned. */
28607 fputs ("\tadd\tr3, pc, r3\n", file
);
28608 fputs ("\tmov r12, r3\n", file
);
28611 fputs ("\tadd\tr12, pc, r12\n", file
);
28613 else if (TARGET_THUMB1_ONLY
)
28614 fputs ("\tmov r12, r3\n", file
);
28616 if (TARGET_THUMB1_ONLY
)
28618 if (mi_delta
> 255)
28620 fputs ("\tldr\tr3, ", file
);
28621 assemble_name (file
, label
);
28622 fputs ("+4\n", file
);
28623 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
28624 mi_op
, this_regno
, this_regno
);
28626 else if (mi_delta
!= 0)
28628 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28629 mi_op
, this_regno
, this_regno
,
28635 /* TODO: Use movw/movt for large constants when available. */
28636 while (mi_delta
!= 0)
28638 if ((mi_delta
& (3 << shift
)) == 0)
28642 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28643 mi_op
, this_regno
, this_regno
,
28644 mi_delta
& (0xff << shift
));
28645 mi_delta
&= ~(0xff << shift
);
28652 if (TARGET_THUMB1_ONLY
)
28653 fputs ("\tpop\t{r3}\n", file
);
28655 fprintf (file
, "\tbx\tr12\n");
28656 ASM_OUTPUT_ALIGN (file
, 2);
28657 assemble_name (file
, label
);
28658 fputs (":\n", file
);
28661 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28662 rtx tem
= XEXP (DECL_RTL (function
), 0);
28663 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28664 pipeline offset is four rather than eight. Adjust the offset
28666 tem
= plus_constant (GET_MODE (tem
), tem
,
28667 TARGET_THUMB1_ONLY
? -3 : -7);
28668 tem
= gen_rtx_MINUS (GET_MODE (tem
),
28670 gen_rtx_SYMBOL_REF (Pmode
,
28671 ggc_strdup (labelpc
)));
28672 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
28675 /* Output ".word .LTHUNKn". */
28676 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
28678 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
28679 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
28683 fputs ("\tb\t", file
);
28684 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28685 if (NEED_PLT_RELOC
)
28686 fputs ("(PLT)", file
);
28687 fputc ('\n', file
);
28690 final_end_function ();
28694 arm_emit_vector_const (FILE *file
, rtx x
)
28697 const char * pattern
;
28699 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
28701 switch (GET_MODE (x
))
28703 case V2SImode
: pattern
= "%08x"; break;
28704 case V4HImode
: pattern
= "%04x"; break;
28705 case V8QImode
: pattern
= "%02x"; break;
28706 default: gcc_unreachable ();
28709 fprintf (file
, "0x");
28710 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
28714 element
= CONST_VECTOR_ELT (x
, i
);
28715 fprintf (file
, pattern
, INTVAL (element
));
28721 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28722 HFmode constant pool entries are actually loaded with ldr. */
28724 arm_emit_fp16_const (rtx c
)
28729 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
28730 bits
= real_to_target (NULL
, &r
, HFmode
);
28731 if (WORDS_BIG_ENDIAN
)
28732 assemble_zeros (2);
28733 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
28734 if (!WORDS_BIG_ENDIAN
)
28735 assemble_zeros (2);
28739 arm_output_load_gr (rtx
*operands
)
28746 if (!MEM_P (operands
[1])
28747 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
28748 || !REG_P (reg
= XEXP (sum
, 0))
28749 || !CONST_INT_P (offset
= XEXP (sum
, 1))
28750 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
28751 return "wldrw%?\t%0, %1";
28753 /* Fix up an out-of-range load of a GR register. */
28754 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
28755 wcgr
= operands
[0];
28757 output_asm_insn ("ldr%?\t%0, %1", operands
);
28759 operands
[0] = wcgr
;
28761 output_asm_insn ("tmcr%?\t%0, %1", operands
);
28762 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
28767 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28769 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28770 named arg and all anonymous args onto the stack.
28771 XXX I know the prologue shouldn't be pushing registers, but it is faster
28775 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
28779 int second_time ATTRIBUTE_UNUSED
)
28781 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
28784 cfun
->machine
->uses_anonymous_args
= 1;
28785 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
28787 nregs
= pcum
->aapcs_ncrn
;
28788 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
28792 nregs
= pcum
->nregs
;
28794 if (nregs
< NUM_ARG_REGS
)
28795 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
28798 /* We can't rely on the caller doing the proper promotion when
28799 using APCS or ATPCS. */
28802 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
28804 return !TARGET_AAPCS_BASED
;
28807 static machine_mode
28808 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
28810 int *punsignedp ATTRIBUTE_UNUSED
,
28811 const_tree fntype ATTRIBUTE_UNUSED
,
28812 int for_return ATTRIBUTE_UNUSED
)
28814 if (GET_MODE_CLASS (mode
) == MODE_INT
28815 && GET_MODE_SIZE (mode
) < 4)
28821 /* AAPCS based ABIs use short enums by default. */
28824 arm_default_short_enums (void)
28826 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
28830 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28833 arm_align_anon_bitfield (void)
28835 return TARGET_AAPCS_BASED
;
28839 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28842 arm_cxx_guard_type (void)
28844 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
28848 /* The EABI says test the least significant bit of a guard variable. */
28851 arm_cxx_guard_mask_bit (void)
28853 return TARGET_AAPCS_BASED
;
28857 /* The EABI specifies that all array cookies are 8 bytes long. */
28860 arm_get_cookie_size (tree type
)
28864 if (!TARGET_AAPCS_BASED
)
28865 return default_cxx_get_cookie_size (type
);
28867 size
= build_int_cst (sizetype
, 8);
28872 /* The EABI says that array cookies should also contain the element size. */
28875 arm_cookie_has_size (void)
28877 return TARGET_AAPCS_BASED
;
28881 /* The EABI says constructors and destructors should return a pointer to
28882 the object constructed/destroyed. */
28885 arm_cxx_cdtor_returns_this (void)
28887 return TARGET_AAPCS_BASED
;
28890 /* The EABI says that an inline function may never be the key
28894 arm_cxx_key_method_may_be_inline (void)
28896 return !TARGET_AAPCS_BASED
;
28900 arm_cxx_determine_class_data_visibility (tree decl
)
28902 if (!TARGET_AAPCS_BASED
28903 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
28906 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28907 is exported. However, on systems without dynamic vague linkage,
28908 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28909 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
28910 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
28912 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
28913 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
28917 arm_cxx_class_data_always_comdat (void)
28919 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28920 vague linkage if the class has no key function. */
28921 return !TARGET_AAPCS_BASED
;
28925 /* The EABI says __aeabi_atexit should be used to register static
28929 arm_cxx_use_aeabi_atexit (void)
28931 return TARGET_AAPCS_BASED
;
28936 arm_set_return_address (rtx source
, rtx scratch
)
28938 arm_stack_offsets
*offsets
;
28939 HOST_WIDE_INT delta
;
28941 unsigned long saved_regs
;
28943 offsets
= arm_get_frame_offsets ();
28944 saved_regs
= offsets
->saved_regs_mask
;
28946 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
28947 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28950 if (frame_pointer_needed
)
28951 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
28954 /* LR will be the first saved register. */
28955 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
28960 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
28961 GEN_INT (delta
& ~4095)));
28966 addr
= stack_pointer_rtx
;
28968 addr
= plus_constant (Pmode
, addr
, delta
);
28970 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28976 thumb_set_return_address (rtx source
, rtx scratch
)
28978 arm_stack_offsets
*offsets
;
28979 HOST_WIDE_INT delta
;
28980 HOST_WIDE_INT limit
;
28983 unsigned long mask
;
28987 offsets
= arm_get_frame_offsets ();
28988 mask
= offsets
->saved_regs_mask
;
28989 if (mask
& (1 << LR_REGNUM
))
28992 /* Find the saved regs. */
28993 if (frame_pointer_needed
)
28995 delta
= offsets
->soft_frame
- offsets
->saved_args
;
28996 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
29002 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
29005 /* Allow for the stack frame. */
29006 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
29008 /* The link register is always the first saved register. */
29011 /* Construct the address. */
29012 addr
= gen_rtx_REG (SImode
, reg
);
29015 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
29016 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
29020 addr
= plus_constant (Pmode
, addr
, delta
);
29022 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
29025 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
29028 /* Implements target hook vector_mode_supported_p. */
29030 arm_vector_mode_supported_p (machine_mode mode
)
29032 /* Neon also supports V2SImode, etc. listed in the clause below. */
29033 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
29034 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
29037 if ((TARGET_NEON
|| TARGET_IWMMXT
)
29038 && ((mode
== V2SImode
)
29039 || (mode
== V4HImode
)
29040 || (mode
== V8QImode
)))
29043 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
29044 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
29045 || mode
== V2HAmode
))
29051 /* Implements target hook array_mode_supported_p. */
29054 arm_array_mode_supported_p (machine_mode mode
,
29055 unsigned HOST_WIDE_INT nelems
)
29058 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
29059 && (nelems
>= 2 && nelems
<= 4))
29065 /* Use the option -mvectorize-with-neon-double to override the use of quardword
29066 registers when autovectorizing for Neon, at least until multiple vector
29067 widths are supported properly by the middle-end. */
29069 static machine_mode
29070 arm_preferred_simd_mode (machine_mode mode
)
29076 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
29078 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
29080 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
29082 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
29084 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
29091 if (TARGET_REALLY_IWMMXT
)
29107 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29109 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29110 using r0-r4 for function arguments, r7 for the stack frame and don't have
29111 enough left over to do doubleword arithmetic. For Thumb-2 all the
29112 potentially problematic instructions accept high registers so this is not
29113 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29114 that require many low registers. */
29116 arm_class_likely_spilled_p (reg_class_t rclass
)
29118 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
29119 || rclass
== CC_REG
)
29125 /* Implements target hook small_register_classes_for_mode_p. */
29127 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
29129 return TARGET_THUMB1
;
29132 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29133 ARM insns and therefore guarantee that the shift count is modulo 256.
29134 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29135 guarantee no particular behavior for out-of-range counts. */
29137 static unsigned HOST_WIDE_INT
29138 arm_shift_truncation_mask (machine_mode mode
)
29140 return mode
== SImode
? 255 : 0;
29144 /* Map internal gcc register numbers to DWARF2 register numbers. */
29147 arm_dbx_register_number (unsigned int regno
)
29152 if (IS_VFP_REGNUM (regno
))
29154 /* See comment in arm_dwarf_register_span. */
29155 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29156 return 64 + regno
- FIRST_VFP_REGNUM
;
29158 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
29161 if (IS_IWMMXT_GR_REGNUM (regno
))
29162 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
29164 if (IS_IWMMXT_REGNUM (regno
))
29165 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
29167 gcc_unreachable ();
29170 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29171 GCC models tham as 64 32-bit registers, so we need to describe this to
29172 the DWARF generation code. Other registers can use the default. */
29174 arm_dwarf_register_span (rtx rtl
)
29182 regno
= REGNO (rtl
);
29183 if (!IS_VFP_REGNUM (regno
))
29186 /* XXX FIXME: The EABI defines two VFP register ranges:
29187 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29189 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29190 corresponding D register. Until GDB supports this, we shall use the
29191 legacy encodings. We also use these encodings for D0-D15 for
29192 compatibility with older debuggers. */
29193 mode
= GET_MODE (rtl
);
29194 if (GET_MODE_SIZE (mode
) < 8)
29197 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29199 nregs
= GET_MODE_SIZE (mode
) / 4;
29200 for (i
= 0; i
< nregs
; i
+= 2)
29201 if (TARGET_BIG_END
)
29203 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29204 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
29208 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
29209 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29214 nregs
= GET_MODE_SIZE (mode
) / 8;
29215 for (i
= 0; i
< nregs
; i
++)
29216 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
29219 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
29222 #if ARM_UNWIND_INFO
29223 /* Emit unwind directives for a store-multiple instruction or stack pointer
29224 push during alignment.
29225 These should only ever be generated by the function prologue code, so
29226 expect them to have a particular form.
29227 The store-multiple instruction sometimes pushes pc as the last register,
29228 although it should not be tracked into unwind information, or for -Os
29229 sometimes pushes some dummy registers before first register that needs
29230 to be tracked in unwind information; such dummy registers are there just
29231 to avoid separate stack adjustment, and will not be restored in the
29235 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
29238 HOST_WIDE_INT offset
;
29239 HOST_WIDE_INT nregs
;
29243 unsigned padfirst
= 0, padlast
= 0;
29246 e
= XVECEXP (p
, 0, 0);
29247 gcc_assert (GET_CODE (e
) == SET
);
29249 /* First insn will adjust the stack pointer. */
29250 gcc_assert (GET_CODE (e
) == SET
29251 && REG_P (SET_DEST (e
))
29252 && REGNO (SET_DEST (e
)) == SP_REGNUM
29253 && GET_CODE (SET_SRC (e
)) == PLUS
);
29255 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
29256 nregs
= XVECLEN (p
, 0) - 1;
29257 gcc_assert (nregs
);
29259 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
29262 /* For -Os dummy registers can be pushed at the beginning to
29263 avoid separate stack pointer adjustment. */
29264 e
= XVECEXP (p
, 0, 1);
29265 e
= XEXP (SET_DEST (e
), 0);
29266 if (GET_CODE (e
) == PLUS
)
29267 padfirst
= INTVAL (XEXP (e
, 1));
29268 gcc_assert (padfirst
== 0 || optimize_size
);
29269 /* The function prologue may also push pc, but not annotate it as it is
29270 never restored. We turn this into a stack pointer adjustment. */
29271 e
= XVECEXP (p
, 0, nregs
);
29272 e
= XEXP (SET_DEST (e
), 0);
29273 if (GET_CODE (e
) == PLUS
)
29274 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
29276 padlast
= offset
- 4;
29277 gcc_assert (padlast
== 0 || padlast
== 4);
29279 fprintf (asm_out_file
, "\t.pad #4\n");
29281 fprintf (asm_out_file
, "\t.save {");
29283 else if (IS_VFP_REGNUM (reg
))
29286 fprintf (asm_out_file
, "\t.vsave {");
29289 /* Unknown register type. */
29290 gcc_unreachable ();
29292 /* If the stack increment doesn't match the size of the saved registers,
29293 something has gone horribly wrong. */
29294 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
29298 /* The remaining insns will describe the stores. */
29299 for (i
= 1; i
<= nregs
; i
++)
29301 /* Expect (set (mem <addr>) (reg)).
29302 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29303 e
= XVECEXP (p
, 0, i
);
29304 gcc_assert (GET_CODE (e
) == SET
29305 && MEM_P (SET_DEST (e
))
29306 && REG_P (SET_SRC (e
)));
29308 reg
= REGNO (SET_SRC (e
));
29309 gcc_assert (reg
>= lastreg
);
29312 fprintf (asm_out_file
, ", ");
29313 /* We can't use %r for vfp because we need to use the
29314 double precision register names. */
29315 if (IS_VFP_REGNUM (reg
))
29316 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
29318 asm_fprintf (asm_out_file
, "%r", reg
);
29320 #ifdef ENABLE_CHECKING
29321 /* Check that the addresses are consecutive. */
29322 e
= XEXP (SET_DEST (e
), 0);
29323 if (GET_CODE (e
) == PLUS
)
29324 gcc_assert (REG_P (XEXP (e
, 0))
29325 && REGNO (XEXP (e
, 0)) == SP_REGNUM
29326 && CONST_INT_P (XEXP (e
, 1))
29327 && offset
== INTVAL (XEXP (e
, 1)));
29331 && REGNO (e
) == SP_REGNUM
);
29332 offset
+= reg_size
;
29335 fprintf (asm_out_file
, "}\n");
29337 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
29340 /* Emit unwind directives for a SET. */
29343 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
29351 switch (GET_CODE (e0
))
29354 /* Pushing a single register. */
29355 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
29356 || !REG_P (XEXP (XEXP (e0
, 0), 0))
29357 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
29360 asm_fprintf (asm_out_file
, "\t.save ");
29361 if (IS_VFP_REGNUM (REGNO (e1
)))
29362 asm_fprintf(asm_out_file
, "{d%d}\n",
29363 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
29365 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
29369 if (REGNO (e0
) == SP_REGNUM
)
29371 /* A stack increment. */
29372 if (GET_CODE (e1
) != PLUS
29373 || !REG_P (XEXP (e1
, 0))
29374 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
29375 || !CONST_INT_P (XEXP (e1
, 1)))
29378 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
29379 -INTVAL (XEXP (e1
, 1)));
29381 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
29383 HOST_WIDE_INT offset
;
29385 if (GET_CODE (e1
) == PLUS
)
29387 if (!REG_P (XEXP (e1
, 0))
29388 || !CONST_INT_P (XEXP (e1
, 1)))
29390 reg
= REGNO (XEXP (e1
, 0));
29391 offset
= INTVAL (XEXP (e1
, 1));
29392 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
29393 HARD_FRAME_POINTER_REGNUM
, reg
,
29396 else if (REG_P (e1
))
29399 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
29400 HARD_FRAME_POINTER_REGNUM
, reg
);
29405 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
29407 /* Move from sp to reg. */
29408 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
29410 else if (GET_CODE (e1
) == PLUS
29411 && REG_P (XEXP (e1
, 0))
29412 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
29413 && CONST_INT_P (XEXP (e1
, 1)))
29415 /* Set reg to offset from sp. */
29416 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
29417 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
29429 /* Emit unwind directives for the given insn. */
29432 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
29435 bool handled_one
= false;
29437 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29440 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29441 && (TREE_NOTHROW (current_function_decl
)
29442 || crtl
->all_throwers_are_sibcalls
))
29445 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
29448 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
29450 switch (REG_NOTE_KIND (note
))
29452 case REG_FRAME_RELATED_EXPR
:
29453 pat
= XEXP (note
, 0);
29456 case REG_CFA_REGISTER
:
29457 pat
= XEXP (note
, 0);
29460 pat
= PATTERN (insn
);
29461 if (GET_CODE (pat
) == PARALLEL
)
29462 pat
= XVECEXP (pat
, 0, 0);
29465 /* Only emitted for IS_STACKALIGN re-alignment. */
29470 src
= SET_SRC (pat
);
29471 dest
= SET_DEST (pat
);
29473 gcc_assert (src
== stack_pointer_rtx
);
29474 reg
= REGNO (dest
);
29475 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29478 handled_one
= true;
29481 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
29482 to get correct dwarf information for shrink-wrap. We should not
29483 emit unwind information for it because these are used either for
29484 pretend arguments or notes to adjust sp and restore registers from
29486 case REG_CFA_DEF_CFA
:
29487 case REG_CFA_ADJUST_CFA
:
29488 case REG_CFA_RESTORE
:
29491 case REG_CFA_EXPRESSION
:
29492 case REG_CFA_OFFSET
:
29493 /* ??? Only handling here what we actually emit. */
29494 gcc_unreachable ();
29502 pat
= PATTERN (insn
);
29505 switch (GET_CODE (pat
))
29508 arm_unwind_emit_set (asm_out_file
, pat
);
29512 /* Store multiple. */
29513 arm_unwind_emit_sequence (asm_out_file
, pat
);
29522 /* Output a reference from a function exception table to the type_info
29523 object X. The EABI specifies that the symbol should be relocated by
29524 an R_ARM_TARGET2 relocation. */
29527 arm_output_ttype (rtx x
)
29529 fputs ("\t.word\t", asm_out_file
);
29530 output_addr_const (asm_out_file
, x
);
29531 /* Use special relocations for symbol references. */
29532 if (!CONST_INT_P (x
))
29533 fputs ("(TARGET2)", asm_out_file
);
29534 fputc ('\n', asm_out_file
);
29539 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29542 arm_asm_emit_except_personality (rtx personality
)
29544 fputs ("\t.personality\t", asm_out_file
);
29545 output_addr_const (asm_out_file
, personality
);
29546 fputc ('\n', asm_out_file
);
29549 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29552 arm_asm_init_sections (void)
29554 exception_section
= get_unnamed_section (0, output_section_asm_op
,
29557 #endif /* ARM_UNWIND_INFO */
29559 /* Output unwind directives for the start/end of a function. */
29562 arm_output_fn_unwind (FILE * f
, bool prologue
)
29564 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29568 fputs ("\t.fnstart\n", f
);
29571 /* If this function will never be unwound, then mark it as such.
29572 The came condition is used in arm_unwind_emit to suppress
29573 the frame annotations. */
29574 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29575 && (TREE_NOTHROW (current_function_decl
)
29576 || crtl
->all_throwers_are_sibcalls
))
29577 fputs("\t.cantunwind\n", f
);
29579 fputs ("\t.fnend\n", f
);
29584 arm_emit_tls_decoration (FILE *fp
, rtx x
)
29586 enum tls_reloc reloc
;
29589 val
= XVECEXP (x
, 0, 0);
29590 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
29592 output_addr_const (fp
, val
);
29597 fputs ("(tlsgd)", fp
);
29600 fputs ("(tlsldm)", fp
);
29603 fputs ("(tlsldo)", fp
);
29606 fputs ("(gottpoff)", fp
);
29609 fputs ("(tpoff)", fp
);
29612 fputs ("(tlsdesc)", fp
);
29615 gcc_unreachable ();
29624 fputs (" + (. - ", fp
);
29625 output_addr_const (fp
, XVECEXP (x
, 0, 2));
29626 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29627 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
29628 output_addr_const (fp
, XVECEXP (x
, 0, 3));
29638 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29641 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
29643 gcc_assert (size
== 4);
29644 fputs ("\t.word\t", file
);
29645 output_addr_const (file
, x
);
29646 fputs ("(tlsldo)", file
);
29649 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29652 arm_output_addr_const_extra (FILE *fp
, rtx x
)
29654 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
29655 return arm_emit_tls_decoration (fp
, x
);
29656 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
29659 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
29661 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
29662 assemble_name_raw (fp
, label
);
29666 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
29668 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
29672 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29676 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
29678 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29682 output_addr_const (fp
, XVECEXP (x
, 0, 1));
29686 else if (GET_CODE (x
) == CONST_VECTOR
)
29687 return arm_emit_vector_const (fp
, x
);
29692 /* Output assembly for a shift instruction.
29693 SET_FLAGS determines how the instruction modifies the condition codes.
29694 0 - Do not set condition codes.
29695 1 - Set condition codes.
29696 2 - Use smallest instruction. */
29698 arm_output_shift(rtx
* operands
, int set_flags
)
29701 static const char flag_chars
[3] = {'?', '.', '!'};
29706 c
= flag_chars
[set_flags
];
29707 if (TARGET_UNIFIED_ASM
)
29709 shift
= shift_op(operands
[3], &val
);
29713 operands
[2] = GEN_INT(val
);
29714 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
29717 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
29720 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
29721 output_asm_insn (pattern
, operands
);
29725 /* Output assembly for a WMMX immediate shift instruction. */
29727 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
29729 int shift
= INTVAL (operands
[2]);
29731 machine_mode opmode
= GET_MODE (operands
[0]);
29733 gcc_assert (shift
>= 0);
29735 /* If the shift value in the register versions is > 63 (for D qualifier),
29736 31 (for W qualifier) or 15 (for H qualifier). */
29737 if (((opmode
== V4HImode
) && (shift
> 15))
29738 || ((opmode
== V2SImode
) && (shift
> 31))
29739 || ((opmode
== DImode
) && (shift
> 63)))
29743 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29744 output_asm_insn (templ
, operands
);
29745 if (opmode
== DImode
)
29747 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
29748 output_asm_insn (templ
, operands
);
29753 /* The destination register will contain all zeros. */
29754 sprintf (templ
, "wzero\t%%0");
29755 output_asm_insn (templ
, operands
);
29760 if ((opmode
== DImode
) && (shift
> 32))
29762 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29763 output_asm_insn (templ
, operands
);
29764 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
29765 output_asm_insn (templ
, operands
);
29769 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
29770 output_asm_insn (templ
, operands
);
29775 /* Output assembly for a WMMX tinsr instruction. */
29777 arm_output_iwmmxt_tinsr (rtx
*operands
)
29779 int mask
= INTVAL (operands
[3]);
29782 int units
= mode_nunits
[GET_MODE (operands
[0])];
29783 gcc_assert ((mask
& (mask
- 1)) == 0);
29784 for (i
= 0; i
< units
; ++i
)
29786 if ((mask
& 0x01) == 1)
29792 gcc_assert (i
< units
);
29794 switch (GET_MODE (operands
[0]))
29797 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
29800 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
29803 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
29806 gcc_unreachable ();
29809 output_asm_insn (templ
, operands
);
29814 /* Output a Thumb-1 casesi dispatch sequence. */
29816 thumb1_output_casesi (rtx
*operands
)
29818 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
29820 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29822 switch (GET_MODE(diff_vec
))
29825 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29826 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29828 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29829 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29831 return "bl\t%___gnu_thumb1_case_si";
29833 gcc_unreachable ();
29837 /* Output a Thumb-2 casesi instruction. */
29839 thumb2_output_casesi (rtx
*operands
)
29841 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
29843 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29845 output_asm_insn ("cmp\t%0, %1", operands
);
29846 output_asm_insn ("bhi\t%l3", operands
);
29847 switch (GET_MODE(diff_vec
))
29850 return "tbb\t[%|pc, %0]";
29852 return "tbh\t[%|pc, %0, lsl #1]";
29856 output_asm_insn ("adr\t%4, %l2", operands
);
29857 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
29858 output_asm_insn ("add\t%4, %4, %5", operands
);
29863 output_asm_insn ("adr\t%4, %l2", operands
);
29864 return "ldr\t%|pc, [%4, %0, lsl #2]";
29867 gcc_unreachable ();
29871 /* Most ARM cores are single issue, but some newer ones can dual issue.
29872 The scheduler descriptions rely on this being correct. */
29874 arm_issue_rate (void)
29901 /* A table and a function to perform ARM-specific name mangling for
29902 NEON vector types in order to conform to the AAPCS (see "Procedure
29903 Call Standard for the ARM Architecture", Appendix A). To qualify
29904 for emission with the mangled names defined in that document, a
29905 vector type must not only be of the correct mode but also be
29906 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29910 const char *element_type_name
;
29911 const char *aapcs_name
;
29912 } arm_mangle_map_entry
;
29914 static arm_mangle_map_entry arm_mangle_map
[] = {
29915 /* 64-bit containerized types. */
29916 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
29917 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29918 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
29919 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29920 { V4HFmode
, "__builtin_neon_hf", "18__simd64_float16_t" },
29921 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
29922 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
29923 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
29924 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29925 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29927 /* 128-bit containerized types. */
29928 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
29929 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29930 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
29931 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29932 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
29933 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
29934 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
29935 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29936 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29937 { VOIDmode
, NULL
, NULL
}
29941 arm_mangle_type (const_tree type
)
29943 arm_mangle_map_entry
*pos
= arm_mangle_map
;
29945 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29946 has to be managled as if it is in the "std" namespace. */
29947 if (TARGET_AAPCS_BASED
29948 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
29949 return "St9__va_list";
29951 /* Half-precision float. */
29952 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
29955 if (TREE_CODE (type
) != VECTOR_TYPE
)
29958 /* Check the mode of the vector type, and the name of the vector
29959 element type, against the table. */
29960 while (pos
->mode
!= VOIDmode
)
29962 tree elt_type
= TREE_TYPE (type
);
29964 if (pos
->mode
== TYPE_MODE (type
)
29965 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
29966 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
29967 pos
->element_type_name
))
29968 return pos
->aapcs_name
;
29973 /* Use the default mangling for unrecognized (possibly user-defined)
29978 /* Order of allocation of core registers for Thumb: this allocation is
29979 written over the corresponding initial entries of the array
29980 initialized with REG_ALLOC_ORDER. We allocate all low registers
29981 first. Saving and restoring a low register is usually cheaper than
29982 using a call-clobbered high register. */
29984 static const int thumb_core_reg_alloc_order
[] =
29986 3, 2, 1, 0, 4, 5, 6, 7,
29987 14, 12, 8, 9, 10, 11
29990 /* Adjust register allocation order when compiling for Thumb. */
29993 arm_order_regs_for_local_alloc (void)
29995 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
29996 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
29998 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
29999 sizeof (thumb_core_reg_alloc_order
));
30002 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30005 arm_frame_pointer_required (void)
30007 return (cfun
->has_nonlocal_label
30008 || SUBTARGET_FRAME_POINTER_REQUIRED
30009 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
30012 /* Only thumb1 can't support conditional execution, so return true if
30013 the target is not thumb1. */
30015 arm_have_conditional_execution (void)
30017 return !TARGET_THUMB1
;
30021 arm_builtin_vectorized_function (tree fndecl
, tree type_out
, tree type_in
)
30023 machine_mode in_mode
, out_mode
;
30025 bool out_unsigned_p
= TYPE_UNSIGNED (type_out
);
30027 if (TREE_CODE (type_out
) != VECTOR_TYPE
30028 || TREE_CODE (type_in
) != VECTOR_TYPE
)
30031 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
30032 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
30033 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
30034 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
30036 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30037 decl of the vectorized builtin for the appropriate vector mode.
30038 NULL_TREE is returned if no such builtin is available. */
30039 #undef ARM_CHECK_BUILTIN_MODE
30040 #define ARM_CHECK_BUILTIN_MODE(C) \
30041 (TARGET_NEON && TARGET_FPU_ARMV8 \
30042 && flag_unsafe_math_optimizations \
30043 && ARM_CHECK_BUILTIN_MODE_1 (C))
30045 #undef ARM_CHECK_BUILTIN_MODE_1
30046 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30047 (out_mode == SFmode && out_n == C \
30048 && in_mode == SFmode && in_n == C)
30050 #undef ARM_FIND_VRINT_VARIANT
30051 #define ARM_FIND_VRINT_VARIANT(N) \
30052 (ARM_CHECK_BUILTIN_MODE (2) \
30053 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30054 : (ARM_CHECK_BUILTIN_MODE (4) \
30055 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30058 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
)
30060 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
30063 case BUILT_IN_FLOORF
:
30064 return ARM_FIND_VRINT_VARIANT (vrintm
);
30065 case BUILT_IN_CEILF
:
30066 return ARM_FIND_VRINT_VARIANT (vrintp
);
30067 case BUILT_IN_TRUNCF
:
30068 return ARM_FIND_VRINT_VARIANT (vrintz
);
30069 case BUILT_IN_ROUNDF
:
30070 return ARM_FIND_VRINT_VARIANT (vrinta
);
30071 #undef ARM_CHECK_BUILTIN_MODE_1
30072 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30073 (out_mode == SImode && out_n == C \
30074 && in_mode == SFmode && in_n == C)
30076 #define ARM_FIND_VCVT_VARIANT(N) \
30077 (ARM_CHECK_BUILTIN_MODE (2) \
30078 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30079 : (ARM_CHECK_BUILTIN_MODE (4) \
30080 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30083 #define ARM_FIND_VCVTU_VARIANT(N) \
30084 (ARM_CHECK_BUILTIN_MODE (2) \
30085 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30086 : (ARM_CHECK_BUILTIN_MODE (4) \
30087 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30089 case BUILT_IN_LROUNDF
:
30090 return out_unsigned_p
30091 ? ARM_FIND_VCVTU_VARIANT (vcvta
)
30092 : ARM_FIND_VCVT_VARIANT (vcvta
);
30093 case BUILT_IN_LCEILF
:
30094 return out_unsigned_p
30095 ? ARM_FIND_VCVTU_VARIANT (vcvtp
)
30096 : ARM_FIND_VCVT_VARIANT (vcvtp
);
30097 case BUILT_IN_LFLOORF
:
30098 return out_unsigned_p
30099 ? ARM_FIND_VCVTU_VARIANT (vcvtm
)
30100 : ARM_FIND_VCVT_VARIANT (vcvtm
);
30101 #undef ARM_CHECK_BUILTIN_MODE
30102 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30103 (out_mode == N##mode && out_n == C \
30104 && in_mode == N##mode && in_n == C)
30105 case BUILT_IN_BSWAP16
:
30106 if (ARM_CHECK_BUILTIN_MODE (4, HI
))
30107 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi
, false);
30108 else if (ARM_CHECK_BUILTIN_MODE (8, HI
))
30109 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi
, false);
30112 case BUILT_IN_BSWAP32
:
30113 if (ARM_CHECK_BUILTIN_MODE (2, SI
))
30114 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si
, false);
30115 else if (ARM_CHECK_BUILTIN_MODE (4, SI
))
30116 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si
, false);
30119 case BUILT_IN_BSWAP64
:
30120 if (ARM_CHECK_BUILTIN_MODE (2, DI
))
30121 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di
, false);
30124 case BUILT_IN_COPYSIGNF
:
30125 if (ARM_CHECK_BUILTIN_MODE (2, SF
))
30126 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf
, false);
30127 else if (ARM_CHECK_BUILTIN_MODE (4, SF
))
30128 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf
, false);
30138 #undef ARM_FIND_VCVT_VARIANT
30139 #undef ARM_FIND_VCVTU_VARIANT
30140 #undef ARM_CHECK_BUILTIN_MODE
30141 #undef ARM_FIND_VRINT_VARIANT
30144 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30145 static HOST_WIDE_INT
30146 arm_vector_alignment (const_tree type
)
30148 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
30150 if (TARGET_AAPCS_BASED
)
30151 align
= MIN (align
, 64);
30156 static unsigned int
30157 arm_autovectorize_vector_sizes (void)
30159 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
30163 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
30165 /* Vectors which aren't in packed structures will not be less aligned than
30166 the natural alignment of their element type, so this is safe. */
30167 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30170 return default_builtin_vector_alignment_reachable (type
, is_packed
);
30174 arm_builtin_support_vector_misalignment (machine_mode mode
,
30175 const_tree type
, int misalignment
,
30178 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30180 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
30185 /* If the misalignment is unknown, we should be able to handle the access
30186 so long as it is not to a member of a packed data structure. */
30187 if (misalignment
== -1)
30190 /* Return true if the misalignment is a multiple of the natural alignment
30191 of the vector's element type. This is probably always going to be
30192 true in practice, since we've already established that this isn't a
30194 return ((misalignment
% align
) == 0);
30197 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
30202 arm_conditional_register_usage (void)
30206 if (TARGET_THUMB1
&& optimize_size
)
30208 /* When optimizing for size on Thumb-1, it's better not
30209 to use the HI regs, because of the overhead of
30211 for (regno
= FIRST_HI_REGNUM
;
30212 regno
<= LAST_HI_REGNUM
; ++regno
)
30213 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
30216 /* The link register can be clobbered by any branch insn,
30217 but we have no way to track that at present, so mark
30218 it as unavailable. */
30220 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
30222 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
30224 /* VFPv3 registers are disabled when earlier VFP
30225 versions are selected due to the definition of
30226 LAST_VFP_REGNUM. */
30227 for (regno
= FIRST_VFP_REGNUM
;
30228 regno
<= LAST_VFP_REGNUM
; ++ regno
)
30230 fixed_regs
[regno
] = 0;
30231 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
30232 || regno
>= FIRST_VFP_REGNUM
+ 32;
30236 if (TARGET_REALLY_IWMMXT
)
30238 regno
= FIRST_IWMMXT_GR_REGNUM
;
30239 /* The 2002/10/09 revision of the XScale ABI has wCG0
30240 and wCG1 as call-preserved registers. The 2002/11/21
30241 revision changed this so that all wCG registers are
30242 scratch registers. */
30243 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
30244 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
30245 fixed_regs
[regno
] = 0;
30246 /* The XScale ABI has wR0 - wR9 as scratch registers,
30247 the rest as call-preserved registers. */
30248 for (regno
= FIRST_IWMMXT_REGNUM
;
30249 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
30251 fixed_regs
[regno
] = 0;
30252 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
30256 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
30258 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30259 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30261 else if (TARGET_APCS_STACK
)
30263 fixed_regs
[10] = 1;
30264 call_used_regs
[10] = 1;
30266 /* -mcaller-super-interworking reserves r11 for calls to
30267 _interwork_r11_call_via_rN(). Making the register global
30268 is an easy way of ensuring that it remains valid for all
30270 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
30271 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
30273 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30274 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30275 if (TARGET_CALLER_INTERWORKING
)
30276 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30278 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30282 arm_preferred_rename_class (reg_class_t rclass
)
30284 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30285 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
30286 and code size can be reduced. */
30287 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
30293 /* Compute the atrribute "length" of insn "*push_multi".
30294 So this function MUST be kept in sync with that insn pattern. */
30296 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
30298 int i
, regno
, hi_reg
;
30299 int num_saves
= XVECLEN (parallel_op
, 0);
30309 regno
= REGNO (first_op
);
30310 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30311 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
30313 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
30314 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30322 /* Compute the number of instructions emitted by output_move_double. */
30324 arm_count_output_move_double_insns (rtx
*operands
)
30328 /* output_move_double may modify the operands array, so call it
30329 here on a copy of the array. */
30330 ops
[0] = operands
[0];
30331 ops
[1] = operands
[1];
30332 output_move_double (ops
, false, &count
);
30337 vfp3_const_double_for_fract_bits (rtx operand
)
30339 REAL_VALUE_TYPE r0
;
30341 if (!CONST_DOUBLE_P (operand
))
30344 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
30345 if (exact_real_inverse (DFmode
, &r0
))
30347 if (exact_real_truncate (DFmode
, &r0
))
30349 HOST_WIDE_INT value
= real_to_integer (&r0
);
30350 value
= value
& 0xffffffff;
30351 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30352 return int_log2 (value
);
30359 vfp3_const_double_for_bits (rtx operand
)
30361 REAL_VALUE_TYPE r0
;
30363 if (!CONST_DOUBLE_P (operand
))
30366 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
30367 if (exact_real_truncate (DFmode
, &r0
))
30369 HOST_WIDE_INT value
= real_to_integer (&r0
);
30370 value
= value
& 0xffffffff;
30371 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30372 return int_log2 (value
);
30378 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30381 arm_pre_atomic_barrier (enum memmodel model
)
30383 if (need_atomic_barrier_p (model
, true))
30384 emit_insn (gen_memory_barrier ());
30388 arm_post_atomic_barrier (enum memmodel model
)
30390 if (need_atomic_barrier_p (model
, false))
30391 emit_insn (gen_memory_barrier ());
30394 /* Emit the load-exclusive and store-exclusive instructions.
30395 Use acquire and release versions if necessary. */
30398 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
30400 rtx (*gen
) (rtx
, rtx
);
30406 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
30407 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
30408 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
30409 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
30411 gcc_unreachable ();
30418 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
30419 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
30420 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
30421 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
30423 gcc_unreachable ();
30427 emit_insn (gen (rval
, mem
));
30431 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
30434 rtx (*gen
) (rtx
, rtx
, rtx
);
30440 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
30441 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
30442 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
30443 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
30445 gcc_unreachable ();
30452 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
30453 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
30454 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
30455 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
30457 gcc_unreachable ();
30461 emit_insn (gen (bval
, rval
, mem
));
30464 /* Mark the previous jump instruction as unlikely. */
30467 emit_unlikely_jump (rtx insn
)
30469 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
30471 insn
= emit_jump_insn (insn
);
30472 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
30475 /* Expand a compare and swap pattern. */
30478 arm_expand_compare_and_swap (rtx operands
[])
30480 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
30482 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
30484 bval
= operands
[0];
30485 rval
= operands
[1];
30487 oldval
= operands
[3];
30488 newval
= operands
[4];
30489 is_weak
= operands
[5];
30490 mod_s
= operands
[6];
30491 mod_f
= operands
[7];
30492 mode
= GET_MODE (mem
);
30494 /* Normally the succ memory model must be stronger than fail, but in the
30495 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30496 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30498 if (TARGET_HAVE_LDACQ
30499 && INTVAL (mod_f
) == MEMMODEL_ACQUIRE
30500 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
30501 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
30507 /* For narrow modes, we're going to perform the comparison in SImode,
30508 so do the zero-extension now. */
30509 rval
= gen_reg_rtx (SImode
);
30510 oldval
= convert_modes (SImode
, mode
, oldval
, true);
30514 /* Force the value into a register if needed. We waited until after
30515 the zero-extension above to do this properly. */
30516 if (!arm_add_operand (oldval
, SImode
))
30517 oldval
= force_reg (SImode
, oldval
);
30521 if (!cmpdi_operand (oldval
, mode
))
30522 oldval
= force_reg (mode
, oldval
);
30526 gcc_unreachable ();
30531 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
30532 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
30533 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
30534 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
30536 gcc_unreachable ();
30539 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
30541 if (mode
== QImode
|| mode
== HImode
)
30542 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
30544 /* In all cases, we arrange for success to be signaled by Z set.
30545 This arrangement allows for the boolean result to be used directly
30546 in a subsequent branch, post optimization. */
30547 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30548 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
30549 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
30552 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30553 another memory store between the load-exclusive and store-exclusive can
30554 reset the monitor from Exclusive to Open state. This means we must wait
30555 until after reload to split the pattern, lest we get a register spill in
30556 the middle of the atomic sequence. */
30559 arm_split_compare_and_swap (rtx operands
[])
30561 rtx rval
, mem
, oldval
, newval
, scratch
;
30563 enum memmodel mod_s
, mod_f
;
30565 rtx_code_label
*label1
, *label2
;
30568 rval
= operands
[0];
30570 oldval
= operands
[2];
30571 newval
= operands
[3];
30572 is_weak
= (operands
[4] != const0_rtx
);
30573 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
30574 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
30575 scratch
= operands
[7];
30576 mode
= GET_MODE (mem
);
30578 bool use_acquire
= TARGET_HAVE_LDACQ
30579 && !(mod_s
== MEMMODEL_RELAXED
30580 || mod_s
== MEMMODEL_CONSUME
30581 || mod_s
== MEMMODEL_RELEASE
);
30583 bool use_release
= TARGET_HAVE_LDACQ
30584 && !(mod_s
== MEMMODEL_RELAXED
30585 || mod_s
== MEMMODEL_CONSUME
30586 || mod_s
== MEMMODEL_ACQUIRE
);
30588 /* Checks whether a barrier is needed and emits one accordingly. */
30589 if (!(use_acquire
|| use_release
))
30590 arm_pre_atomic_barrier (mod_s
);
30595 label1
= gen_label_rtx ();
30596 emit_label (label1
);
30598 label2
= gen_label_rtx ();
30600 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
30602 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
30603 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30604 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30605 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
30606 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30608 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
30610 /* Weak or strong, we want EQ to be true for success, so that we
30611 match the flags that we got from the compare above. */
30612 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30613 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
30614 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
30618 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30619 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30620 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
30621 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30624 if (mod_f
!= MEMMODEL_RELAXED
)
30625 emit_label (label2
);
30627 /* Checks whether a barrier is needed and emits one accordingly. */
30628 if (!(use_acquire
|| use_release
))
30629 arm_post_atomic_barrier (mod_s
);
30631 if (mod_f
== MEMMODEL_RELAXED
)
30632 emit_label (label2
);
30636 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
30637 rtx value
, rtx model_rtx
, rtx cond
)
30639 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
30640 machine_mode mode
= GET_MODE (mem
);
30641 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
30642 rtx_code_label
*label
;
30645 bool use_acquire
= TARGET_HAVE_LDACQ
30646 && !(model
== MEMMODEL_RELAXED
30647 || model
== MEMMODEL_CONSUME
30648 || model
== MEMMODEL_RELEASE
);
30650 bool use_release
= TARGET_HAVE_LDACQ
30651 && !(model
== MEMMODEL_RELAXED
30652 || model
== MEMMODEL_CONSUME
30653 || model
== MEMMODEL_ACQUIRE
);
30655 /* Checks whether a barrier is needed and emits one accordingly. */
30656 if (!(use_acquire
|| use_release
))
30657 arm_pre_atomic_barrier (model
);
30659 label
= gen_label_rtx ();
30660 emit_label (label
);
30663 new_out
= gen_lowpart (wmode
, new_out
);
30665 old_out
= gen_lowpart (wmode
, old_out
);
30668 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
30670 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
30679 x
= gen_rtx_AND (wmode
, old_out
, value
);
30680 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30681 x
= gen_rtx_NOT (wmode
, new_out
);
30682 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30686 if (CONST_INT_P (value
))
30688 value
= GEN_INT (-INTVAL (value
));
30694 if (mode
== DImode
)
30696 /* DImode plus/minus need to clobber flags. */
30697 /* The adddi3 and subdi3 patterns are incorrectly written so that
30698 they require matching operands, even when we could easily support
30699 three operands. Thankfully, this can be fixed up post-splitting,
30700 as the individual add+adc patterns do accept three operands and
30701 post-reload cprop can make these moves go away. */
30702 emit_move_insn (new_out
, old_out
);
30704 x
= gen_adddi3 (new_out
, new_out
, value
);
30706 x
= gen_subdi3 (new_out
, new_out
, value
);
30713 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
30714 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30718 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
30721 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30722 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
30724 /* Checks whether a barrier is needed and emits one accordingly. */
30725 if (!(use_acquire
|| use_release
))
30726 arm_post_atomic_barrier (model
);
30729 #define MAX_VECT_LEN 16
30731 struct expand_vec_perm_d
30733 rtx target
, op0
, op1
;
30734 unsigned char perm
[MAX_VECT_LEN
];
30735 machine_mode vmode
;
30736 unsigned char nelt
;
30741 /* Generate a variable permutation. */
30744 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30746 machine_mode vmode
= GET_MODE (target
);
30747 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30749 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
30750 gcc_checking_assert (GET_MODE (op0
) == vmode
);
30751 gcc_checking_assert (GET_MODE (op1
) == vmode
);
30752 gcc_checking_assert (GET_MODE (sel
) == vmode
);
30753 gcc_checking_assert (TARGET_NEON
);
30757 if (vmode
== V8QImode
)
30758 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
30760 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
30766 if (vmode
== V8QImode
)
30768 pair
= gen_reg_rtx (V16QImode
);
30769 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
30770 pair
= gen_lowpart (TImode
, pair
);
30771 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
30775 pair
= gen_reg_rtx (OImode
);
30776 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
30777 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
30783 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30785 machine_mode vmode
= GET_MODE (target
);
30786 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
30787 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30788 rtx rmask
[MAX_VECT_LEN
], mask
;
30790 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30791 numbering of elements for big-endian, we must reverse the order. */
30792 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
30794 /* The VTBL instruction does not use a modulo index, so we must take care
30795 of that ourselves. */
30796 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30797 for (i
= 0; i
< nelt
; ++i
)
30799 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
30800 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
30802 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
30805 /* Generate or test for an insn that supports a constant permutation. */
30807 /* Recognize patterns for the VUZP insns. */
30810 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
30812 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30813 rtx out0
, out1
, in0
, in1
, x
;
30814 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30816 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30819 /* Note that these are little-endian tests. Adjust for big-endian later. */
30820 if (d
->perm
[0] == 0)
30822 else if (d
->perm
[0] == 1)
30826 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30828 for (i
= 0; i
< nelt
; i
++)
30830 unsigned elt
= (i
* 2 + odd
) & mask
;
30831 if (d
->perm
[i
] != elt
)
30841 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
30842 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
30843 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
30844 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
30845 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
30846 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
30847 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
30848 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
30850 gcc_unreachable ();
30855 if (BYTES_BIG_ENDIAN
)
30857 x
= in0
, in0
= in1
, in1
= x
;
30862 out1
= gen_reg_rtx (d
->vmode
);
30864 x
= out0
, out0
= out1
, out1
= x
;
30866 emit_insn (gen (out0
, in0
, in1
, out1
));
30870 /* Recognize patterns for the VZIP insns. */
30873 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
30875 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
30876 rtx out0
, out1
, in0
, in1
, x
;
30877 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30879 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30882 /* Note that these are little-endian tests. Adjust for big-endian later. */
30884 if (d
->perm
[0] == high
)
30886 else if (d
->perm
[0] == 0)
30890 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30892 for (i
= 0; i
< nelt
/ 2; i
++)
30894 unsigned elt
= (i
+ high
) & mask
;
30895 if (d
->perm
[i
* 2] != elt
)
30897 elt
= (elt
+ nelt
) & mask
;
30898 if (d
->perm
[i
* 2 + 1] != elt
)
30908 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
30909 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
30910 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
30911 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
30912 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
30913 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
30914 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
30915 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
30917 gcc_unreachable ();
30922 if (BYTES_BIG_ENDIAN
)
30924 x
= in0
, in0
= in1
, in1
= x
;
30929 out1
= gen_reg_rtx (d
->vmode
);
30931 x
= out0
, out0
= out1
, out1
= x
;
30933 emit_insn (gen (out0
, in0
, in1
, out1
));
30937 /* Recognize patterns for the VREV insns. */
30940 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
30942 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
30943 rtx (*gen
)(rtx
, rtx
, rtx
);
30945 if (!d
->one_vector_p
)
30954 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
30955 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
30963 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
30964 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
30965 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
30966 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
30974 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
30975 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
30976 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
30977 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
30978 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
30979 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
30980 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
30981 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
30990 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
30991 for (j
= 0; j
<= diff
; j
+= 1)
30993 /* This is guaranteed to be true as the value of diff
30994 is 7, 3, 1 and we should have enough elements in the
30995 queue to generate this. Getting a vector mask with a
30996 value of diff other than these values implies that
30997 something is wrong by the time we get here. */
30998 gcc_assert (i
+ j
< nelt
);
30999 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
31007 /* ??? The third operand is an artifact of the builtin infrastructure
31008 and is ignored by the actual instruction. */
31009 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
31013 /* Recognize patterns for the VTRN insns. */
31016 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
31018 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
31019 rtx out0
, out1
, in0
, in1
, x
;
31020 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
31022 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
31025 /* Note that these are little-endian tests. Adjust for big-endian later. */
31026 if (d
->perm
[0] == 0)
31028 else if (d
->perm
[0] == 1)
31032 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31034 for (i
= 0; i
< nelt
; i
+= 2)
31036 if (d
->perm
[i
] != i
+ odd
)
31038 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
31048 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
31049 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
31050 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
31051 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
31052 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
31053 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
31054 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
31055 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
31057 gcc_unreachable ();
31062 if (BYTES_BIG_ENDIAN
)
31064 x
= in0
, in0
= in1
, in1
= x
;
31069 out1
= gen_reg_rtx (d
->vmode
);
31071 x
= out0
, out0
= out1
, out1
= x
;
31073 emit_insn (gen (out0
, in0
, in1
, out1
));
31077 /* Recognize patterns for the VEXT insns. */
31080 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
31082 unsigned int i
, nelt
= d
->nelt
;
31083 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
31086 unsigned int location
;
31088 unsigned int next
= d
->perm
[0] + 1;
31090 /* TODO: Handle GCC's numbering of elements for big-endian. */
31091 if (BYTES_BIG_ENDIAN
)
31094 /* Check if the extracted indexes are increasing by one. */
31095 for (i
= 1; i
< nelt
; next
++, i
++)
31097 /* If we hit the most significant element of the 2nd vector in
31098 the previous iteration, no need to test further. */
31099 if (next
== 2 * nelt
)
31102 /* If we are operating on only one vector: it could be a
31103 rotation. If there are only two elements of size < 64, let
31104 arm_evpc_neon_vrev catch it. */
31105 if (d
->one_vector_p
&& (next
== nelt
))
31107 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
31113 if (d
->perm
[i
] != next
)
31117 location
= d
->perm
[0];
31121 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
31122 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
31123 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
31124 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
31125 case V2SImode
: gen
= gen_neon_vextv2si
; break;
31126 case V4SImode
: gen
= gen_neon_vextv4si
; break;
31127 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
31128 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
31129 case V2DImode
: gen
= gen_neon_vextv2di
; break;
31138 offset
= GEN_INT (location
);
31139 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
31143 /* The NEON VTBL instruction is a fully variable permuation that's even
31144 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31145 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31146 can do slightly better by expanding this as a constant where we don't
31147 have to apply a mask. */
31150 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
31152 rtx rperm
[MAX_VECT_LEN
], sel
;
31153 machine_mode vmode
= d
->vmode
;
31154 unsigned int i
, nelt
= d
->nelt
;
31156 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31157 numbering of elements for big-endian, we must reverse the order. */
31158 if (BYTES_BIG_ENDIAN
)
31164 /* Generic code will try constant permutation twice. Once with the
31165 original mode and again with the elements lowered to QImode.
31166 So wait and don't do the selector expansion ourselves. */
31167 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
31170 for (i
= 0; i
< nelt
; ++i
)
31171 rperm
[i
] = GEN_INT (d
->perm
[i
]);
31172 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
31173 sel
= force_reg (vmode
, sel
);
31175 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
31180 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
31182 /* Check if the input mask matches vext before reordering the
31185 if (arm_evpc_neon_vext (d
))
31188 /* The pattern matching functions above are written to look for a small
31189 number to begin the sequence (0, 1, N/2). If we begin with an index
31190 from the second operand, we can swap the operands. */
31191 if (d
->perm
[0] >= d
->nelt
)
31193 unsigned i
, nelt
= d
->nelt
;
31196 for (i
= 0; i
< nelt
; ++i
)
31197 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
31206 if (arm_evpc_neon_vuzp (d
))
31208 if (arm_evpc_neon_vzip (d
))
31210 if (arm_evpc_neon_vrev (d
))
31212 if (arm_evpc_neon_vtrn (d
))
31214 return arm_evpc_neon_vtbl (d
);
31219 /* Expand a vec_perm_const pattern. */
31222 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31224 struct expand_vec_perm_d d
;
31225 int i
, nelt
, which
;
31231 d
.vmode
= GET_MODE (target
);
31232 gcc_assert (VECTOR_MODE_P (d
.vmode
));
31233 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
31234 d
.testing_p
= false;
31236 for (i
= which
= 0; i
< nelt
; ++i
)
31238 rtx e
= XVECEXP (sel
, 0, i
);
31239 int ei
= INTVAL (e
) & (2 * nelt
- 1);
31240 which
|= (ei
< nelt
? 1 : 2);
31250 d
.one_vector_p
= false;
31251 if (!rtx_equal_p (op0
, op1
))
31254 /* The elements of PERM do not suggest that only the first operand
31255 is used, but both operands are identical. Allow easier matching
31256 of the permutation by folding the permutation into the single
31260 for (i
= 0; i
< nelt
; ++i
)
31261 d
.perm
[i
] &= nelt
- 1;
31263 d
.one_vector_p
= true;
31268 d
.one_vector_p
= true;
31272 return arm_expand_vec_perm_const_1 (&d
);
31275 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31278 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
31279 const unsigned char *sel
)
31281 struct expand_vec_perm_d d
;
31282 unsigned int i
, nelt
, which
;
31286 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
31287 d
.testing_p
= true;
31288 memcpy (d
.perm
, sel
, nelt
);
31290 /* Categorize the set of elements in the selector. */
31291 for (i
= which
= 0; i
< nelt
; ++i
)
31293 unsigned char e
= d
.perm
[i
];
31294 gcc_assert (e
< 2 * nelt
);
31295 which
|= (e
< nelt
? 1 : 2);
31298 /* For all elements from second vector, fold the elements to first. */
31300 for (i
= 0; i
< nelt
; ++i
)
31303 /* Check whether the mask can be applied to the vector type. */
31304 d
.one_vector_p
= (which
!= 3);
31306 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
31307 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
31308 if (!d
.one_vector_p
)
31309 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
31312 ret
= arm_expand_vec_perm_const_1 (&d
);
31319 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
31321 /* If we are soft float and we do not have ldrd
31322 then all auto increment forms are ok. */
31323 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
31328 /* Post increment and Pre Decrement are supported for all
31329 instruction forms except for vector forms. */
31332 if (VECTOR_MODE_P (mode
))
31334 if (code
!= ARM_PRE_DEC
)
31344 /* Without LDRD and mode size greater than
31345 word size, there is no point in auto-incrementing
31346 because ldm and stm will not have these forms. */
31347 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
31350 /* Vector and floating point modes do not support
31351 these auto increment forms. */
31352 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
31365 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31366 on ARM, since we know that shifts by negative amounts are no-ops.
31367 Additionally, the default expansion code is not available or suitable
31368 for post-reload insn splits (this can occur when the register allocator
31369 chooses not to do a shift in NEON).
31371 This function is used in both initial expand and post-reload splits, and
31372 handles all kinds of 64-bit shifts.
31374 Input requirements:
31375 - It is safe for the input and output to be the same register, but
31376 early-clobber rules apply for the shift amount and scratch registers.
31377 - Shift by register requires both scratch registers. In all other cases
31378 the scratch registers may be NULL.
31379 - Ashiftrt by a register also clobbers the CC register. */
31381 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
31382 rtx amount
, rtx scratch1
, rtx scratch2
)
31384 rtx out_high
= gen_highpart (SImode
, out
);
31385 rtx out_low
= gen_lowpart (SImode
, out
);
31386 rtx in_high
= gen_highpart (SImode
, in
);
31387 rtx in_low
= gen_lowpart (SImode
, in
);
31390 in = the register pair containing the input value.
31391 out = the destination register pair.
31392 up = the high- or low-part of each pair.
31393 down = the opposite part to "up".
31394 In a shift, we can consider bits to shift from "up"-stream to
31395 "down"-stream, so in a left-shift "up" is the low-part and "down"
31396 is the high-part of each register pair. */
31398 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
31399 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
31400 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
31401 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
31403 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
31405 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
31406 && GET_MODE (out
) == DImode
);
31408 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
31409 && GET_MODE (in
) == DImode
);
31411 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
31412 && GET_MODE (amount
) == SImode
)
31413 || CONST_INT_P (amount
)));
31414 gcc_assert (scratch1
== NULL
31415 || (GET_CODE (scratch1
) == SCRATCH
)
31416 || (GET_MODE (scratch1
) == SImode
31417 && REG_P (scratch1
)));
31418 gcc_assert (scratch2
== NULL
31419 || (GET_CODE (scratch2
) == SCRATCH
)
31420 || (GET_MODE (scratch2
) == SImode
31421 && REG_P (scratch2
)));
31422 gcc_assert (!REG_P (out
) || !REG_P (amount
)
31423 || !HARD_REGISTER_P (out
)
31424 || (REGNO (out
) != REGNO (amount
)
31425 && REGNO (out
) + 1 != REGNO (amount
)));
31427 /* Macros to make following code more readable. */
31428 #define SUB_32(DEST,SRC) \
31429 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31430 #define RSB_32(DEST,SRC) \
31431 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31432 #define SUB_S_32(DEST,SRC) \
31433 gen_addsi3_compare0 ((DEST), (SRC), \
31435 #define SET(DEST,SRC) \
31436 gen_rtx_SET (SImode, (DEST), (SRC))
31437 #define SHIFT(CODE,SRC,AMOUNT) \
31438 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31439 #define LSHIFT(CODE,SRC,AMOUNT) \
31440 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31441 SImode, (SRC), (AMOUNT))
31442 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31443 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31444 SImode, (SRC), (AMOUNT))
31446 gen_rtx_IOR (SImode, (A), (B))
31447 #define BRANCH(COND,LABEL) \
31448 gen_arm_cond_branch ((LABEL), \
31449 gen_rtx_ ## COND (CCmode, cc_reg, \
31453 /* Shifts by register and shifts by constant are handled separately. */
31454 if (CONST_INT_P (amount
))
31456 /* We have a shift-by-constant. */
31458 /* First, handle out-of-range shift amounts.
31459 In both cases we try to match the result an ARM instruction in a
31460 shift-by-register would give. This helps reduce execution
31461 differences between optimization levels, but it won't stop other
31462 parts of the compiler doing different things. This is "undefined
31463 behaviour, in any case. */
31464 if (INTVAL (amount
) <= 0)
31465 emit_insn (gen_movdi (out
, in
));
31466 else if (INTVAL (amount
) >= 64)
31468 if (code
== ASHIFTRT
)
31470 rtx const31_rtx
= GEN_INT (31);
31471 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
31472 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
31475 emit_insn (gen_movdi (out
, const0_rtx
));
31478 /* Now handle valid shifts. */
31479 else if (INTVAL (amount
) < 32)
31481 /* Shifts by a constant less than 32. */
31482 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
31484 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31485 emit_insn (SET (out_down
,
31486 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
31488 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31492 /* Shifts by a constant greater than 31. */
31493 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
31495 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
31496 if (code
== ASHIFTRT
)
31497 emit_insn (gen_ashrsi3 (out_up
, in_up
,
31500 emit_insn (SET (out_up
, const0_rtx
));
31505 /* We have a shift-by-register. */
31506 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
31508 /* This alternative requires the scratch registers. */
31509 gcc_assert (scratch1
&& REG_P (scratch1
));
31510 gcc_assert (scratch2
&& REG_P (scratch2
));
31512 /* We will need the values "amount-32" and "32-amount" later.
31513 Swapping them around now allows the later code to be more general. */
31517 emit_insn (SUB_32 (scratch1
, amount
));
31518 emit_insn (RSB_32 (scratch2
, amount
));
31521 emit_insn (RSB_32 (scratch1
, amount
));
31522 /* Also set CC = amount > 32. */
31523 emit_insn (SUB_S_32 (scratch2
, amount
));
31526 emit_insn (RSB_32 (scratch1
, amount
));
31527 emit_insn (SUB_32 (scratch2
, amount
));
31530 gcc_unreachable ();
31533 /* Emit code like this:
31536 out_down = in_down << amount;
31537 out_down = (in_up << (amount - 32)) | out_down;
31538 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31539 out_up = in_up << amount;
31542 out_down = in_down >> amount;
31543 out_down = (in_up << (32 - amount)) | out_down;
31545 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31546 out_up = in_up << amount;
31549 out_down = in_down >> amount;
31550 out_down = (in_up << (32 - amount)) | out_down;
31552 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31553 out_up = in_up << amount;
31555 The ARM and Thumb2 variants are the same but implemented slightly
31556 differently. If this were only called during expand we could just
31557 use the Thumb2 case and let combine do the right thing, but this
31558 can also be called from post-reload splitters. */
31560 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31562 if (!TARGET_THUMB2
)
31564 /* Emit code for ARM mode. */
31565 emit_insn (SET (out_down
,
31566 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
31567 if (code
== ASHIFTRT
)
31569 rtx_code_label
*done_label
= gen_label_rtx ();
31570 emit_jump_insn (BRANCH (LT
, done_label
));
31571 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
31573 emit_label (done_label
);
31576 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
31581 /* Emit code for Thumb2 mode.
31582 Thumb2 can't do shift and or in one insn. */
31583 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
31584 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
31586 if (code
== ASHIFTRT
)
31588 rtx_code_label
*done_label
= gen_label_rtx ();
31589 emit_jump_insn (BRANCH (LT
, done_label
));
31590 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
31591 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
31592 emit_label (done_label
);
31596 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
31597 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
31601 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31616 /* Returns true if a valid comparison operation and makes
31617 the operands in a form that is valid. */
31619 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
31621 enum rtx_code code
= GET_CODE (*comparison
);
31623 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
31624 ? GET_MODE (*op2
) : GET_MODE (*op1
);
31626 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
31628 if (code
== UNEQ
|| code
== LTGT
)
31631 code_int
= (int)code
;
31632 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
31633 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
31638 if (!arm_add_operand (*op1
, mode
))
31639 *op1
= force_reg (mode
, *op1
);
31640 if (!arm_add_operand (*op2
, mode
))
31641 *op2
= force_reg (mode
, *op2
);
31645 if (!cmpdi_operand (*op1
, mode
))
31646 *op1
= force_reg (mode
, *op1
);
31647 if (!cmpdi_operand (*op2
, mode
))
31648 *op2
= force_reg (mode
, *op2
);
31653 if (!arm_float_compare_operand (*op1
, mode
))
31654 *op1
= force_reg (mode
, *op1
);
31655 if (!arm_float_compare_operand (*op2
, mode
))
31656 *op2
= force_reg (mode
, *op2
);
31666 /* Maximum number of instructions to set block of memory. */
31668 arm_block_set_max_insns (void)
31670 if (optimize_function_for_size_p (cfun
))
31673 return current_tune
->max_insns_inline_memset
;
31676 /* Return TRUE if it's profitable to set block of memory for
31677 non-vectorized case. VAL is the value to set the memory
31678 with. LENGTH is the number of bytes to set. ALIGN is the
31679 alignment of the destination memory in bytes. UNALIGNED_P
31680 is TRUE if we can only set the memory with instructions
31681 meeting alignment requirements. USE_STRD_P is TRUE if we
31682 can use strd to set the memory. */
31684 arm_block_set_non_vect_profit_p (rtx val
,
31685 unsigned HOST_WIDE_INT length
,
31686 unsigned HOST_WIDE_INT align
,
31687 bool unaligned_p
, bool use_strd_p
)
31690 /* For leftovers in bytes of 0-7, we can set the memory block using
31691 strb/strh/str with minimum instruction number. */
31692 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
31696 num
= arm_const_inline_cost (SET
, val
);
31697 num
+= length
/ align
+ length
% align
;
31699 else if (use_strd_p
)
31701 num
= arm_const_double_inline_cost (val
);
31702 num
+= (length
>> 3) + leftover
[length
& 7];
31706 num
= arm_const_inline_cost (SET
, val
);
31707 num
+= (length
>> 2) + leftover
[length
& 3];
31710 /* We may be able to combine last pair STRH/STRB into a single STR
31711 by shifting one byte back. */
31712 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
31715 return (num
<= arm_block_set_max_insns ());
31718 /* Return TRUE if it's profitable to set block of memory for
31719 vectorized case. LENGTH is the number of bytes to set.
31720 ALIGN is the alignment of destination memory in bytes.
31721 MODE is the vector mode used to set the memory. */
31723 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
31724 unsigned HOST_WIDE_INT align
,
31728 bool unaligned_p
= ((align
& 3) != 0);
31729 unsigned int nelt
= GET_MODE_NUNITS (mode
);
31731 /* Instruction loading constant value. */
31733 /* Instructions storing the memory. */
31734 num
+= (length
+ nelt
- 1) / nelt
;
31735 /* Instructions adjusting the address expression. Only need to
31736 adjust address expression if it's 4 bytes aligned and bytes
31737 leftover can only be stored by mis-aligned store instruction. */
31738 if (!unaligned_p
&& (length
& 3) != 0)
31741 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31742 if (!unaligned_p
&& mode
== V16QImode
)
31745 return (num
<= arm_block_set_max_insns ());
31748 /* Set a block of memory using vectorization instructions for the
31749 unaligned case. We fill the first LENGTH bytes of the memory
31750 area starting from DSTBASE with byte constant VALUE. ALIGN is
31751 the alignment requirement of memory. Return TRUE if succeeded. */
31753 arm_block_set_unaligned_vect (rtx dstbase
,
31754 unsigned HOST_WIDE_INT length
,
31755 unsigned HOST_WIDE_INT value
,
31756 unsigned HOST_WIDE_INT align
)
31758 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
31760 rtx val_elt
, val_vec
, reg
;
31761 rtx rval
[MAX_VECT_LEN
];
31762 rtx (*gen_func
) (rtx
, rtx
);
31764 unsigned HOST_WIDE_INT v
= value
;
31766 gcc_assert ((align
& 0x3) != 0);
31767 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
31768 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
31769 if (length
>= nelt_v16
)
31772 gen_func
= gen_movmisalignv16qi
;
31777 gen_func
= gen_movmisalignv8qi
;
31779 nelt_mode
= GET_MODE_NUNITS (mode
);
31780 gcc_assert (length
>= nelt_mode
);
31781 /* Skip if it isn't profitable. */
31782 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
31785 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
31786 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
31788 v
= sext_hwi (v
, BITS_PER_WORD
);
31789 val_elt
= GEN_INT (v
);
31790 for (j
= 0; j
< nelt_mode
; j
++)
31793 reg
= gen_reg_rtx (mode
);
31794 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
31795 /* Emit instruction loading the constant value. */
31796 emit_move_insn (reg
, val_vec
);
31798 /* Handle nelt_mode bytes in a vector. */
31799 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
31801 emit_insn ((*gen_func
) (mem
, reg
));
31802 if (i
+ 2 * nelt_mode
<= length
)
31803 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
31806 /* If there are not less than nelt_v8 bytes leftover, we must be in
31808 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
31810 /* Handle (8, 16) bytes leftover. */
31811 if (i
+ nelt_v8
< length
)
31813 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
31814 /* We are shifting bytes back, set the alignment accordingly. */
31815 if ((length
& 1) != 0 && align
>= 2)
31816 set_mem_align (mem
, BITS_PER_UNIT
);
31818 emit_insn (gen_movmisalignv16qi (mem
, reg
));
31820 /* Handle (0, 8] bytes leftover. */
31821 else if (i
< length
&& i
+ nelt_v8
>= length
)
31823 if (mode
== V16QImode
)
31825 reg
= gen_lowpart (V8QImode
, reg
);
31826 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, 0);
31828 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
31829 + (nelt_mode
- nelt_v8
))));
31830 /* We are shifting bytes back, set the alignment accordingly. */
31831 if ((length
& 1) != 0 && align
>= 2)
31832 set_mem_align (mem
, BITS_PER_UNIT
);
31834 emit_insn (gen_movmisalignv8qi (mem
, reg
));
31840 /* Set a block of memory using vectorization instructions for the
31841 aligned case. We fill the first LENGTH bytes of the memory area
31842 starting from DSTBASE with byte constant VALUE. ALIGN is the
31843 alignment requirement of memory. Return TRUE if succeeded. */
31845 arm_block_set_aligned_vect (rtx dstbase
,
31846 unsigned HOST_WIDE_INT length
,
31847 unsigned HOST_WIDE_INT value
,
31848 unsigned HOST_WIDE_INT align
)
31850 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
31851 rtx dst
, addr
, mem
;
31852 rtx val_elt
, val_vec
, reg
;
31853 rtx rval
[MAX_VECT_LEN
];
31855 unsigned HOST_WIDE_INT v
= value
;
31857 gcc_assert ((align
& 0x3) == 0);
31858 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
31859 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
31860 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
31865 nelt_mode
= GET_MODE_NUNITS (mode
);
31866 gcc_assert (length
>= nelt_mode
);
31867 /* Skip if it isn't profitable. */
31868 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
31871 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
31873 v
= sext_hwi (v
, BITS_PER_WORD
);
31874 val_elt
= GEN_INT (v
);
31875 for (j
= 0; j
< nelt_mode
; j
++)
31878 reg
= gen_reg_rtx (mode
);
31879 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
31880 /* Emit instruction loading the constant value. */
31881 emit_move_insn (reg
, val_vec
);
31884 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31885 if (mode
== V16QImode
)
31887 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
31888 emit_insn (gen_movmisalignv16qi (mem
, reg
));
31890 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31891 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
31893 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
31894 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
31895 /* We are shifting bytes back, set the alignment accordingly. */
31896 if ((length
& 0x3) == 0)
31897 set_mem_align (mem
, BITS_PER_UNIT
* 4);
31898 else if ((length
& 0x1) == 0)
31899 set_mem_align (mem
, BITS_PER_UNIT
* 2);
31901 set_mem_align (mem
, BITS_PER_UNIT
);
31903 emit_insn (gen_movmisalignv16qi (mem
, reg
));
31906 /* Fall through for bytes leftover. */
31908 nelt_mode
= GET_MODE_NUNITS (mode
);
31909 reg
= gen_lowpart (V8QImode
, reg
);
31912 /* Handle 8 bytes in a vector. */
31913 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
31915 addr
= plus_constant (Pmode
, dst
, i
);
31916 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
31917 emit_move_insn (mem
, reg
);
31920 /* Handle single word leftover by shifting 4 bytes back. We can
31921 use aligned access for this case. */
31922 if (i
+ UNITS_PER_WORD
== length
)
31924 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
31925 mem
= adjust_automodify_address (dstbase
, mode
,
31926 addr
, i
- UNITS_PER_WORD
);
31927 /* We are shifting 4 bytes back, set the alignment accordingly. */
31928 if (align
> UNITS_PER_WORD
)
31929 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
31931 emit_move_insn (mem
, reg
);
31933 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31934 We have to use unaligned access for this case. */
31935 else if (i
< length
)
31937 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
31938 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
31939 /* We are shifting bytes back, set the alignment accordingly. */
31940 if ((length
& 1) == 0)
31941 set_mem_align (mem
, BITS_PER_UNIT
* 2);
31943 set_mem_align (mem
, BITS_PER_UNIT
);
31945 emit_insn (gen_movmisalignv8qi (mem
, reg
));
31951 /* Set a block of memory using plain strh/strb instructions, only
31952 using instructions allowed by ALIGN on processor. We fill the
31953 first LENGTH bytes of the memory area starting from DSTBASE
31954 with byte constant VALUE. ALIGN is the alignment requirement
31957 arm_block_set_unaligned_non_vect (rtx dstbase
,
31958 unsigned HOST_WIDE_INT length
,
31959 unsigned HOST_WIDE_INT value
,
31960 unsigned HOST_WIDE_INT align
)
31963 rtx dst
, addr
, mem
;
31964 rtx val_exp
, val_reg
, reg
;
31966 HOST_WIDE_INT v
= value
;
31968 gcc_assert (align
== 1 || align
== 2);
31971 v
|= (value
<< BITS_PER_UNIT
);
31973 v
= sext_hwi (v
, BITS_PER_WORD
);
31974 val_exp
= GEN_INT (v
);
31975 /* Skip if it isn't profitable. */
31976 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
31977 align
, true, false))
31980 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
31981 mode
= (align
== 2 ? HImode
: QImode
);
31982 val_reg
= force_reg (SImode
, val_exp
);
31983 reg
= gen_lowpart (mode
, val_reg
);
31985 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
31987 addr
= plus_constant (Pmode
, dst
, i
);
31988 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
31989 emit_move_insn (mem
, reg
);
31992 /* Handle single byte leftover. */
31993 if (i
+ 1 == length
)
31995 reg
= gen_lowpart (QImode
, val_reg
);
31996 addr
= plus_constant (Pmode
, dst
, i
);
31997 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
31998 emit_move_insn (mem
, reg
);
32002 gcc_assert (i
== length
);
32006 /* Set a block of memory using plain strd/str/strh/strb instructions,
32007 to permit unaligned copies on processors which support unaligned
32008 semantics for those instructions. We fill the first LENGTH bytes
32009 of the memory area starting from DSTBASE with byte constant VALUE.
32010 ALIGN is the alignment requirement of memory. */
32012 arm_block_set_aligned_non_vect (rtx dstbase
,
32013 unsigned HOST_WIDE_INT length
,
32014 unsigned HOST_WIDE_INT value
,
32015 unsigned HOST_WIDE_INT align
)
32018 rtx dst
, addr
, mem
;
32019 rtx val_exp
, val_reg
, reg
;
32020 unsigned HOST_WIDE_INT v
;
32023 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
32024 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
32026 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
32027 if (length
< UNITS_PER_WORD
)
32028 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
32031 v
|= (v
<< BITS_PER_WORD
);
32033 v
= sext_hwi (v
, BITS_PER_WORD
);
32035 val_exp
= GEN_INT (v
);
32036 /* Skip if it isn't profitable. */
32037 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
32038 align
, false, use_strd_p
))
32043 /* Try without strd. */
32044 v
= (v
>> BITS_PER_WORD
);
32045 v
= sext_hwi (v
, BITS_PER_WORD
);
32046 val_exp
= GEN_INT (v
);
32047 use_strd_p
= false;
32048 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
32049 align
, false, use_strd_p
))
32054 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32055 /* Handle double words using strd if possible. */
32058 val_reg
= force_reg (DImode
, val_exp
);
32060 for (; (i
+ 8 <= length
); i
+= 8)
32062 addr
= plus_constant (Pmode
, dst
, i
);
32063 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
32064 emit_move_insn (mem
, reg
);
32068 val_reg
= force_reg (SImode
, val_exp
);
32070 /* Handle words. */
32071 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
32072 for (; (i
+ 4 <= length
); i
+= 4)
32074 addr
= plus_constant (Pmode
, dst
, i
);
32075 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
32076 if ((align
& 3) == 0)
32077 emit_move_insn (mem
, reg
);
32079 emit_insn (gen_unaligned_storesi (mem
, reg
));
32082 /* Merge last pair of STRH and STRB into a STR if possible. */
32083 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
32085 addr
= plus_constant (Pmode
, dst
, i
- 1);
32086 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
32087 /* We are shifting one byte back, set the alignment accordingly. */
32088 if ((align
& 1) == 0)
32089 set_mem_align (mem
, BITS_PER_UNIT
);
32091 /* Most likely this is an unaligned access, and we can't tell at
32092 compilation time. */
32093 emit_insn (gen_unaligned_storesi (mem
, reg
));
32097 /* Handle half word leftover. */
32098 if (i
+ 2 <= length
)
32100 reg
= gen_lowpart (HImode
, val_reg
);
32101 addr
= plus_constant (Pmode
, dst
, i
);
32102 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
32103 if ((align
& 1) == 0)
32104 emit_move_insn (mem
, reg
);
32106 emit_insn (gen_unaligned_storehi (mem
, reg
));
32111 /* Handle single byte leftover. */
32112 if (i
+ 1 == length
)
32114 reg
= gen_lowpart (QImode
, val_reg
);
32115 addr
= plus_constant (Pmode
, dst
, i
);
32116 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
32117 emit_move_insn (mem
, reg
);
32123 /* Set a block of memory using vectorization instructions for both
32124 aligned and unaligned cases. We fill the first LENGTH bytes of
32125 the memory area starting from DSTBASE with byte constant VALUE.
32126 ALIGN is the alignment requirement of memory. */
32128 arm_block_set_vect (rtx dstbase
,
32129 unsigned HOST_WIDE_INT length
,
32130 unsigned HOST_WIDE_INT value
,
32131 unsigned HOST_WIDE_INT align
)
32133 /* Check whether we need to use unaligned store instruction. */
32134 if (((align
& 3) != 0 || (length
& 3) != 0)
32135 /* Check whether unaligned store instruction is available. */
32136 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
32139 if ((align
& 3) == 0)
32140 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
32142 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
32145 /* Expand string store operation. Firstly we try to do that by using
32146 vectorization instructions, then try with ARM unaligned access and
32147 double-word store if profitable. OPERANDS[0] is the destination,
32148 OPERANDS[1] is the number of bytes, operands[2] is the value to
32149 initialize the memory, OPERANDS[3] is the known alignment of the
32152 arm_gen_setmem (rtx
*operands
)
32154 rtx dstbase
= operands
[0];
32155 unsigned HOST_WIDE_INT length
;
32156 unsigned HOST_WIDE_INT value
;
32157 unsigned HOST_WIDE_INT align
;
32159 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
32162 length
= UINTVAL (operands
[1]);
32166 value
= (UINTVAL (operands
[2]) & 0xFF);
32167 align
= UINTVAL (operands
[3]);
32168 if (TARGET_NEON
&& length
>= 8
32169 && current_tune
->string_ops_prefer_neon
32170 && arm_block_set_vect (dstbase
, length
, value
, align
))
32173 if (!unaligned_access
&& (align
& 3) != 0)
32174 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
32176 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
32179 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32181 static unsigned HOST_WIDE_INT
32182 arm_asan_shadow_offset (void)
32184 return (unsigned HOST_WIDE_INT
) 1 << 29;
32188 /* This is a temporary fix for PR60655. Ideally we need
32189 to handle most of these cases in the generic part but
32190 currently we reject minus (..) (sym_ref). We try to
32191 ameliorate the case with minus (sym_ref1) (sym_ref2)
32192 where they are in the same section. */
32195 arm_const_not_ok_for_debug_p (rtx p
)
32197 tree decl_op0
= NULL
;
32198 tree decl_op1
= NULL
;
32200 if (GET_CODE (p
) == MINUS
)
32202 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
32204 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
32206 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
32207 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
32209 if ((TREE_CODE (decl_op1
) == VAR_DECL
32210 || TREE_CODE (decl_op1
) == CONST_DECL
)
32211 && (TREE_CODE (decl_op0
) == VAR_DECL
32212 || TREE_CODE (decl_op0
) == CONST_DECL
))
32213 return (get_variable_section (decl_op1
, false)
32214 != get_variable_section (decl_op0
, false));
32216 if (TREE_CODE (decl_op1
) == LABEL_DECL
32217 && TREE_CODE (decl_op0
) == LABEL_DECL
)
32218 return (DECL_CONTEXT (decl_op1
)
32219 != DECL_CONTEXT (decl_op0
));
32230 arm_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
32232 const unsigned ARM_FE_INVALID
= 1;
32233 const unsigned ARM_FE_DIVBYZERO
= 2;
32234 const unsigned ARM_FE_OVERFLOW
= 4;
32235 const unsigned ARM_FE_UNDERFLOW
= 8;
32236 const unsigned ARM_FE_INEXACT
= 16;
32237 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT
= (ARM_FE_INVALID
32242 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT
= 8;
32243 tree fenv_var
, get_fpscr
, set_fpscr
, mask
, ld_fenv
, masked_fenv
;
32244 tree new_fenv_var
, reload_fenv
, restore_fnenv
;
32245 tree update_call
, atomic_feraiseexcept
, hold_fnclex
;
32247 if (!TARGET_VFP
|| !TARGET_HARD_FLOAT
)
32250 /* Generate the equivalent of :
32251 unsigned int fenv_var;
32252 fenv_var = __builtin_arm_get_fpscr ();
32254 unsigned int masked_fenv;
32255 masked_fenv = fenv_var & mask;
32257 __builtin_arm_set_fpscr (masked_fenv); */
32259 fenv_var
= create_tmp_var (unsigned_type_node
, NULL
);
32260 get_fpscr
= arm_builtin_decls
[ARM_BUILTIN_GET_FPSCR
];
32261 set_fpscr
= arm_builtin_decls
[ARM_BUILTIN_SET_FPSCR
];
32262 mask
= build_int_cst (unsigned_type_node
,
32263 ~((ARM_FE_ALL_EXCEPT
<< ARM_FE_EXCEPT_SHIFT
)
32264 | ARM_FE_ALL_EXCEPT
));
32265 ld_fenv
= build2 (MODIFY_EXPR
, unsigned_type_node
,
32266 fenv_var
, build_call_expr (get_fpscr
, 0));
32267 masked_fenv
= build2 (BIT_AND_EXPR
, unsigned_type_node
, fenv_var
, mask
);
32268 hold_fnclex
= build_call_expr (set_fpscr
, 1, masked_fenv
);
32269 *hold
= build2 (COMPOUND_EXPR
, void_type_node
,
32270 build2 (COMPOUND_EXPR
, void_type_node
, masked_fenv
, ld_fenv
),
32273 /* Store the value of masked_fenv to clear the exceptions:
32274 __builtin_arm_set_fpscr (masked_fenv); */
32276 *clear
= build_call_expr (set_fpscr
, 1, masked_fenv
);
32278 /* Generate the equivalent of :
32279 unsigned int new_fenv_var;
32280 new_fenv_var = __builtin_arm_get_fpscr ();
32282 __builtin_arm_set_fpscr (fenv_var);
32284 __atomic_feraiseexcept (new_fenv_var); */
32286 new_fenv_var
= create_tmp_var (unsigned_type_node
, NULL
);
32287 reload_fenv
= build2 (MODIFY_EXPR
, unsigned_type_node
, new_fenv_var
,
32288 build_call_expr (get_fpscr
, 0));
32289 restore_fnenv
= build_call_expr (set_fpscr
, 1, fenv_var
);
32290 atomic_feraiseexcept
= builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
32291 update_call
= build_call_expr (atomic_feraiseexcept
, 1,
32292 fold_convert (integer_type_node
, new_fenv_var
));
32293 *update
= build2 (COMPOUND_EXPR
, void_type_node
,
32294 build2 (COMPOUND_EXPR
, void_type_node
,
32295 reload_fenv
, restore_fnenv
), update_call
);
32298 /* return TRUE if x is a reference to a value in a constant pool */
32300 arm_is_constant_pool_ref (rtx x
)
32303 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
32304 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
32307 #include "gt-arm.h"