1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
26 #include "hash-table.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
40 #include "insn-attr.h"
46 #include "diagnostic-core.h"
53 #include "target-def.h"
55 #include "langhooks.h"
/* Forward definitions of types.  */

/* A node in a minipool (a constant table emitted into the instruction
   stream -- see the minipool handling further down this file).  */
typedef struct minipool_node Mnode;
/* A record of an instruction that needs a minipool entry (a "fixup").  */
typedef struct minipool_fixup Mfix;

/* Hook that a language front end may set to emit language-specific
   object attributes.  NOTE(review): semantics inferred from the name --
   confirm against the front end(s) that assign it.  */
void (*arm_lang_output_object_attributes_hook)(void);
74 /* Forward function declarations. */
75 static bool arm_lra_p (void);
76 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
77 static int arm_compute_static_chain_stack_bytes (void);
78 static arm_stack_offsets
*arm_get_frame_offsets (void);
79 static void arm_add_gc_roots (void);
80 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
81 HOST_WIDE_INT
, rtx
, rtx
, int, int);
82 static unsigned bit_count (unsigned long);
83 static int arm_address_register_rtx_p (rtx
, int);
84 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
85 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
86 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
87 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
88 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
89 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
90 inline static int thumb1_index_register_rtx_p (rtx
, int);
91 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
92 static int thumb_far_jump_used_p (void);
93 static bool thumb_force_lr_save (void);
94 static unsigned arm_size_return_regs (void);
95 static bool arm_assemble_integer (rtx
, unsigned int, int);
96 static void arm_print_operand (FILE *, rtx
, int);
97 static void arm_print_operand_address (FILE *, rtx
);
98 static bool arm_print_operand_punct_valid_p (unsigned char code
);
99 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
100 static arm_cc
get_arm_condition_code (rtx
);
101 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
102 static const char *output_multi_immediate (rtx
*, const char *, const char *,
104 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
105 static struct machine_function
*arm_init_machine_status (void);
106 static void thumb_exit (FILE *, int);
107 static HOST_WIDE_INT
get_jump_table_size (rtx
);
108 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
109 static Mnode
*add_minipool_forward_ref (Mfix
*);
110 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
111 static Mnode
*add_minipool_backward_ref (Mfix
*);
112 static void assign_minipool_offsets (Mfix
*);
113 static void arm_print_value (FILE *, rtx
);
114 static void dump_minipool (rtx
);
115 static int arm_barrier_cost (rtx
);
116 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
117 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
118 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
120 static void arm_reorg (void);
121 static void note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
122 static unsigned long arm_compute_save_reg0_reg12_mask (void);
123 static unsigned long arm_compute_save_reg_mask (void);
124 static unsigned long arm_isr_value (tree
);
125 static unsigned long arm_compute_func_type (void);
126 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
127 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
128 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
130 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
132 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
133 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
134 static int arm_comp_type_attributes (const_tree
, const_tree
);
135 static void arm_set_default_type_attributes (tree
);
136 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
137 static int arm_sched_reorder (FILE *, int, rtx
*, int *, int);
138 static int optimal_immediate_sequence (enum rtx_code code
,
139 unsigned HOST_WIDE_INT val
,
140 struct four_ints
*return_sequence
);
141 static int optimal_immediate_sequence_1 (enum rtx_code code
,
142 unsigned HOST_WIDE_INT val
,
143 struct four_ints
*return_sequence
,
145 static int arm_get_strip_length (int);
146 static bool arm_function_ok_for_sibcall (tree
, tree
);
147 static enum machine_mode
arm_promote_function_mode (const_tree
,
148 enum machine_mode
, int *,
150 static bool arm_return_in_memory (const_tree
, const_tree
);
151 static rtx
arm_function_value (const_tree
, const_tree
, bool);
152 static rtx
arm_libcall_value_1 (enum machine_mode
);
153 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
154 static bool arm_function_value_regno_p (const unsigned int);
155 static void arm_internal_label (FILE *, const char *, unsigned long);
156 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
158 static bool arm_have_conditional_execution (void);
159 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
160 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
161 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
162 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
163 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
164 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
165 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
166 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
167 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
168 static int arm_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
169 static int arm_register_move_cost (enum machine_mode
, reg_class_t
, reg_class_t
);
170 static int arm_memory_move_cost (enum machine_mode
, reg_class_t
, bool);
171 static void arm_init_builtins (void);
172 static void arm_init_iwmmxt_builtins (void);
173 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
174 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
175 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
176 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
177 static tree
arm_builtin_decl (unsigned, bool);
178 static void emit_constant_insn (rtx cond
, rtx pattern
);
179 static rtx
emit_set_insn (rtx
, rtx
);
180 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
181 static int arm_arg_partial_bytes (cumulative_args_t
, enum machine_mode
,
183 static rtx
arm_function_arg (cumulative_args_t
, enum machine_mode
,
185 static void arm_function_arg_advance (cumulative_args_t
, enum machine_mode
,
187 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
188 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
190 static rtx
aapcs_libcall_value (enum machine_mode
);
191 static int aapcs_select_return_coproc (const_tree
, const_tree
);
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
195 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
198 static void arm_encode_section_info (tree
, rtx
, int);
201 static void arm_file_end (void);
202 static void arm_file_start (void);
204 static void arm_setup_incoming_varargs (cumulative_args_t
, enum machine_mode
,
206 static bool arm_pass_by_reference (cumulative_args_t
,
207 enum machine_mode
, const_tree
, bool);
208 static bool arm_promote_prototypes (const_tree
);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree
);
212 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
213 static bool arm_return_in_memory (const_tree
, const_tree
);
215 static void arm_unwind_emit (FILE *, rtx
);
216 static bool arm_output_ttype (rtx
);
217 static void arm_asm_emit_except_personality (rtx
);
218 static void arm_asm_init_sections (void);
220 static rtx
arm_dwarf_register_span (rtx
);
222 static tree
arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree
arm_get_cookie_size (tree
);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree
);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree
arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree
, rtx
);
234 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
237 static bool arm_cannot_copy_insn_p (rtx
);
238 static int arm_issue_rate (void);
239 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
240 static bool arm_output_addr_const_extra (FILE *, rtx
);
241 static bool arm_allocate_stack_slots_for_args (void);
242 static bool arm_warn_func_return (tree
);
243 static const char *arm_invalid_parameter_type (const_tree t
);
244 static const char *arm_invalid_return_type (const_tree t
);
245 static tree
arm_promoted_type (const_tree t
);
246 static tree
arm_convert_to_type (tree type
, tree expr
);
247 static bool arm_scalar_mode_supported_p (enum machine_mode
);
248 static bool arm_frame_pointer_required (void);
249 static bool arm_can_eliminate (const int, const int);
250 static void arm_asm_trampoline_template (FILE *);
251 static void arm_trampoline_init (rtx
, tree
, rtx
);
252 static rtx
arm_trampoline_adjust_address (rtx
);
253 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
254 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
255 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
256 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
257 static bool arm_array_mode_supported_p (enum machine_mode
,
258 unsigned HOST_WIDE_INT
);
259 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
260 static bool arm_class_likely_spilled_p (reg_class_t
);
261 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
262 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
263 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
267 static void arm_conditional_register_usage (void);
268 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
269 static unsigned int arm_autovectorize_vector_sizes (void);
270 static int arm_default_branch_cost (bool, bool);
271 static int arm_cortex_a5_branch_cost (bool, bool);
272 static int arm_cortex_m_branch_cost (bool, bool);
274 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
275 const unsigned char *sel
);
277 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
279 int misalign ATTRIBUTE_UNUSED
);
280 static unsigned arm_add_stmt_cost (void *data
, int count
,
281 enum vect_cost_for_stmt kind
,
282 struct _stmt_vec_info
*stmt_info
,
284 enum vect_cost_model_location where
);
286 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
287 bool op0_preserve_value
);
288 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
290 /* Table of machine attributes. */
291 static const struct attribute_spec arm_attribute_table
[] =
293 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
294 affects_type_identity } */
295 /* Function calls made to this symbol must be done indirectly, because
296 it may lie outside of the 26 bit addressing range of a normal function
298 { "long_call", 0, 0, false, true, true, NULL
, false },
299 /* Whereas these functions are always known to reside within the 26 bit
301 { "short_call", 0, 0, false, true, true, NULL
, false },
302 /* Specify the procedure call conventions for a function. */
303 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
305 /* Interrupt Service Routines have special prologue and epilogue requirements. */
306 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
308 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
310 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
313 /* ARM/PE has three new attributes:
315 dllexport - for exporting a function/variable that will live in a dll
316 dllimport - for importing a function/variable from a dll
318 Microsoft allows multiple declspecs in one __declspec, separating
319 them with spaces. We do NOT support this. Instead, use __declspec
322 { "dllimport", 0, 0, true, false, false, NULL
, false },
323 { "dllexport", 0, 0, true, false, false, NULL
, false },
324 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
326 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
327 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
328 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
329 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
332 { NULL
, 0, 0, false, false, false, NULL
, false }
335 /* Initialize the GCC target structure. */
336 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
337 #undef TARGET_MERGE_DECL_ATTRIBUTES
338 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
341 #undef TARGET_LEGITIMIZE_ADDRESS
342 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
345 #define TARGET_LRA_P arm_lra_p
347 #undef TARGET_ATTRIBUTE_TABLE
348 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
350 #undef TARGET_ASM_FILE_START
351 #define TARGET_ASM_FILE_START arm_file_start
352 #undef TARGET_ASM_FILE_END
353 #define TARGET_ASM_FILE_END arm_file_end
355 #undef TARGET_ASM_ALIGNED_SI_OP
356 #define TARGET_ASM_ALIGNED_SI_OP NULL
357 #undef TARGET_ASM_INTEGER
358 #define TARGET_ASM_INTEGER arm_assemble_integer
360 #undef TARGET_PRINT_OPERAND
361 #define TARGET_PRINT_OPERAND arm_print_operand
362 #undef TARGET_PRINT_OPERAND_ADDRESS
363 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
364 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
365 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
367 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
368 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
370 #undef TARGET_ASM_FUNCTION_PROLOGUE
371 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
373 #undef TARGET_ASM_FUNCTION_EPILOGUE
374 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
376 #undef TARGET_OPTION_OVERRIDE
377 #define TARGET_OPTION_OVERRIDE arm_option_override
379 #undef TARGET_COMP_TYPE_ATTRIBUTES
380 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
382 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
383 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
385 #undef TARGET_SCHED_ADJUST_COST
386 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
388 #undef TARGET_SCHED_REORDER
389 #define TARGET_SCHED_REORDER arm_sched_reorder
391 #undef TARGET_REGISTER_MOVE_COST
392 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
394 #undef TARGET_MEMORY_MOVE_COST
395 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
397 #undef TARGET_ENCODE_SECTION_INFO
399 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
401 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
404 #undef TARGET_STRIP_NAME_ENCODING
405 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
407 #undef TARGET_ASM_INTERNAL_LABEL
408 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
410 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
411 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
413 #undef TARGET_FUNCTION_VALUE
414 #define TARGET_FUNCTION_VALUE arm_function_value
416 #undef TARGET_LIBCALL_VALUE
417 #define TARGET_LIBCALL_VALUE arm_libcall_value
419 #undef TARGET_FUNCTION_VALUE_REGNO_P
420 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
422 #undef TARGET_ASM_OUTPUT_MI_THUNK
423 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
424 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
425 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
427 #undef TARGET_RTX_COSTS
428 #define TARGET_RTX_COSTS arm_rtx_costs
429 #undef TARGET_ADDRESS_COST
430 #define TARGET_ADDRESS_COST arm_address_cost
432 #undef TARGET_SHIFT_TRUNCATION_MASK
433 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
434 #undef TARGET_VECTOR_MODE_SUPPORTED_P
435 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
436 #undef TARGET_ARRAY_MODE_SUPPORTED_P
437 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
438 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
439 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
440 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
441 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
442 arm_autovectorize_vector_sizes
444 #undef TARGET_MACHINE_DEPENDENT_REORG
445 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
447 #undef TARGET_INIT_BUILTINS
448 #define TARGET_INIT_BUILTINS arm_init_builtins
449 #undef TARGET_EXPAND_BUILTIN
450 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
451 #undef TARGET_BUILTIN_DECL
452 #define TARGET_BUILTIN_DECL arm_builtin_decl
454 #undef TARGET_INIT_LIBFUNCS
455 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
457 #undef TARGET_PROMOTE_FUNCTION_MODE
458 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
459 #undef TARGET_PROMOTE_PROTOTYPES
460 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
461 #undef TARGET_PASS_BY_REFERENCE
462 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
463 #undef TARGET_ARG_PARTIAL_BYTES
464 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
465 #undef TARGET_FUNCTION_ARG
466 #define TARGET_FUNCTION_ARG arm_function_arg
467 #undef TARGET_FUNCTION_ARG_ADVANCE
468 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
469 #undef TARGET_FUNCTION_ARG_BOUNDARY
470 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
472 #undef TARGET_SETUP_INCOMING_VARARGS
473 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
475 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
476 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
478 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
479 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
480 #undef TARGET_TRAMPOLINE_INIT
481 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
482 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
483 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
485 #undef TARGET_WARN_FUNC_RETURN
486 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
488 #undef TARGET_DEFAULT_SHORT_ENUMS
489 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
491 #undef TARGET_ALIGN_ANON_BITFIELD
492 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
494 #undef TARGET_NARROW_VOLATILE_BITFIELD
495 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
497 #undef TARGET_CXX_GUARD_TYPE
498 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
500 #undef TARGET_CXX_GUARD_MASK_BIT
501 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
503 #undef TARGET_CXX_GET_COOKIE_SIZE
504 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
506 #undef TARGET_CXX_COOKIE_HAS_SIZE
507 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
509 #undef TARGET_CXX_CDTOR_RETURNS_THIS
510 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
512 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
513 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
515 #undef TARGET_CXX_USE_AEABI_ATEXIT
516 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
518 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
519 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
520 arm_cxx_determine_class_data_visibility
522 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
523 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
525 #undef TARGET_RETURN_IN_MSB
526 #define TARGET_RETURN_IN_MSB arm_return_in_msb
528 #undef TARGET_RETURN_IN_MEMORY
529 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
531 #undef TARGET_MUST_PASS_IN_STACK
532 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
535 #undef TARGET_ASM_UNWIND_EMIT
536 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
538 /* EABI unwinding tables use a different format for the typeinfo tables. */
539 #undef TARGET_ASM_TTYPE
540 #define TARGET_ASM_TTYPE arm_output_ttype
542 #undef TARGET_ARM_EABI_UNWINDER
543 #define TARGET_ARM_EABI_UNWINDER true
545 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
546 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
548 #undef TARGET_ASM_INIT_SECTIONS
549 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
550 #endif /* ARM_UNWIND_INFO */
552 #undef TARGET_DWARF_REGISTER_SPAN
553 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
555 #undef TARGET_CANNOT_COPY_INSN_P
556 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
559 #undef TARGET_HAVE_TLS
560 #define TARGET_HAVE_TLS true
563 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
564 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
566 #undef TARGET_LEGITIMATE_CONSTANT_P
567 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
569 #undef TARGET_CANNOT_FORCE_CONST_MEM
570 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
572 #undef TARGET_MAX_ANCHOR_OFFSET
573 #define TARGET_MAX_ANCHOR_OFFSET 4095
575 /* The minimum is set such that the total size of the block
576 for a particular anchor is -4088 + 1 + 4095 bytes, which is
577 divisible by eight, ensuring natural spacing of anchors. */
578 #undef TARGET_MIN_ANCHOR_OFFSET
579 #define TARGET_MIN_ANCHOR_OFFSET -4088
581 #undef TARGET_SCHED_ISSUE_RATE
582 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
584 #undef TARGET_MANGLE_TYPE
585 #define TARGET_MANGLE_TYPE arm_mangle_type
587 #undef TARGET_BUILD_BUILTIN_VA_LIST
588 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
589 #undef TARGET_EXPAND_BUILTIN_VA_START
590 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
591 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
592 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
595 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
596 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
599 #undef TARGET_LEGITIMATE_ADDRESS_P
600 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
602 #undef TARGET_PREFERRED_RELOAD_CLASS
603 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
605 #undef TARGET_INVALID_PARAMETER_TYPE
606 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
608 #undef TARGET_INVALID_RETURN_TYPE
609 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
611 #undef TARGET_PROMOTED_TYPE
612 #define TARGET_PROMOTED_TYPE arm_promoted_type
614 #undef TARGET_CONVERT_TO_TYPE
615 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
617 #undef TARGET_SCALAR_MODE_SUPPORTED_P
618 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
620 #undef TARGET_FRAME_POINTER_REQUIRED
621 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
623 #undef TARGET_CAN_ELIMINATE
624 #define TARGET_CAN_ELIMINATE arm_can_eliminate
626 #undef TARGET_CONDITIONAL_REGISTER_USAGE
627 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
629 #undef TARGET_CLASS_LIKELY_SPILLED_P
630 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
632 #undef TARGET_VECTORIZE_BUILTINS
633 #define TARGET_VECTORIZE_BUILTINS
635 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
636 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
637 arm_builtin_vectorized_function
639 #undef TARGET_VECTOR_ALIGNMENT
640 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
642 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
643 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
644 arm_vector_alignment_reachable
646 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
647 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
648 arm_builtin_support_vector_misalignment
650 #undef TARGET_PREFERRED_RENAME_CLASS
651 #define TARGET_PREFERRED_RENAME_CLASS \
652 arm_preferred_rename_class
654 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
655 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
656 arm_vectorize_vec_perm_const_ok
658 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
659 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
660 arm_builtin_vectorization_cost
661 #undef TARGET_VECTORIZE_ADD_STMT_COST
662 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
664 #undef TARGET_CANONICALIZE_COMPARISON
665 #define TARGET_CANONICALIZE_COMPARISON \
666 arm_canonicalize_comparison
668 #undef TARGET_ASAN_SHADOW_OFFSET
669 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
671 #undef MAX_INSN_PER_IT_BLOCK
672 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
674 #undef TARGET_CAN_USE_DOLOOP_P
675 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
/* The single global target hook vector.  TARGET_INITIALIZER picks up
   every TARGET_* macro #define'd earlier in this file.  */
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
/* Marker object on minipool_obstack, used to release everything
   allocated on it in one go.  NOTE(review): standard obstack idiom --
   confirm against the init/free sites elsewhere in this file.  */
static char * minipool_startobj;
/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
/* The assembler output stream (defined elsewhere in the compiler).  */
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;
/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set (per-CPU tuning parameters).  */
const struct tune_params *current_tune;
698 /* Which floating point hardware to schedule for. */
/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  One label per candidate
   register.  */
rtx thumb_call_via_label[14];
/* Nonzero if any call_via label has been used, so the helper code must
   be emitted.  NOTE(review): inferred from the name -- confirm against
   the code that emits the call_via sequences.  */
static int thumb_call_reg_needed;
708 /* Bit values used to identify processor capabilities. */
709 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
710 #define FL_ARCH3M (1 << 1) /* Extended multiply */
711 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
712 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
713 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
714 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
715 #define FL_THUMB (1 << 6) /* Thumb aware */
716 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
717 #define FL_STRONG (1 << 8) /* StrongARM */
718 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
719 #define FL_XSCALE (1 << 10) /* XScale */
720 /* spare (1 << 11) */
721 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
722 media instructions. */
723 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
724 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
725 Note: ARM6 & 7 derivatives only. */
726 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
727 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
728 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
730 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
731 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
732 #define FL_NEON (1 << 20) /* Neon instructions. */
733 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
735 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
736 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
737 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
738 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
740 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
741 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
743 /* Flags that only affect tuning, not available instructions. */
744 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
747 #define FL_FOR_ARCH2 FL_NOTM
748 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
749 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
750 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
751 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
752 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
753 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
754 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
755 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
756 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
757 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
758 #define FL_FOR_ARCH6J FL_FOR_ARCH6
759 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
760 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
761 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
762 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
763 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
764 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
765 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
766 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
767 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
768 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
769 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
770 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options
   should be used.  */
static unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
784 /* The following are used in the arm.md file as equivalents to bits
785 in the above two flag variables. */
787 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
790 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
793 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
796 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
799 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
802 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
805 /* Nonzero if this chip supports the ARM 6K extensions. */
808 /* Nonzero if instructions present in ARMv6-M can be used. */
811 /* Nonzero if this chip supports the ARM 7 extensions. */
814 /* Nonzero if instructions not present in the 'M' profile can be used. */
815 int arm_arch_notm
= 0;
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
820 /* Nonzero if instructions present in ARMv8 can be used. */
/* Nonzero if this chip can benefit from load scheduling
   (cf. the FL_LDSCHED capability bit above).  */
int arm_ld_sched = 0;
/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
848 /* Nonzero if generating Thumb instructions. */
851 /* Nonzero if generating Thumb-1 instructions. */
854 /* Nonzero if we should define __THUMB_INTERWORK__ in the
856 XXX This is a bit of a hack, it's intended to help work around
857 problems in GLD which doesn't understand that armv5t code is
858 interworking clean. */
859 int arm_cpp_interwork
= 0;
861 /* Nonzero if chip supports Thumb 2. */
864 /* Nonzero if chip supports integer division instruction. */
865 int arm_arch_arm_hwdiv
;
866 int arm_arch_thumb_hwdiv
;
868 /* Nonzero if we should use Neon to handle 64-bits operations rather
869 than core registers. */
870 int prefer_neon_for_64bits
= 0;
872 /* Nonzero if we shouldn't use literal pools. */
873 bool arm_disable_literal_pool
= false;
875 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
876 we must report the mode of the memory reference from
877 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
878 enum machine_mode output_memory_reference_mode
;
880 /* The register number to be used for the PIC offset register. */
881 unsigned arm_pic_register
= INVALID_REGNUM
;
883 /* Set to 1 after arm_reorg has started. Reset to start at the start of
884 the next function. */
885 static int after_arm_reorg
= 0;
887 enum arm_pcs arm_pcs_default
;
889 /* For an explanation of these variables, see final_prescan_insn below. */
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc
;
895 int arm_target_label
;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count
= 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask
= 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen
= 0;
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc
= 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
920 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
921 #define streq(string1, string2) (strcmp (string1, string2) == 0)
923 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
924 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
925 | (1 << PIC_OFFSET_TABLE_REGNUM)))
927 /* Initialization code. */
931 const char *const name
;
932 enum processor_type core
;
934 enum base_architecture base_arch
;
935 const unsigned long flags
;
936 const struct tune_params
*const tune
;
940 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
941 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
946 /* arm generic vectorizer costs. */
948 struct cpu_vec_costs arm_default_vec_cost
= {
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 1, /* vec_unalign_load_cost. */
957 1, /* vec_unalign_store_cost. */
958 1, /* vec_store_cost. */
959 3, /* cond_taken_branch_cost. */
960 1, /* cond_not_taken_branch_cost. */
963 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
964 #include "aarch-cost-tables.h"
968 const struct cpu_cost_table cortexa9_extra_costs
=
975 COSTS_N_INSNS (1), /* shift_reg. */
976 COSTS_N_INSNS (1), /* arith_shift. */
977 COSTS_N_INSNS (2), /* arith_shift_reg. */
979 COSTS_N_INSNS (1), /* log_shift_reg. */
980 COSTS_N_INSNS (1), /* extend. */
981 COSTS_N_INSNS (2), /* extend_arith. */
982 COSTS_N_INSNS (1), /* bfi. */
983 COSTS_N_INSNS (1), /* bfx. */
986 true /* non_exec_costs_exec. */
991 COSTS_N_INSNS (3), /* simple. */
992 COSTS_N_INSNS (3), /* flag_setting. */
993 COSTS_N_INSNS (2), /* extend. */
994 COSTS_N_INSNS (3), /* add. */
995 COSTS_N_INSNS (2), /* extend_add. */
996 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1000 0, /* simple (N/A). */
1001 0, /* flag_setting (N/A). */
1002 COSTS_N_INSNS (4), /* extend. */
1004 COSTS_N_INSNS (4), /* extend_add. */
1010 COSTS_N_INSNS (2), /* load. */
1011 COSTS_N_INSNS (2), /* load_sign_extend. */
1012 COSTS_N_INSNS (2), /* ldrd. */
1013 COSTS_N_INSNS (2), /* ldm_1st. */
1014 1, /* ldm_regs_per_insn_1st. */
1015 2, /* ldm_regs_per_insn_subsequent. */
1016 COSTS_N_INSNS (5), /* loadf. */
1017 COSTS_N_INSNS (5), /* loadd. */
1018 COSTS_N_INSNS (1), /* load_unaligned. */
1019 COSTS_N_INSNS (2), /* store. */
1020 COSTS_N_INSNS (2), /* strd. */
1021 COSTS_N_INSNS (2), /* stm_1st. */
1022 1, /* stm_regs_per_insn_1st. */
1023 2, /* stm_regs_per_insn_subsequent. */
1024 COSTS_N_INSNS (1), /* storef. */
1025 COSTS_N_INSNS (1), /* stored. */
1026 COSTS_N_INSNS (1) /* store_unaligned. */
1031 COSTS_N_INSNS (14), /* div. */
1032 COSTS_N_INSNS (4), /* mult. */
1033 COSTS_N_INSNS (7), /* mult_addsub. */
1034 COSTS_N_INSNS (30), /* fma. */
1035 COSTS_N_INSNS (3), /* addsub. */
1036 COSTS_N_INSNS (1), /* fpconst. */
1037 COSTS_N_INSNS (1), /* neg. */
1038 COSTS_N_INSNS (3), /* compare. */
1039 COSTS_N_INSNS (3), /* widen. */
1040 COSTS_N_INSNS (3), /* narrow. */
1041 COSTS_N_INSNS (3), /* toint. */
1042 COSTS_N_INSNS (3), /* fromint. */
1043 COSTS_N_INSNS (3) /* roundint. */
1047 COSTS_N_INSNS (24), /* div. */
1048 COSTS_N_INSNS (5), /* mult. */
1049 COSTS_N_INSNS (8), /* mult_addsub. */
1050 COSTS_N_INSNS (30), /* fma. */
1051 COSTS_N_INSNS (3), /* addsub. */
1052 COSTS_N_INSNS (1), /* fpconst. */
1053 COSTS_N_INSNS (1), /* neg. */
1054 COSTS_N_INSNS (3), /* compare. */
1055 COSTS_N_INSNS (3), /* widen. */
1056 COSTS_N_INSNS (3), /* narrow. */
1057 COSTS_N_INSNS (3), /* toint. */
1058 COSTS_N_INSNS (3), /* fromint. */
1059 COSTS_N_INSNS (3) /* roundint. */
1064 COSTS_N_INSNS (1) /* alu. */
1069 const struct cpu_cost_table cortexa7_extra_costs
=
1075 COSTS_N_INSNS (1), /* shift. */
1076 COSTS_N_INSNS (1), /* shift_reg. */
1077 COSTS_N_INSNS (1), /* arith_shift. */
1078 COSTS_N_INSNS (1), /* arith_shift_reg. */
1079 COSTS_N_INSNS (1), /* log_shift. */
1080 COSTS_N_INSNS (1), /* log_shift_reg. */
1081 COSTS_N_INSNS (1), /* extend. */
1082 COSTS_N_INSNS (1), /* extend_arith. */
1083 COSTS_N_INSNS (1), /* bfi. */
1084 COSTS_N_INSNS (1), /* bfx. */
1085 COSTS_N_INSNS (1), /* clz. */
1087 true /* non_exec_costs_exec. */
1094 COSTS_N_INSNS (1), /* flag_setting. */
1095 COSTS_N_INSNS (1), /* extend. */
1096 COSTS_N_INSNS (1), /* add. */
1097 COSTS_N_INSNS (1), /* extend_add. */
1098 COSTS_N_INSNS (7) /* idiv. */
1102 0, /* simple (N/A). */
1103 0, /* flag_setting (N/A). */
1104 COSTS_N_INSNS (1), /* extend. */
1106 COSTS_N_INSNS (2), /* extend_add. */
1112 COSTS_N_INSNS (1), /* load. */
1113 COSTS_N_INSNS (1), /* load_sign_extend. */
1114 COSTS_N_INSNS (3), /* ldrd. */
1115 COSTS_N_INSNS (1), /* ldm_1st. */
1116 1, /* ldm_regs_per_insn_1st. */
1117 2, /* ldm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (2), /* loadf. */
1119 COSTS_N_INSNS (2), /* loadd. */
1120 COSTS_N_INSNS (1), /* load_unaligned. */
1121 COSTS_N_INSNS (1), /* store. */
1122 COSTS_N_INSNS (3), /* strd. */
1123 COSTS_N_INSNS (1), /* stm_1st. */
1124 1, /* stm_regs_per_insn_1st. */
1125 2, /* stm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (2), /* storef. */
1127 COSTS_N_INSNS (2), /* stored. */
1128 COSTS_N_INSNS (1) /* store_unaligned. */
1133 COSTS_N_INSNS (15), /* div. */
1134 COSTS_N_INSNS (3), /* mult. */
1135 COSTS_N_INSNS (7), /* mult_addsub. */
1136 COSTS_N_INSNS (7), /* fma. */
1137 COSTS_N_INSNS (3), /* addsub. */
1138 COSTS_N_INSNS (3), /* fpconst. */
1139 COSTS_N_INSNS (3), /* neg. */
1140 COSTS_N_INSNS (3), /* compare. */
1141 COSTS_N_INSNS (3), /* widen. */
1142 COSTS_N_INSNS (3), /* narrow. */
1143 COSTS_N_INSNS (3), /* toint. */
1144 COSTS_N_INSNS (3), /* fromint. */
1145 COSTS_N_INSNS (3) /* roundint. */
1149 COSTS_N_INSNS (30), /* div. */
1150 COSTS_N_INSNS (6), /* mult. */
1151 COSTS_N_INSNS (10), /* mult_addsub. */
1152 COSTS_N_INSNS (7), /* fma. */
1153 COSTS_N_INSNS (3), /* addsub. */
1154 COSTS_N_INSNS (3), /* fpconst. */
1155 COSTS_N_INSNS (3), /* neg. */
1156 COSTS_N_INSNS (3), /* compare. */
1157 COSTS_N_INSNS (3), /* widen. */
1158 COSTS_N_INSNS (3), /* narrow. */
1159 COSTS_N_INSNS (3), /* toint. */
1160 COSTS_N_INSNS (3), /* fromint. */
1161 COSTS_N_INSNS (3) /* roundint. */
1166 COSTS_N_INSNS (1) /* alu. */
1170 const struct cpu_cost_table cortexa12_extra_costs
=
1177 COSTS_N_INSNS (1), /* shift_reg. */
1178 COSTS_N_INSNS (1), /* arith_shift. */
1179 COSTS_N_INSNS (1), /* arith_shift_reg. */
1180 COSTS_N_INSNS (1), /* log_shift. */
1181 COSTS_N_INSNS (1), /* log_shift_reg. */
1183 COSTS_N_INSNS (1), /* extend_arith. */
1185 COSTS_N_INSNS (1), /* bfx. */
1186 COSTS_N_INSNS (1), /* clz. */
1188 true /* non_exec_costs_exec. */
1193 COSTS_N_INSNS (2), /* simple. */
1194 COSTS_N_INSNS (3), /* flag_setting. */
1195 COSTS_N_INSNS (2), /* extend. */
1196 COSTS_N_INSNS (3), /* add. */
1197 COSTS_N_INSNS (2), /* extend_add. */
1198 COSTS_N_INSNS (18) /* idiv. */
1202 0, /* simple (N/A). */
1203 0, /* flag_setting (N/A). */
1204 COSTS_N_INSNS (3), /* extend. */
1206 COSTS_N_INSNS (3), /* extend_add. */
1212 COSTS_N_INSNS (3), /* load. */
1213 COSTS_N_INSNS (3), /* load_sign_extend. */
1214 COSTS_N_INSNS (3), /* ldrd. */
1215 COSTS_N_INSNS (3), /* ldm_1st. */
1216 1, /* ldm_regs_per_insn_1st. */
1217 2, /* ldm_regs_per_insn_subsequent. */
1218 COSTS_N_INSNS (3), /* loadf. */
1219 COSTS_N_INSNS (3), /* loadd. */
1220 0, /* load_unaligned. */
1224 1, /* stm_regs_per_insn_1st. */
1225 2, /* stm_regs_per_insn_subsequent. */
1226 COSTS_N_INSNS (2), /* storef. */
1227 COSTS_N_INSNS (2), /* stored. */
1228 0 /* store_unaligned. */
1233 COSTS_N_INSNS (17), /* div. */
1234 COSTS_N_INSNS (4), /* mult. */
1235 COSTS_N_INSNS (8), /* mult_addsub. */
1236 COSTS_N_INSNS (8), /* fma. */
1237 COSTS_N_INSNS (4), /* addsub. */
1238 COSTS_N_INSNS (2), /* fpconst. */
1239 COSTS_N_INSNS (2), /* neg. */
1240 COSTS_N_INSNS (2), /* compare. */
1241 COSTS_N_INSNS (4), /* widen. */
1242 COSTS_N_INSNS (4), /* narrow. */
1243 COSTS_N_INSNS (4), /* toint. */
1244 COSTS_N_INSNS (4), /* fromint. */
1245 COSTS_N_INSNS (4) /* roundint. */
1249 COSTS_N_INSNS (31), /* div. */
1250 COSTS_N_INSNS (4), /* mult. */
1251 COSTS_N_INSNS (8), /* mult_addsub. */
1252 COSTS_N_INSNS (8), /* fma. */
1253 COSTS_N_INSNS (4), /* addsub. */
1254 COSTS_N_INSNS (2), /* fpconst. */
1255 COSTS_N_INSNS (2), /* neg. */
1256 COSTS_N_INSNS (2), /* compare. */
1257 COSTS_N_INSNS (4), /* widen. */
1258 COSTS_N_INSNS (4), /* narrow. */
1259 COSTS_N_INSNS (4), /* toint. */
1260 COSTS_N_INSNS (4), /* fromint. */
1261 COSTS_N_INSNS (4) /* roundint. */
1266 COSTS_N_INSNS (1) /* alu. */
1270 const struct cpu_cost_table cortexa15_extra_costs
=
1278 COSTS_N_INSNS (1), /* arith_shift. */
1279 COSTS_N_INSNS (1), /* arith_shift_reg. */
1280 COSTS_N_INSNS (1), /* log_shift. */
1281 COSTS_N_INSNS (1), /* log_shift_reg. */
1283 COSTS_N_INSNS (1), /* extend_arith. */
1284 COSTS_N_INSNS (1), /* bfi. */
1288 true /* non_exec_costs_exec. */
1293 COSTS_N_INSNS (2), /* simple. */
1294 COSTS_N_INSNS (3), /* flag_setting. */
1295 COSTS_N_INSNS (2), /* extend. */
1296 COSTS_N_INSNS (2), /* add. */
1297 COSTS_N_INSNS (2), /* extend_add. */
1298 COSTS_N_INSNS (18) /* idiv. */
1302 0, /* simple (N/A). */
1303 0, /* flag_setting (N/A). */
1304 COSTS_N_INSNS (3), /* extend. */
1306 COSTS_N_INSNS (3), /* extend_add. */
1312 COSTS_N_INSNS (3), /* load. */
1313 COSTS_N_INSNS (3), /* load_sign_extend. */
1314 COSTS_N_INSNS (3), /* ldrd. */
1315 COSTS_N_INSNS (4), /* ldm_1st. */
1316 1, /* ldm_regs_per_insn_1st. */
1317 2, /* ldm_regs_per_insn_subsequent. */
1318 COSTS_N_INSNS (4), /* loadf. */
1319 COSTS_N_INSNS (4), /* loadd. */
1320 0, /* load_unaligned. */
1323 COSTS_N_INSNS (1), /* stm_1st. */
1324 1, /* stm_regs_per_insn_1st. */
1325 2, /* stm_regs_per_insn_subsequent. */
1328 0 /* store_unaligned. */
1333 COSTS_N_INSNS (17), /* div. */
1334 COSTS_N_INSNS (4), /* mult. */
1335 COSTS_N_INSNS (8), /* mult_addsub. */
1336 COSTS_N_INSNS (8), /* fma. */
1337 COSTS_N_INSNS (4), /* addsub. */
1338 COSTS_N_INSNS (2), /* fpconst. */
1339 COSTS_N_INSNS (2), /* neg. */
1340 COSTS_N_INSNS (5), /* compare. */
1341 COSTS_N_INSNS (4), /* widen. */
1342 COSTS_N_INSNS (4), /* narrow. */
1343 COSTS_N_INSNS (4), /* toint. */
1344 COSTS_N_INSNS (4), /* fromint. */
1345 COSTS_N_INSNS (4) /* roundint. */
1349 COSTS_N_INSNS (31), /* div. */
1350 COSTS_N_INSNS (4), /* mult. */
1351 COSTS_N_INSNS (8), /* mult_addsub. */
1352 COSTS_N_INSNS (8), /* fma. */
1353 COSTS_N_INSNS (4), /* addsub. */
1354 COSTS_N_INSNS (2), /* fpconst. */
1355 COSTS_N_INSNS (2), /* neg. */
1356 COSTS_N_INSNS (2), /* compare. */
1357 COSTS_N_INSNS (4), /* widen. */
1358 COSTS_N_INSNS (4), /* narrow. */
1359 COSTS_N_INSNS (4), /* toint. */
1360 COSTS_N_INSNS (4), /* fromint. */
1361 COSTS_N_INSNS (4) /* roundint. */
1366 COSTS_N_INSNS (1) /* alu. */
1370 const struct cpu_cost_table v7m_extra_costs
=
1378 0, /* arith_shift. */
1379 COSTS_N_INSNS (1), /* arith_shift_reg. */
1381 COSTS_N_INSNS (1), /* log_shift_reg. */
1383 COSTS_N_INSNS (1), /* extend_arith. */
1387 COSTS_N_INSNS (1), /* non_exec. */
1388 false /* non_exec_costs_exec. */
1393 COSTS_N_INSNS (1), /* simple. */
1394 COSTS_N_INSNS (1), /* flag_setting. */
1395 COSTS_N_INSNS (2), /* extend. */
1396 COSTS_N_INSNS (1), /* add. */
1397 COSTS_N_INSNS (3), /* extend_add. */
1398 COSTS_N_INSNS (8) /* idiv. */
1402 0, /* simple (N/A). */
1403 0, /* flag_setting (N/A). */
1404 COSTS_N_INSNS (2), /* extend. */
1406 COSTS_N_INSNS (3), /* extend_add. */
1412 COSTS_N_INSNS (2), /* load. */
1413 0, /* load_sign_extend. */
1414 COSTS_N_INSNS (3), /* ldrd. */
1415 COSTS_N_INSNS (2), /* ldm_1st. */
1416 1, /* ldm_regs_per_insn_1st. */
1417 1, /* ldm_regs_per_insn_subsequent. */
1418 COSTS_N_INSNS (2), /* loadf. */
1419 COSTS_N_INSNS (3), /* loadd. */
1420 COSTS_N_INSNS (1), /* load_unaligned. */
1421 COSTS_N_INSNS (2), /* store. */
1422 COSTS_N_INSNS (3), /* strd. */
1423 COSTS_N_INSNS (2), /* stm_1st. */
1424 1, /* stm_regs_per_insn_1st. */
1425 1, /* stm_regs_per_insn_subsequent. */
1426 COSTS_N_INSNS (2), /* storef. */
1427 COSTS_N_INSNS (3), /* stored. */
1428 COSTS_N_INSNS (1) /* store_unaligned. */
1433 COSTS_N_INSNS (7), /* div. */
1434 COSTS_N_INSNS (2), /* mult. */
1435 COSTS_N_INSNS (5), /* mult_addsub. */
1436 COSTS_N_INSNS (3), /* fma. */
1437 COSTS_N_INSNS (1), /* addsub. */
1449 COSTS_N_INSNS (15), /* div. */
1450 COSTS_N_INSNS (5), /* mult. */
1451 COSTS_N_INSNS (7), /* mult_addsub. */
1452 COSTS_N_INSNS (7), /* fma. */
1453 COSTS_N_INSNS (3), /* addsub. */
1466 COSTS_N_INSNS (1) /* alu. */
1470 const struct tune_params arm_slowmul_tune
=
1472 arm_slowmul_rtx_costs
,
1474 NULL
, /* Sched adj cost. */
1475 3, /* Constant limit. */
1476 5, /* Max cond insns. */
1477 ARM_PREFETCH_NOT_BENEFICIAL
,
1478 true, /* Prefer constant pool. */
1479 arm_default_branch_cost
,
1480 false, /* Prefer LDRD/STRD. */
1481 {true, true}, /* Prefer non short circuit. */
1482 &arm_default_vec_cost
, /* Vectorizer costs. */
1483 false /* Prefer Neon for 64-bits bitops. */
1486 const struct tune_params arm_fastmul_tune
=
1488 arm_fastmul_rtx_costs
,
1490 NULL
, /* Sched adj cost. */
1491 1, /* Constant limit. */
1492 5, /* Max cond insns. */
1493 ARM_PREFETCH_NOT_BENEFICIAL
,
1494 true, /* Prefer constant pool. */
1495 arm_default_branch_cost
,
1496 false, /* Prefer LDRD/STRD. */
1497 {true, true}, /* Prefer non short circuit. */
1498 &arm_default_vec_cost
, /* Vectorizer costs. */
1499 false /* Prefer Neon for 64-bits bitops. */
1502 /* StrongARM has early execution of branches, so a sequence that is worth
1503 skipping is shorter. Set max_insns_skipped to a lower value. */
1505 const struct tune_params arm_strongarm_tune
=
1507 arm_fastmul_rtx_costs
,
1509 NULL
, /* Sched adj cost. */
1510 1, /* Constant limit. */
1511 3, /* Max cond insns. */
1512 ARM_PREFETCH_NOT_BENEFICIAL
,
1513 true, /* Prefer constant pool. */
1514 arm_default_branch_cost
,
1515 false, /* Prefer LDRD/STRD. */
1516 {true, true}, /* Prefer non short circuit. */
1517 &arm_default_vec_cost
, /* Vectorizer costs. */
1518 false /* Prefer Neon for 64-bits bitops. */
1521 const struct tune_params arm_xscale_tune
=
1523 arm_xscale_rtx_costs
,
1525 xscale_sched_adjust_cost
,
1526 2, /* Constant limit. */
1527 3, /* Max cond insns. */
1528 ARM_PREFETCH_NOT_BENEFICIAL
,
1529 true, /* Prefer constant pool. */
1530 arm_default_branch_cost
,
1531 false, /* Prefer LDRD/STRD. */
1532 {true, true}, /* Prefer non short circuit. */
1533 &arm_default_vec_cost
, /* Vectorizer costs. */
1534 false /* Prefer Neon for 64-bits bitops. */
1537 const struct tune_params arm_9e_tune
=
1541 NULL
, /* Sched adj cost. */
1542 1, /* Constant limit. */
1543 5, /* Max cond insns. */
1544 ARM_PREFETCH_NOT_BENEFICIAL
,
1545 true, /* Prefer constant pool. */
1546 arm_default_branch_cost
,
1547 false, /* Prefer LDRD/STRD. */
1548 {true, true}, /* Prefer non short circuit. */
1549 &arm_default_vec_cost
, /* Vectorizer costs. */
1550 false /* Prefer Neon for 64-bits bitops. */
1553 const struct tune_params arm_v6t2_tune
=
1557 NULL
, /* Sched adj cost. */
1558 1, /* Constant limit. */
1559 5, /* Max cond insns. */
1560 ARM_PREFETCH_NOT_BENEFICIAL
,
1561 false, /* Prefer constant pool. */
1562 arm_default_branch_cost
,
1563 false, /* Prefer LDRD/STRD. */
1564 {true, true}, /* Prefer non short circuit. */
1565 &arm_default_vec_cost
, /* Vectorizer costs. */
1566 false /* Prefer Neon for 64-bits bitops. */
1569 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1570 const struct tune_params arm_cortex_tune
=
1573 &generic_extra_costs
,
1574 NULL
, /* Sched adj cost. */
1575 1, /* Constant limit. */
1576 5, /* Max cond insns. */
1577 ARM_PREFETCH_NOT_BENEFICIAL
,
1578 false, /* Prefer constant pool. */
1579 arm_default_branch_cost
,
1580 false, /* Prefer LDRD/STRD. */
1581 {true, true}, /* Prefer non short circuit. */
1582 &arm_default_vec_cost
, /* Vectorizer costs. */
1583 false /* Prefer Neon for 64-bits bitops. */
1586 const struct tune_params arm_cortex_a7_tune
=
1589 &cortexa7_extra_costs
,
1591 1, /* Constant limit. */
1592 5, /* Max cond insns. */
1593 ARM_PREFETCH_NOT_BENEFICIAL
,
1594 false, /* Prefer constant pool. */
1595 arm_default_branch_cost
,
1596 false, /* Prefer LDRD/STRD. */
1597 {true, true}, /* Prefer non short circuit. */
1598 &arm_default_vec_cost
, /* Vectorizer costs. */
1599 false /* Prefer Neon for 64-bits bitops. */
1602 const struct tune_params arm_cortex_a15_tune
=
1605 &cortexa15_extra_costs
,
1606 NULL
, /* Sched adj cost. */
1607 1, /* Constant limit. */
1608 2, /* Max cond insns. */
1609 ARM_PREFETCH_NOT_BENEFICIAL
,
1610 false, /* Prefer constant pool. */
1611 arm_default_branch_cost
,
1612 true, /* Prefer LDRD/STRD. */
1613 {true, true}, /* Prefer non short circuit. */
1614 &arm_default_vec_cost
, /* Vectorizer costs. */
1615 false /* Prefer Neon for 64-bits bitops. */
1618 const struct tune_params arm_cortex_a53_tune
=
1621 &cortexa53_extra_costs
,
1622 NULL
, /* Scheduler cost adjustment. */
1623 1, /* Constant limit. */
1624 5, /* Max cond insns. */
1625 ARM_PREFETCH_NOT_BENEFICIAL
,
1626 false, /* Prefer constant pool. */
1627 arm_default_branch_cost
,
1628 false, /* Prefer LDRD/STRD. */
1629 {true, true}, /* Prefer non short circuit. */
1630 &arm_default_vec_cost
, /* Vectorizer costs. */
1631 false /* Prefer Neon for 64-bits bitops. */
1634 const struct tune_params arm_cortex_a57_tune
=
1637 &cortexa57_extra_costs
,
1638 NULL
, /* Scheduler cost adjustment. */
1639 1, /* Constant limit. */
1640 2, /* Max cond insns. */
1641 ARM_PREFETCH_NOT_BENEFICIAL
,
1642 false, /* Prefer constant pool. */
1643 arm_default_branch_cost
,
1644 true, /* Prefer LDRD/STRD. */
1645 {true, true}, /* Prefer non short circuit. */
1646 &arm_default_vec_cost
, /* Vectorizer costs. */
1647 false /* Prefer Neon for 64-bits bitops. */
1650 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1651 less appealing. Set max_insns_skipped to a low value. */
1653 const struct tune_params arm_cortex_a5_tune
=
1657 NULL
, /* Sched adj cost. */
1658 1, /* Constant limit. */
1659 1, /* Max cond insns. */
1660 ARM_PREFETCH_NOT_BENEFICIAL
,
1661 false, /* Prefer constant pool. */
1662 arm_cortex_a5_branch_cost
,
1663 false, /* Prefer LDRD/STRD. */
1664 {false, false}, /* Prefer non short circuit. */
1665 &arm_default_vec_cost
, /* Vectorizer costs. */
1666 false /* Prefer Neon for 64-bits bitops. */
1669 const struct tune_params arm_cortex_a9_tune
=
1672 &cortexa9_extra_costs
,
1673 cortex_a9_sched_adjust_cost
,
1674 1, /* Constant limit. */
1675 5, /* Max cond insns. */
1676 ARM_PREFETCH_BENEFICIAL(4,32,32),
1677 false, /* Prefer constant pool. */
1678 arm_default_branch_cost
,
1679 false, /* Prefer LDRD/STRD. */
1680 {true, true}, /* Prefer non short circuit. */
1681 &arm_default_vec_cost
, /* Vectorizer costs. */
1682 false /* Prefer Neon for 64-bits bitops. */
1685 const struct tune_params arm_cortex_a12_tune
=
1688 &cortexa12_extra_costs
,
1690 1, /* Constant limit. */
1691 5, /* Max cond insns. */
1692 ARM_PREFETCH_BENEFICIAL(4,32,32),
1693 false, /* Prefer constant pool. */
1694 arm_default_branch_cost
,
1695 true, /* Prefer LDRD/STRD. */
1696 {true, true}, /* Prefer non short circuit. */
1697 &arm_default_vec_cost
, /* Vectorizer costs. */
1698 false /* Prefer Neon for 64-bits bitops. */
1701 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1702 cycle to execute each. An LDR from the constant pool also takes two cycles
1703 to execute, but mildly increases pipelining opportunity (consecutive
1704 loads/stores can be pipelined together, saving one cycle), and may also
1705 improve icache utilisation. Hence we prefer the constant pool for such
1708 const struct tune_params arm_v7m_tune
=
1712 NULL
, /* Sched adj cost. */
1713 1, /* Constant limit. */
1714 2, /* Max cond insns. */
1715 ARM_PREFETCH_NOT_BENEFICIAL
,
1716 true, /* Prefer constant pool. */
1717 arm_cortex_m_branch_cost
,
1718 false, /* Prefer LDRD/STRD. */
1719 {false, false}, /* Prefer non short circuit. */
1720 &arm_default_vec_cost
, /* Vectorizer costs. */
1721 false /* Prefer Neon for 64-bits bitops. */
1724 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1725 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1726 const struct tune_params arm_v6m_tune
=
1730 NULL
, /* Sched adj cost. */
1731 1, /* Constant limit. */
1732 5, /* Max cond insns. */
1733 ARM_PREFETCH_NOT_BENEFICIAL
,
1734 false, /* Prefer constant pool. */
1735 arm_default_branch_cost
,
1736 false, /* Prefer LDRD/STRD. */
1737 {false, false}, /* Prefer non short circuit. */
1738 &arm_default_vec_cost
, /* Vectorizer costs. */
1739 false /* Prefer Neon for 64-bits bitops. */
1742 const struct tune_params arm_fa726te_tune
=
1746 fa726te_sched_adjust_cost
,
1747 1, /* Constant limit. */
1748 5, /* Max cond insns. */
1749 ARM_PREFETCH_NOT_BENEFICIAL
,
1750 true, /* Prefer constant pool. */
1751 arm_default_branch_cost
,
1752 false, /* Prefer LDRD/STRD. */
1753 {true, true}, /* Prefer non short circuit. */
1754 &arm_default_vec_cost
, /* Vectorizer costs. */
1755 false /* Prefer Neon for 64-bits bitops. */
1759 /* Not all of these give usefully different compilation alternatives,
1760 but there is no simple way of generalizing them. */
1761 static const struct processors all_cores
[] =
1764 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1765 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1766 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1767 #include "arm-cores.def"
1769 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1772 static const struct processors all_architectures
[] =
1774 /* ARM Architectures */
1775 /* We don't specify tuning costs here as it will be figured out
1778 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1779 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1780 #include "arm-arches.def"
1782 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1786 /* These are populated as commandline arguments are processed, or NULL
1787 if not specified. */
1788 static const struct processors
*arm_selected_arch
;
1789 static const struct processors
*arm_selected_cpu
;
1790 static const struct processors
*arm_selected_tune
;
1792 /* The name of the preprocessor macro to define for this architecture. */
1794 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
1796 /* Available values for -mfpu=. */
1798 static const struct arm_fpu_desc all_fpus
[] =
1800 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1801 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1802 #include "arm-fpus.def"
1807 /* Supported TLS relocations. */
1815 TLS_DESCSEQ
/* GNU scheme */
1818 /* The maximum number of insns to be used when loading a constant. */
1820 arm_constant_limit (bool size_p
)
1822 return size_p
? 1 : current_tune
->constant_limit
;
1825 /* Emit an insn that's a simple single-set. Both the operands must be known
1828 emit_set_insn (rtx x
, rtx y
)
1830 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
/* Return the number of bits set in VALUE.  Uses Kernighan's method:
   each iteration clears exactly one set bit, so the loop runs once
   per set bit rather than once per bit position.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
1850 enum machine_mode mode
;
1852 } arm_fixed_mode_set
;
1854 /* A small helper for setting fixed-point library libfuncs. */
1857 arm_set_fixed_optab_libfunc (optab optable
, enum machine_mode mode
,
1858 const char *funcname
, const char *modename
,
1863 if (num_suffix
== 0)
1864 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
1866 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
1868 set_optab_libfunc (optable
, mode
, buffer
);
1872 arm_set_fixed_conv_libfunc (convert_optab optable
, enum machine_mode to
,
1873 enum machine_mode from
, const char *funcname
,
1874 const char *toname
, const char *fromname
)
1877 const char *maybe_suffix_2
= "";
1879 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1880 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
1881 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
1882 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
1883 maybe_suffix_2
= "2";
1885 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
1888 set_conv_libfunc (optable
, to
, from
, buffer
);
1891 /* Set up library functions unique to ARM. */
1894 arm_init_libfuncs (void)
1896 /* For Linux, we have access to kernel support for atomic operations. */
1897 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
1898 init_sync_libfuncs (2 * UNITS_PER_WORD
);
1900 /* There are no special library functions unless we are using the
1905 /* The functions below are described in Section 4 of the "Run-Time
1906 ABI for the ARM architecture", Version 1.0. */
1908 /* Double-precision floating-point arithmetic. Table 2. */
1909 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
1910 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
1911 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
1912 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
1913 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
1915 /* Double-precision comparisons. Table 3. */
1916 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
1917 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
1918 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
1919 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
1920 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
1921 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
1922 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
1924 /* Single-precision floating-point arithmetic. Table 4. */
1925 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
1926 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
1927 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
1928 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
1929 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
1931 /* Single-precision comparisons. Table 5. */
1932 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
1933 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
1934 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
1935 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
1936 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
1937 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
1938 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
1940 /* Floating-point to integer conversions. Table 6. */
1941 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
1942 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
1943 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
1944 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
1945 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
1946 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
1947 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
1948 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
1950 /* Conversions between floating types. Table 7. */
1951 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
1952 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
1954 /* Integer to floating-point conversions. Table 8. */
1955 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
1956 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
1957 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
1958 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
1959 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
1960 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
1961 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
1962 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
1964 /* Long long. Table 9. */
1965 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
1966 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
1967 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
1968 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
1969 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
1970 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
1971 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
1972 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
1974 /* Integer (32/32->32) division. \S 4.3.1. */
1975 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
1976 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
1978 /* The divmod functions are designed so that they can be used for
1979 plain division, even though they return both the quotient and the
1980 remainder. The quotient is returned in the usual location (i.e.,
1981 r0 for SImode, {r0, r1} for DImode), just as would be expected
1982 for an ordinary division routine. Because the AAPCS calling
1983 conventions specify that all of { r0, r1, r2, r3 } are
1984 callee-saved registers, there is no need to tell the compiler
1985 explicitly that those registers are clobbered by these
1987 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
1988 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
1990 /* For SImode division the ABI provides div-without-mod routines,
1991 which are faster. */
1992 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
1993 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
1995 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1996 divmod libcalls instead. */
1997 set_optab_libfunc (smod_optab
, DImode
, NULL
);
1998 set_optab_libfunc (umod_optab
, DImode
, NULL
);
1999 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2000 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2002 /* Half-precision float operations. The compiler handles all operations
2003 with NULL libfuncs by converting the SFmode. */
2004 switch (arm_fp16_format
)
2006 case ARM_FP16_FORMAT_IEEE
:
2007 case ARM_FP16_FORMAT_ALTERNATIVE
:
2010 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2011 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2013 : "__gnu_f2h_alternative"));
2014 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2015 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2017 : "__gnu_h2f_alternative"));
2020 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2021 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2022 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2023 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2024 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2027 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2028 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2029 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2030 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2031 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2032 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2033 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2040 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2042 const arm_fixed_mode_set fixed_arith_modes
[] =
2063 const arm_fixed_mode_set fixed_conv_modes
[] =
2093 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2095 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2096 "add", fixed_arith_modes
[i
].name
, 3);
2097 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2098 "ssadd", fixed_arith_modes
[i
].name
, 3);
2099 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2100 "usadd", fixed_arith_modes
[i
].name
, 3);
2101 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2102 "sub", fixed_arith_modes
[i
].name
, 3);
2103 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2104 "sssub", fixed_arith_modes
[i
].name
, 3);
2105 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2106 "ussub", fixed_arith_modes
[i
].name
, 3);
2107 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2108 "mul", fixed_arith_modes
[i
].name
, 3);
2109 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2110 "ssmul", fixed_arith_modes
[i
].name
, 3);
2111 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2112 "usmul", fixed_arith_modes
[i
].name
, 3);
2113 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2114 "div", fixed_arith_modes
[i
].name
, 3);
2115 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2116 "udiv", fixed_arith_modes
[i
].name
, 3);
2117 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2118 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2119 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2120 "usdiv", fixed_arith_modes
[i
].name
, 3);
2121 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2122 "neg", fixed_arith_modes
[i
].name
, 2);
2123 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2124 "ssneg", fixed_arith_modes
[i
].name
, 2);
2125 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2126 "usneg", fixed_arith_modes
[i
].name
, 2);
2127 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2128 "ashl", fixed_arith_modes
[i
].name
, 3);
2129 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2130 "ashr", fixed_arith_modes
[i
].name
, 3);
2131 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2132 "lshr", fixed_arith_modes
[i
].name
, 3);
2133 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2134 "ssashl", fixed_arith_modes
[i
].name
, 3);
2135 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2136 "usashl", fixed_arith_modes
[i
].name
, 3);
2137 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2138 "cmp", fixed_arith_modes
[i
].name
, 2);
2141 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2142 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2145 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2146 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2149 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2150 fixed_conv_modes
[j
].mode
, "fract",
2151 fixed_conv_modes
[i
].name
,
2152 fixed_conv_modes
[j
].name
);
2153 arm_set_fixed_conv_libfunc (satfract_optab
,
2154 fixed_conv_modes
[i
].mode
,
2155 fixed_conv_modes
[j
].mode
, "satfract",
2156 fixed_conv_modes
[i
].name
,
2157 fixed_conv_modes
[j
].name
);
2158 arm_set_fixed_conv_libfunc (fractuns_optab
,
2159 fixed_conv_modes
[i
].mode
,
2160 fixed_conv_modes
[j
].mode
, "fractuns",
2161 fixed_conv_modes
[i
].name
,
2162 fixed_conv_modes
[j
].name
);
2163 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2164 fixed_conv_modes
[i
].mode
,
2165 fixed_conv_modes
[j
].mode
, "satfractuns",
2166 fixed_conv_modes
[i
].name
,
2167 fixed_conv_modes
[j
].name
);
2171 if (TARGET_AAPCS_BASED
)
2172 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2175 /* On AAPCS systems, this is the "struct __va_list". */
2176 static GTY(()) tree va_list_type
;
2178 /* Return the type to use as __builtin_va_list. */
2180 arm_build_builtin_va_list (void)
2185 if (!TARGET_AAPCS_BASED
)
2186 return std_build_builtin_va_list ();
2188 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2196 The C Library ABI further reinforces this definition in \S
2199 We must follow this definition exactly. The structure tag
2200 name is visible in C++ mangled names, and thus forms a part
2201 of the ABI. The field name may be used by people who
2202 #include <stdarg.h>. */
2203 /* Create the type. */
2204 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2205 /* Give it the required name. */
2206 va_list_name
= build_decl (BUILTINS_LOCATION
,
2208 get_identifier ("__va_list"),
2210 DECL_ARTIFICIAL (va_list_name
) = 1;
2211 TYPE_NAME (va_list_type
) = va_list_name
;
2212 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2213 /* Create the __ap field. */
2214 ap_field
= build_decl (BUILTINS_LOCATION
,
2216 get_identifier ("__ap"),
2218 DECL_ARTIFICIAL (ap_field
) = 1;
2219 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2220 TYPE_FIELDS (va_list_type
) = ap_field
;
2221 /* Compute its layout. */
2222 layout_type (va_list_type
);
2224 return va_list_type
;
2227 /* Return an expression of type "void *" pointing to the next
2228 available argument in a variable-argument list. VALIST is the
2229 user-level va_list object, of type __builtin_va_list. */
2231 arm_extract_valist_ptr (tree valist
)
2233 if (TREE_TYPE (valist
) == error_mark_node
)
2234 return error_mark_node
;
2236 /* On an AAPCS target, the pointer is stored within "struct
2238 if (TARGET_AAPCS_BASED
)
2240 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2241 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2242 valist
, ap_field
, NULL_TREE
);
2248 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2250 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2252 valist
= arm_extract_valist_ptr (valist
);
2253 std_expand_builtin_va_start (valist
, nextarg
);
2256 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2258 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2261 valist
= arm_extract_valist_ptr (valist
);
2262 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2265 /* Fix up any incompatible options that the user has specified. */
2267 arm_option_override (void)
2269 if (global_options_set
.x_arm_arch_option
)
2270 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2272 if (global_options_set
.x_arm_cpu_option
)
2274 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2275 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2278 if (global_options_set
.x_arm_tune_option
)
2279 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2281 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2282 SUBTARGET_OVERRIDE_OPTIONS
;
2285 if (arm_selected_arch
)
2287 if (arm_selected_cpu
)
2289 /* Check for conflict between mcpu and march. */
2290 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
2292 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2293 arm_selected_cpu
->name
, arm_selected_arch
->name
);
2294 /* -march wins for code generation.
2295 -mcpu wins for default tuning. */
2296 if (!arm_selected_tune
)
2297 arm_selected_tune
= arm_selected_cpu
;
2299 arm_selected_cpu
= arm_selected_arch
;
2303 arm_selected_arch
= NULL
;
2306 /* Pick a CPU based on the architecture. */
2307 arm_selected_cpu
= arm_selected_arch
;
2310 /* If the user did not specify a processor, choose one for them. */
2311 if (!arm_selected_cpu
)
2313 const struct processors
* sel
;
2314 unsigned int sought
;
2316 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
2317 if (!arm_selected_cpu
->name
)
2319 #ifdef SUBTARGET_CPU_DEFAULT
2320 /* Use the subtarget default CPU if none was specified by
2322 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
2324 /* Default to ARM6. */
2325 if (!arm_selected_cpu
->name
)
2326 arm_selected_cpu
= &all_cores
[arm6
];
2329 sel
= arm_selected_cpu
;
2330 insn_flags
= sel
->flags
;
2332 /* Now check to see if the user has specified some command line
2333 switch that require certain abilities from the cpu. */
2336 if (TARGET_INTERWORK
|| TARGET_THUMB
)
2338 sought
|= (FL_THUMB
| FL_MODE32
);
2340 /* There are no ARM processors that support both APCS-26 and
2341 interworking. Therefore we force FL_MODE26 to be removed
2342 from insn_flags here (if it was set), so that the search
2343 below will always be able to find a compatible processor. */
2344 insn_flags
&= ~FL_MODE26
;
2347 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
2349 /* Try to locate a CPU type that supports all of the abilities
2350 of the default CPU, plus the extra abilities requested by
2352 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2353 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
2356 if (sel
->name
== NULL
)
2358 unsigned current_bit_count
= 0;
2359 const struct processors
* best_fit
= NULL
;
2361 /* Ideally we would like to issue an error message here
2362 saying that it was not possible to find a CPU compatible
2363 with the default CPU, but which also supports the command
2364 line options specified by the programmer, and so they
2365 ought to use the -mcpu=<name> command line option to
2366 override the default CPU type.
2368 If we cannot find a cpu that has both the
2369 characteristics of the default cpu and the given
2370 command line options we scan the array again looking
2371 for a best match. */
2372 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2373 if ((sel
->flags
& sought
) == sought
)
2377 count
= bit_count (sel
->flags
& insn_flags
);
2379 if (count
>= current_bit_count
)
2382 current_bit_count
= count
;
2386 gcc_assert (best_fit
);
2390 arm_selected_cpu
= sel
;
2394 gcc_assert (arm_selected_cpu
);
2395 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2396 if (!arm_selected_tune
)
2397 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
2399 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
2400 insn_flags
= arm_selected_cpu
->flags
;
2401 arm_base_arch
= arm_selected_cpu
->base_arch
;
2403 arm_tune
= arm_selected_tune
->core
;
2404 tune_flags
= arm_selected_tune
->flags
;
2405 current_tune
= arm_selected_tune
->tune
;
2407 /* Make sure that the processor choice does not conflict with any of the
2408 other command line choices. */
2409 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
2410 error ("target CPU does not support ARM mode");
2412 /* BPABI targets use linker tricks to allow interworking on cores
2413 without thumb support. */
2414 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
2416 warning (0, "target CPU does not support interworking" );
2417 target_flags
&= ~MASK_INTERWORK
;
2420 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
2422 warning (0, "target CPU does not support THUMB instructions");
2423 target_flags
&= ~MASK_THUMB
;
2426 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
2428 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2429 target_flags
&= ~MASK_APCS_FRAME
;
2432 /* Callee super interworking implies thumb interworking. Adding
2433 this to the flags here simplifies the logic elsewhere. */
2434 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
2435 target_flags
|= MASK_INTERWORK
;
2437 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2438 from here where no function is being compiled currently. */
2439 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
2440 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2442 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
2443 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2445 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
2447 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2448 target_flags
|= MASK_APCS_FRAME
;
2451 if (TARGET_POKE_FUNCTION_NAME
)
2452 target_flags
|= MASK_APCS_FRAME
;
2454 if (TARGET_APCS_REENT
&& flag_pic
)
2455 error ("-fpic and -mapcs-reent are incompatible");
2457 if (TARGET_APCS_REENT
)
2458 warning (0, "APCS reentrant code not supported. Ignored");
2460 /* If this target is normally configured to use APCS frames, warn if they
2461 are turned off and debugging is turned on. */
2463 && write_symbols
!= NO_DEBUG
2464 && !TARGET_APCS_FRAME
2465 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2466 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2468 if (TARGET_APCS_FLOAT
)
2469 warning (0, "passing floating point arguments in fp regs not yet supported");
2471 if (TARGET_LITTLE_WORDS
)
2472 warning (OPT_Wdeprecated
, "%<mwords-little-endian%> is deprecated and "
2473 "will be removed in a future release");
2475 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2476 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
2477 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
2478 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
2479 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
2480 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
2481 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
2482 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
2483 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
2484 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
2485 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
2486 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
2487 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
2488 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
2489 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
2491 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
2492 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
2493 thumb_code
= TARGET_ARM
== 0;
2494 thumb1_code
= TARGET_THUMB1
!= 0;
2495 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
2496 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
2497 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
2498 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
2499 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
2500 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
2501 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
2502 arm_arch_crc
= (insn_flags
& FL_CRC32
) != 0;
2503 if (arm_restrict_it
== 2)
2504 arm_restrict_it
= arm_arch8
&& TARGET_THUMB2
;
2507 arm_restrict_it
= 0;
2509 /* If we are not using the default (ARM mode) section anchor offset
2510 ranges, then set the correct ranges now. */
2513 /* Thumb-1 LDR instructions cannot have negative offsets.
2514 Permissible positive offset ranges are 5-bit (for byte loads),
2515 6-bit (for halfword loads), or 7-bit (for word loads).
2516 Empirical results suggest a 7-bit anchor range gives the best
2517 overall code size. */
2518 targetm
.min_anchor_offset
= 0;
2519 targetm
.max_anchor_offset
= 127;
2521 else if (TARGET_THUMB2
)
2523 /* The minimum is set such that the total size of the block
2524 for a particular anchor is 248 + 1 + 4095 bytes, which is
2525 divisible by eight, ensuring natural spacing of anchors. */
2526 targetm
.min_anchor_offset
= -248;
2527 targetm
.max_anchor_offset
= 4095;
2530 /* V5 code we generate is completely interworking capable, so we turn off
2531 TARGET_INTERWORK here to avoid many tests later on. */
2533 /* XXX However, we must pass the right pre-processor defines to CPP
2534 or GLD can get confused. This is a hack. */
2535 if (TARGET_INTERWORK
)
2536 arm_cpp_interwork
= 1;
2539 target_flags
&= ~MASK_INTERWORK
;
2541 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
2542 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2544 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
2545 error ("iwmmxt abi requires an iwmmxt capable cpu");
2547 if (!global_options_set
.x_arm_fpu_index
)
2549 const char *target_fpu_name
;
2552 #ifdef FPUTYPE_DEFAULT
2553 target_fpu_name
= FPUTYPE_DEFAULT
;
2555 target_fpu_name
= "vfp";
2558 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
2563 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
2565 switch (arm_fpu_desc
->model
)
2567 case ARM_FP_MODEL_VFP
:
2568 arm_fpu_attr
= FPU_VFP
;
2575 if (TARGET_AAPCS_BASED
)
2577 if (TARGET_CALLER_INTERWORKING
)
2578 error ("AAPCS does not support -mcaller-super-interworking");
2580 if (TARGET_CALLEE_INTERWORKING
)
2581 error ("AAPCS does not support -mcallee-super-interworking");
2584 /* iWMMXt and NEON are incompatible. */
2585 if (TARGET_IWMMXT
&& TARGET_NEON
)
2586 error ("iWMMXt and NEON are incompatible");
2588 /* iWMMXt unsupported under Thumb mode. */
2589 if (TARGET_THUMB
&& TARGET_IWMMXT
)
2590 error ("iWMMXt unsupported under Thumb mode");
2592 /* __fp16 support currently assumes the core has ldrh. */
2593 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
2594 sorry ("__fp16 and no ldrh");
2596 /* If soft-float is specified then don't use FPU. */
2597 if (TARGET_SOFT_FLOAT
)
2598 arm_fpu_attr
= FPU_NONE
;
2600 if (TARGET_AAPCS_BASED
)
2602 if (arm_abi
== ARM_ABI_IWMMXT
)
2603 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
2604 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
2605 && TARGET_HARD_FLOAT
2607 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
2609 arm_pcs_default
= ARM_PCS_AAPCS
;
2613 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
2614 sorry ("-mfloat-abi=hard and VFP");
2616 if (arm_abi
== ARM_ABI_APCS
)
2617 arm_pcs_default
= ARM_PCS_APCS
;
2619 arm_pcs_default
= ARM_PCS_ATPCS
;
2622 /* For arm2/3 there is no need to do any scheduling if we are doing
2623 software floating-point. */
2624 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
2625 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
2627 /* Use the cp15 method if it is available. */
2628 if (target_thread_pointer
== TP_AUTO
)
2630 if (arm_arch6k
&& !TARGET_THUMB1
)
2631 target_thread_pointer
= TP_CP15
;
2633 target_thread_pointer
= TP_SOFT
;
2636 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
2637 error ("can not use -mtp=cp15 with 16-bit Thumb");
2639 /* Override the default structure alignment for AAPCS ABI. */
2640 if (!global_options_set
.x_arm_structure_size_boundary
)
2642 if (TARGET_AAPCS_BASED
)
2643 arm_structure_size_boundary
= 8;
2647 if (arm_structure_size_boundary
!= 8
2648 && arm_structure_size_boundary
!= 32
2649 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
2651 if (ARM_DOUBLEWORD_ALIGN
)
2653 "structure size boundary can only be set to 8, 32 or 64");
2655 warning (0, "structure size boundary can only be set to 8 or 32");
2656 arm_structure_size_boundary
2657 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
2661 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
2663 error ("RTP PIC is incompatible with Thumb");
2667 /* If stack checking is disabled, we can use r10 as the PIC register,
2668 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2669 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
2671 if (TARGET_VXWORKS_RTP
)
2672 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2673 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
2676 if (flag_pic
&& TARGET_VXWORKS_RTP
)
2677 arm_pic_register
= 9;
2679 if (arm_pic_register_string
!= NULL
)
2681 int pic_register
= decode_reg_name (arm_pic_register_string
);
2684 warning (0, "-mpic-register= is useless without -fpic");
2686 /* Prevent the user from choosing an obviously stupid PIC register. */
2687 else if (pic_register
< 0 || call_used_regs
[pic_register
]
2688 || pic_register
== HARD_FRAME_POINTER_REGNUM
2689 || pic_register
== STACK_POINTER_REGNUM
2690 || pic_register
>= PC_REGNUM
2691 || (TARGET_VXWORKS_RTP
2692 && (unsigned int) pic_register
!= arm_pic_register
))
2693 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
2695 arm_pic_register
= pic_register
;
2698 if (TARGET_VXWORKS_RTP
2699 && !global_options_set
.x_arm_pic_data_is_text_relative
)
2700 arm_pic_data_is_text_relative
= 0;
2702 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2703 if (fix_cm3_ldrd
== 2)
2705 if (arm_selected_cpu
->core
== cortexm3
)
2711 /* Enable -munaligned-access by default for
2712 - all ARMv6 architecture-based processors
2713 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2714 - ARMv8 architecture-base processors.
2716 Disable -munaligned-access by default for
2717 - all pre-ARMv6 architecture-based processors
2718 - ARMv6-M architecture-based processors. */
2720 if (unaligned_access
== 2)
2722 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
2723 unaligned_access
= 1;
2725 unaligned_access
= 0;
2727 else if (unaligned_access
== 1
2728 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2730 warning (0, "target CPU does not support unaligned accesses");
2731 unaligned_access
= 0;
2734 if (TARGET_THUMB1
&& flag_schedule_insns
)
2736 /* Don't warn since it's on by default in -O2. */
2737 flag_schedule_insns
= 0;
2742 /* If optimizing for size, bump the number of instructions that we
2743 are prepared to conditionally execute (even on a StrongARM). */
2744 max_insns_skipped
= 6;
2747 max_insns_skipped
= current_tune
->max_insns_skipped
;
2749 /* Hot/Cold partitioning is not currently supported, since we can't
2750 handle literal pool placement in that case. */
2751 if (flag_reorder_blocks_and_partition
)
2753 inform (input_location
,
2754 "-freorder-blocks-and-partition not supported on this architecture");
2755 flag_reorder_blocks_and_partition
= 0;
2756 flag_reorder_blocks
= 1;
2760 /* Hoisting PIC address calculations more aggressively provides a small,
2761 but measurable, size reduction for PIC code. Therefore, we decrease
2762 the bar for unrestricted expression hoisting to the cost of PIC address
2763 calculation, which is 2 instructions. */
2764 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
2765 global_options
.x_param_values
,
2766 global_options_set
.x_param_values
);
2768 /* ARM EABI defaults to strict volatile bitfields. */
2769 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
2770 && abi_version_at_least(2))
2771 flag_strict_volatile_bitfields
= 1;
2773 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2774 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2775 if (flag_prefetch_loop_arrays
< 0
2778 && current_tune
->num_prefetch_slots
> 0)
2779 flag_prefetch_loop_arrays
= 1;
2781 /* Set up parameters to be used in prefetching algorithm. Do not override the
2782 defaults unless we are tuning for a core we have researched values for. */
2783 if (current_tune
->num_prefetch_slots
> 0)
2784 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
2785 current_tune
->num_prefetch_slots
,
2786 global_options
.x_param_values
,
2787 global_options_set
.x_param_values
);
2788 if (current_tune
->l1_cache_line_size
>= 0)
2789 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
2790 current_tune
->l1_cache_line_size
,
2791 global_options
.x_param_values
,
2792 global_options_set
.x_param_values
);
2793 if (current_tune
->l1_cache_size
>= 0)
2794 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
2795 current_tune
->l1_cache_size
,
2796 global_options
.x_param_values
,
2797 global_options_set
.x_param_values
);
2799 /* Use Neon to perform 64-bits operations rather than core
2801 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
2802 if (use_neon_for_64bits
== 1)
2803 prefer_neon_for_64bits
= true;
2805 /* Use the alternative scheduling-pressure algorithm by default. */
2806 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, 2,
2807 global_options
.x_param_values
,
2808 global_options_set
.x_param_values
);
2810 /* Disable shrink-wrap when optimizing function for size, since it tends to
2811 generate additional returns. */
2812 if (optimize_function_for_size_p (cfun
) && TARGET_THUMB2
)
2813 flag_shrink_wrap
= false;
2814 /* TBD: Dwarf info for apcs frame is not handled yet. */
2815 if (TARGET_APCS_FRAME
)
2816 flag_shrink_wrap
= false;
2818 /* We only support -mslow-flash-data on armv7-m targets. */
2819 if (target_slow_flash_data
2820 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2821 || (TARGET_THUMB1
|| flag_pic
|| TARGET_NEON
)))
2822 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2824 /* Currently, for slow flash data, we just disable literal pools. */
2825 if (target_slow_flash_data
)
2826 arm_disable_literal_pool
= true;
2828 /* Register global variables with the garbage collector. */
2829 arm_add_gc_roots ();
2833 arm_add_gc_roots (void)
2835 gcc_obstack_init(&minipool_obstack
);
2836 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
2839 /* A table of known ARM exception types.
2840 For use with the interrupt function attribute. */
2844 const char *const arg
;
2845 const unsigned long return_value
;
2849 static const isr_attribute_arg isr_attribute_args
[] =
2851 { "IRQ", ARM_FT_ISR
},
2852 { "irq", ARM_FT_ISR
},
2853 { "FIQ", ARM_FT_FIQ
},
2854 { "fiq", ARM_FT_FIQ
},
2855 { "ABORT", ARM_FT_ISR
},
2856 { "abort", ARM_FT_ISR
},
2857 { "ABORT", ARM_FT_ISR
},
2858 { "abort", ARM_FT_ISR
},
2859 { "UNDEF", ARM_FT_EXCEPTION
},
2860 { "undef", ARM_FT_EXCEPTION
},
2861 { "SWI", ARM_FT_EXCEPTION
},
2862 { "swi", ARM_FT_EXCEPTION
},
2863 { NULL
, ARM_FT_NORMAL
}
2866 /* Returns the (interrupt) function type of the current
2867 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2869 static unsigned long
2870 arm_isr_value (tree argument
)
2872 const isr_attribute_arg
* ptr
;
2876 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
2878 /* No argument - default to IRQ. */
2879 if (argument
== NULL_TREE
)
2882 /* Get the value of the argument. */
2883 if (TREE_VALUE (argument
) == NULL_TREE
2884 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
2885 return ARM_FT_UNKNOWN
;
2887 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
2889 /* Check it against the list of known arguments. */
2890 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
2891 if (streq (arg
, ptr
->arg
))
2892 return ptr
->return_value
;
2894 /* An unrecognized interrupt type. */
2895 return ARM_FT_UNKNOWN
;
2898 /* Computes the type of the current function. */
2900 static unsigned long
2901 arm_compute_func_type (void)
2903 unsigned long type
= ARM_FT_UNKNOWN
;
2907 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
2909 /* Decide if the current function is volatile. Such functions
2910 never return, and many memory cycles can be saved by not storing
2911 register values that will never be needed again. This optimization
2912 was added to speed up context switching in a kernel application. */
2914 && (TREE_NOTHROW (current_function_decl
)
2915 || !(flag_unwind_tables
2917 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
2918 && TREE_THIS_VOLATILE (current_function_decl
))
2919 type
|= ARM_FT_VOLATILE
;
2921 if (cfun
->static_chain_decl
!= NULL
)
2922 type
|= ARM_FT_NESTED
;
2924 attr
= DECL_ATTRIBUTES (current_function_decl
);
2926 a
= lookup_attribute ("naked", attr
);
2928 type
|= ARM_FT_NAKED
;
2930 a
= lookup_attribute ("isr", attr
);
2932 a
= lookup_attribute ("interrupt", attr
);
2935 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
2937 type
|= arm_isr_value (TREE_VALUE (a
));
2942 /* Returns the type of the current function. */
2945 arm_current_func_type (void)
2947 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
2948 cfun
->machine
->func_type
= arm_compute_func_type ();
2950 return cfun
->machine
->func_type
;
2954 arm_allocate_stack_slots_for_args (void)
2956 /* Naked functions should not allocate stack slots for arguments. */
2957 return !IS_NAKED (arm_current_func_type ());
2961 arm_warn_func_return (tree decl
)
2963 /* Naked functions are implemented entirely in assembly, including the
2964 return sequence, so suppress warnings about this. */
2965 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
2969 /* Output assembler code for a block containing the constant parts
2970 of a trampoline, leaving space for the variable parts.
2972 On the ARM, (if r8 is the static chain regnum, and remembering that
2973 referencing pc adds an offset of 8) the trampoline looks like:
2976 .word static chain value
2977 .word function's address
2978 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2981 arm_asm_trampoline_template (FILE *f
)
2985 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2986 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
2988 else if (TARGET_THUMB2
)
2990 /* The Thumb-2 trampoline is similar to the arm implementation.
2991 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2992 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
2993 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2994 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
2998 ASM_OUTPUT_ALIGN (f
, 2);
2999 fprintf (f
, "\t.code\t16\n");
3000 fprintf (f
, ".Ltrampoline_start:\n");
3001 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3002 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3003 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3004 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3005 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3006 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3008 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3009 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3012 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3015 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3017 rtx fnaddr
, mem
, a_tramp
;
3019 emit_block_move (m_tramp
, assemble_trampoline_template (),
3020 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3022 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3023 emit_move_insn (mem
, chain_value
);
3025 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3026 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3027 emit_move_insn (mem
, fnaddr
);
3029 a_tramp
= XEXP (m_tramp
, 0);
3030 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3031 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3032 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3035 /* Thumb trampolines should be entered in thumb mode, so set
3036 the bottom bit of the address. */
3039 arm_trampoline_adjust_address (rtx addr
)
3042 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3043 NULL
, 0, OPTAB_LIB_WIDEN
);
3047 /* Return 1 if it is possible to return using a single instruction.
3048 If SIBLING is non-null, this is a test for a return before a sibling
3049 call. SIBLING is the call insn, so we can examine its register usage. */
3052 use_return_insn (int iscond
, rtx sibling
)
3055 unsigned int func_type
;
3056 unsigned long saved_int_regs
;
3057 unsigned HOST_WIDE_INT stack_adjust
;
3058 arm_stack_offsets
*offsets
;
3060 /* Never use a return instruction before reload has run. */
3061 if (!reload_completed
)
3064 func_type
= arm_current_func_type ();
3066 /* Naked, volatile and stack alignment functions need special
3068 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3071 /* So do interrupt functions that use the frame pointer and Thumb
3072 interrupt functions. */
3073 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3076 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3077 && !optimize_function_for_size_p (cfun
))
3080 offsets
= arm_get_frame_offsets ();
3081 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3083 /* As do variadic functions. */
3084 if (crtl
->args
.pretend_args_size
3085 || cfun
->machine
->uses_anonymous_args
3086 /* Or if the function calls __builtin_eh_return () */
3087 || crtl
->calls_eh_return
3088 /* Or if the function calls alloca */
3089 || cfun
->calls_alloca
3090 /* Or if there is a stack adjustment. However, if the stack pointer
3091 is saved on the stack, we can use a pre-incrementing stack load. */
3092 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3093 && stack_adjust
== 4)))
3096 saved_int_regs
= offsets
->saved_regs_mask
;
3098 /* Unfortunately, the insn
3100 ldmib sp, {..., sp, ...}
3102 triggers a bug on most SA-110 based devices, such that the stack
3103 pointer won't be correctly restored if the instruction takes a
3104 page fault. We work around this problem by popping r3 along with
3105 the other registers, since that is never slower than executing
3106 another instruction.
3108 We test for !arm_arch5 here, because code for any architecture
3109 less than this could potentially be run on one of the buggy
3111 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3113 /* Validate that r3 is a call-clobbered register (always true in
3114 the default abi) ... */
3115 if (!call_used_regs
[3])
3118 /* ... that it isn't being used for a return value ... */
3119 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3122 /* ... or for a tail-call argument ... */
3125 gcc_assert (CALL_P (sibling
));
3127 if (find_regno_fusage (sibling
, USE
, 3))
3131 /* ... and that there are no call-saved registers in r0-r2
3132 (always true in the default ABI). */
3133 if (saved_int_regs
& 0x7)
3137 /* Can't be done if interworking with Thumb, and any registers have been
3139 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3142 /* On StrongARM, conditional returns are expensive if they aren't
3143 taken and multiple registers have been stacked. */
3144 if (iscond
&& arm_tune_strongarm
)
3146 /* Conditional return when just the LR is stored is a simple
3147 conditional-load instruction, that's not expensive. */
3148 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3152 && arm_pic_register
!= INVALID_REGNUM
3153 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3157 /* If there are saved registers but the LR isn't saved, then we need
3158 two instructions for the return. */
3159 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3162 /* Can't be done if any of the VFP regs are pushed,
3163 since this also requires an insn. */
3164 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3165 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3166 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3169 if (TARGET_REALLY_IWMMXT
)
3170 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3171 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3177 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3178 shrink-wrapping if possible. This is the case if we need to emit a
3179 prologue, which we can test by looking at the offsets. */
3181 use_simple_return_p (void)
3183 arm_stack_offsets
*offsets
;
3185 offsets
= arm_get_frame_offsets ();
3186 return offsets
->outgoing_args
!= 0;
3189 /* Return TRUE if int I is a valid immediate ARM constant. */
3192 const_ok_for_arm (HOST_WIDE_INT i
)
3196 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3197 be all zero, or all one. */
3198 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3199 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3200 != ((~(unsigned HOST_WIDE_INT
) 0)
3201 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3204 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3206 /* Fast return for 0 and small values. We must do this for zero, since
3207 the code below can't handle that one case. */
3208 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3211 /* Get the number of trailing zeros. */
3212 lowbit
= ffs((int) i
) - 1;
3214 /* Only even shifts are allowed in ARM mode so round down to the
3215 nearest even number. */
3219 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3224 /* Allow rotated constants in ARM mode. */
3226 && ((i
& ~0xc000003f) == 0
3227 || (i
& ~0xf000000f) == 0
3228 || (i
& ~0xfc000003) == 0))
3235 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3238 if (i
== v
|| i
== (v
| (v
<< 8)))
3241 /* Allow repeated pattern 0xXY00XY00. */
3251 /* Return true if I is a valid constant for the operation CODE. */
3253 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3255 if (const_ok_for_arm (i
))
3261 /* See if we can use movw. */
3262 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3265 /* Otherwise, try mvn. */
3266 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3269 /* See if we can use addw or subw. */
3271 && ((i
& 0xfffff000) == 0
3272 || ((-i
) & 0xfffff000) == 0))
3274 /* else fall through. */
3294 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3296 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3302 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3306 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3313 /* Return true if I is a valid di mode constant for the operation CODE. */
3315 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3317 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3318 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3319 rtx hi
= GEN_INT (hi_val
);
3320 rtx lo
= GEN_INT (lo_val
);
3330 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3331 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
3333 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3340 /* Emit a sequence of insns to handle a large constant.
3341 CODE is the code of the operation required, it can be any of SET, PLUS,
3342 IOR, AND, XOR, MINUS;
3343 MODE is the mode in which the operation is being performed;
3344 VAL is the integer to operate on;
3345 SOURCE is the other operand (a register, or a null-pointer for SET);
3346 SUBTARGETS means it is safe to create scratch registers if that will
3347 either produce a simpler sequence, or we will want to cse the values.
3348 Return value is the number of insns emitted. */
3350 /* ??? Tweak this for thumb2. */
3352 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
3353 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
3357 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3358 cond
= COND_EXEC_TEST (PATTERN (insn
));
3362 if (subtargets
|| code
== SET
3363 || (REG_P (target
) && REG_P (source
)
3364 && REGNO (target
) != REGNO (source
)))
3366 /* After arm_reorg has been called, we can't fix up expensive
3367 constants by pushing them into memory so we must synthesize
3368 them in-line, regardless of the cost. This is only likely to
3369 be more costly on chips that have load delay slots and we are
3370 compiling without running the scheduler (so no splitting
3371 occurred before the final instruction emission).
3373 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3375 if (!after_arm_reorg
3377 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
3379 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
3384 /* Currently SET is the only monadic value for CODE, all
3385 the rest are diadic. */
3386 if (TARGET_USE_MOVT
)
3387 arm_emit_movpair (target
, GEN_INT (val
));
3389 emit_set_insn (target
, GEN_INT (val
));
3395 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
3397 if (TARGET_USE_MOVT
)
3398 arm_emit_movpair (temp
, GEN_INT (val
));
3400 emit_set_insn (temp
, GEN_INT (val
));
3402 /* For MINUS, the value is subtracted from, since we never
3403 have subtraction of a constant. */
3405 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
3407 emit_set_insn (target
,
3408 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
3414 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
3418 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3419 ARM/THUMB2 immediates, and add up to VAL.
3420 Thr function return value gives the number of insns required. */
3422 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3423 struct four_ints
*return_sequence
)
3425 int best_consecutive_zeros
= 0;
3429 struct four_ints tmp_sequence
;
3431 /* If we aren't targeting ARM, the best place to start is always at
3432 the bottom, otherwise look more closely. */
3435 for (i
= 0; i
< 32; i
+= 2)
3437 int consecutive_zeros
= 0;
3439 if (!(val
& (3 << i
)))
3441 while ((i
< 32) && !(val
& (3 << i
)))
3443 consecutive_zeros
+= 2;
3446 if (consecutive_zeros
> best_consecutive_zeros
)
3448 best_consecutive_zeros
= consecutive_zeros
;
3449 best_start
= i
- consecutive_zeros
;
3456 /* So long as it won't require any more insns to do so, it's
3457 desirable to emit a small constant (in bits 0...9) in the last
3458 insn. This way there is more chance that it can be combined with
3459 a later addressing insn to form a pre-indexed load or store
3460 operation. Consider:
3462 *((volatile int *)0xe0000100) = 1;
3463 *((volatile int *)0xe0000110) = 2;
3465 We want this to wind up as:
3469 str rB, [rA, #0x100]
3471 str rB, [rA, #0x110]
3473 rather than having to synthesize both large constants from scratch.
3475 Therefore, we calculate how many insns would be required to emit
3476 the constant starting from `best_start', and also starting from
3477 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3478 yield a shorter sequence, we may as well use zero. */
3479 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
3481 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
3483 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
3484 if (insns2
<= insns1
)
3486 *return_sequence
= tmp_sequence
;
3494 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3496 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3497 struct four_ints
*return_sequence
, int i
)
3499 int remainder
= val
& 0xffffffff;
3502 /* Try and find a way of doing the job in either two or three
3505 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3506 location. We start at position I. This may be the MSB, or
3507 optimial_immediate_sequence may have positioned it at the largest block
3508 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3509 wrapping around to the top of the word when we drop off the bottom.
3510 In the worst case this code should produce no more than four insns.
3512 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3513 constants, shifted to any arbitrary location. We should always start
3518 unsigned int b1
, b2
, b3
, b4
;
3519 unsigned HOST_WIDE_INT result
;
3522 gcc_assert (insns
< 4);
3527 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3528 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3531 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3532 /* We can use addw/subw for the last 12 bits. */
3536 /* Use an 8-bit shifted/rotated immediate. */
3540 result
= remainder
& ((0x0ff << end
)
3541 | ((i
< end
) ? (0xff >> (32 - end
))
3548 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3549 arbitrary shifts. */
3550 i
-= TARGET_ARM
? 2 : 1;
3554 /* Next, see if we can do a better job with a thumb2 replicated
3557 We do it this way around to catch the cases like 0x01F001E0 where
3558 two 8-bit immediates would work, but a replicated constant would
3561 TODO: 16-bit constants that don't clear all the bits, but still win.
3562 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3565 b1
= (remainder
& 0xff000000) >> 24;
3566 b2
= (remainder
& 0x00ff0000) >> 16;
3567 b3
= (remainder
& 0x0000ff00) >> 8;
3568 b4
= remainder
& 0xff;
3572 /* The 8-bit immediate already found clears b1 (and maybe b2),
3573 but must leave b3 and b4 alone. */
3575 /* First try to find a 32-bit replicated constant that clears
3576 almost everything. We can assume that we can't do it in one,
3577 or else we wouldn't be here. */
3578 unsigned int tmp
= b1
& b2
& b3
& b4
;
3579 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3581 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3582 + (tmp
== b3
) + (tmp
== b4
);
3584 && (matching_bytes
>= 3
3585 || (matching_bytes
== 2
3586 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3588 /* At least 3 of the bytes match, and the fourth has at
3589 least as many bits set, or two of the bytes match
3590 and it will only require one more insn to finish. */
3598 /* Second, try to find a 16-bit replicated constant that can
3599 leave three of the bytes clear. If b2 or b4 is already
3600 zero, then we can. If the 8-bit from above would not
3601 clear b2 anyway, then we still win. */
3602 else if (b1
== b3
&& (!b2
|| !b4
3603 || (remainder
& 0x00ff0000 & ~result
)))
3605 result
= remainder
& 0xff00ff00;
3611 /* The 8-bit immediate already found clears b2 (and maybe b3)
3612 and we don't get here unless b1 is alredy clear, but it will
3613 leave b4 unchanged. */
3615 /* If we can clear b2 and b4 at once, then we win, since the
3616 8-bits couldn't possibly reach that far. */
3619 result
= remainder
& 0x00ff00ff;
3625 return_sequence
->i
[insns
++] = result
;
3626 remainder
&= ~result
;
3628 if (code
== SET
|| code
== MINUS
)
3636 /* Emit an instruction with the indicated PATTERN. If COND is
3637 non-NULL, conditionalize the execution of the instruction on COND
3641 emit_constant_insn (rtx cond
, rtx pattern
)
3644 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
3645 emit_insn (pattern
);
3648 /* As above, but extra parameter GENERATE which, if clear, suppresses
3652 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
3653 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
3658 int final_invert
= 0;
3660 int set_sign_bit_copies
= 0;
3661 int clear_sign_bit_copies
= 0;
3662 int clear_zero_bit_copies
= 0;
3663 int set_zero_bit_copies
= 0;
3664 int insns
= 0, neg_insns
, inv_insns
;
3665 unsigned HOST_WIDE_INT temp1
, temp2
;
3666 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3667 struct four_ints
*immediates
;
3668 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
3670 /* Find out which operations are safe for a given CODE. Also do a quick
3671 check for degenerate cases; these can occur when DImode operations
3684 if (remainder
== 0xffffffff)
3687 emit_constant_insn (cond
,
3688 gen_rtx_SET (VOIDmode
, target
,
3689 GEN_INT (ARM_SIGN_EXTEND (val
))));
3695 if (reload_completed
&& rtx_equal_p (target
, source
))
3699 emit_constant_insn (cond
,
3700 gen_rtx_SET (VOIDmode
, target
, source
));
3709 emit_constant_insn (cond
,
3710 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
3713 if (remainder
== 0xffffffff)
3715 if (reload_completed
&& rtx_equal_p (target
, source
))
3718 emit_constant_insn (cond
,
3719 gen_rtx_SET (VOIDmode
, target
, source
));
3728 if (reload_completed
&& rtx_equal_p (target
, source
))
3731 emit_constant_insn (cond
,
3732 gen_rtx_SET (VOIDmode
, target
, source
));
3736 if (remainder
== 0xffffffff)
3739 emit_constant_insn (cond
,
3740 gen_rtx_SET (VOIDmode
, target
,
3741 gen_rtx_NOT (mode
, source
)));
3748 /* We treat MINUS as (val - source), since (source - val) is always
3749 passed as (source + (-val)). */
3753 emit_constant_insn (cond
,
3754 gen_rtx_SET (VOIDmode
, target
,
3755 gen_rtx_NEG (mode
, source
)));
3758 if (const_ok_for_arm (val
))
3761 emit_constant_insn (cond
,
3762 gen_rtx_SET (VOIDmode
, target
,
3763 gen_rtx_MINUS (mode
, GEN_INT (val
),
3774 /* If we can do it in one insn get out quickly. */
3775 if (const_ok_for_op (val
, code
))
3778 emit_constant_insn (cond
,
3779 gen_rtx_SET (VOIDmode
, target
,
3781 ? gen_rtx_fmt_ee (code
, mode
, source
,
3787 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3789 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
3790 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
3794 if (mode
== SImode
&& i
== 16)
3795 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3797 emit_constant_insn (cond
,
3798 gen_zero_extendhisi2
3799 (target
, gen_lowpart (HImode
, source
)));
3801 /* Extz only supports SImode, but we can coerce the operands
3803 emit_constant_insn (cond
,
3804 gen_extzv_t2 (gen_lowpart (SImode
, target
),
3805 gen_lowpart (SImode
, source
),
3806 GEN_INT (i
), const0_rtx
));
3812 /* Calculate a few attributes that may be useful for specific
3814 /* Count number of leading zeros. */
3815 for (i
= 31; i
>= 0; i
--)
3817 if ((remainder
& (1 << i
)) == 0)
3818 clear_sign_bit_copies
++;
3823 /* Count number of leading 1's. */
3824 for (i
= 31; i
>= 0; i
--)
3826 if ((remainder
& (1 << i
)) != 0)
3827 set_sign_bit_copies
++;
3832 /* Count number of trailing zero's. */
3833 for (i
= 0; i
<= 31; i
++)
3835 if ((remainder
& (1 << i
)) == 0)
3836 clear_zero_bit_copies
++;
3841 /* Count number of trailing 1's. */
3842 for (i
= 0; i
<= 31; i
++)
3844 if ((remainder
& (1 << i
)) != 0)
3845 set_zero_bit_copies
++;
3853 /* See if we can do this by sign_extending a constant that is known
3854 to be negative. This is a good, way of doing it, since the shift
3855 may well merge into a subsequent insn. */
3856 if (set_sign_bit_copies
> 1)
3858 if (const_ok_for_arm
3859 (temp1
= ARM_SIGN_EXTEND (remainder
3860 << (set_sign_bit_copies
- 1))))
3864 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3865 emit_constant_insn (cond
,
3866 gen_rtx_SET (VOIDmode
, new_src
,
3868 emit_constant_insn (cond
,
3869 gen_ashrsi3 (target
, new_src
,
3870 GEN_INT (set_sign_bit_copies
- 1)));
3874 /* For an inverted constant, we will need to set the low bits,
3875 these will be shifted out of harm's way. */
3876 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
3877 if (const_ok_for_arm (~temp1
))
3881 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3882 emit_constant_insn (cond
,
3883 gen_rtx_SET (VOIDmode
, new_src
,
3885 emit_constant_insn (cond
,
3886 gen_ashrsi3 (target
, new_src
,
3887 GEN_INT (set_sign_bit_copies
- 1)));
3893 /* See if we can calculate the value as the difference between two
3894 valid immediates. */
3895 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
3897 int topshift
= clear_sign_bit_copies
& ~1;
3899 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
3900 & (0xff000000 >> topshift
));
3902 /* If temp1 is zero, then that means the 9 most significant
3903 bits of remainder were 1 and we've caused it to overflow.
3904 When topshift is 0 we don't need to do anything since we
3905 can borrow from 'bit 32'. */
3906 if (temp1
== 0 && topshift
!= 0)
3907 temp1
= 0x80000000 >> (topshift
- 1);
3909 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
3911 if (const_ok_for_arm (temp2
))
3915 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3916 emit_constant_insn (cond
,
3917 gen_rtx_SET (VOIDmode
, new_src
,
3919 emit_constant_insn (cond
,
3920 gen_addsi3 (target
, new_src
,
3928 /* See if we can generate this by setting the bottom (or the top)
3929 16 bits, and then shifting these into the other half of the
3930 word. We only look for the simplest cases, to do more would cost
3931 too much. Be careful, however, not to generate this when the
3932 alternative would take fewer insns. */
3933 if (val
& 0xffff0000)
3935 temp1
= remainder
& 0xffff0000;
3936 temp2
= remainder
& 0x0000ffff;
3938 /* Overlaps outside this range are best done using other methods. */
3939 for (i
= 9; i
< 24; i
++)
3941 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
3942 && !const_ok_for_arm (temp2
))
3944 rtx new_src
= (subtargets
3945 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3947 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
3948 source
, subtargets
, generate
);
3956 gen_rtx_ASHIFT (mode
, source
,
3963 /* Don't duplicate cases already considered. */
3964 for (i
= 17; i
< 24; i
++)
3966 if (((temp1
| (temp1
>> i
)) == remainder
)
3967 && !const_ok_for_arm (temp1
))
3969 rtx new_src
= (subtargets
3970 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3972 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
3973 source
, subtargets
, generate
);
3978 gen_rtx_SET (VOIDmode
, target
,
3981 gen_rtx_LSHIFTRT (mode
, source
,
3992 /* If we have IOR or XOR, and the constant can be loaded in a
3993 single instruction, and we can find a temporary to put it in,
3994 then this can be done in two instructions instead of 3-4. */
3996 /* TARGET can't be NULL if SUBTARGETS is 0 */
3997 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
3999 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4003 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4005 emit_constant_insn (cond
,
4006 gen_rtx_SET (VOIDmode
, sub
,
4008 emit_constant_insn (cond
,
4009 gen_rtx_SET (VOIDmode
, target
,
4010 gen_rtx_fmt_ee (code
, mode
,
4021 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4022 and the remainder 0s for e.g. 0xfff00000)
4023 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4025 This can be done in 2 instructions by using shifts with mov or mvn.
4030 mvn r0, r0, lsr #12 */
4031 if (set_sign_bit_copies
> 8
4032 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
4036 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4037 rtx shift
= GEN_INT (set_sign_bit_copies
);
4041 gen_rtx_SET (VOIDmode
, sub
,
4043 gen_rtx_ASHIFT (mode
,
4048 gen_rtx_SET (VOIDmode
, target
,
4050 gen_rtx_LSHIFTRT (mode
, sub
,
4057 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4059 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4061 For eg. r0 = r0 | 0xfff
4066 if (set_zero_bit_copies
> 8
4067 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4071 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4072 rtx shift
= GEN_INT (set_zero_bit_copies
);
4076 gen_rtx_SET (VOIDmode
, sub
,
4078 gen_rtx_LSHIFTRT (mode
,
4083 gen_rtx_SET (VOIDmode
, target
,
4085 gen_rtx_ASHIFT (mode
, sub
,
4091 /* This will never be reached for Thumb2 because orn is a valid
4092 instruction. This is for Thumb1 and the ARM 32 bit cases.
4094 x = y | constant (such that ~constant is a valid constant)
4096 x = ~(~y & ~constant).
4098 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4102 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4103 emit_constant_insn (cond
,
4104 gen_rtx_SET (VOIDmode
, sub
,
4105 gen_rtx_NOT (mode
, source
)));
4108 sub
= gen_reg_rtx (mode
);
4109 emit_constant_insn (cond
,
4110 gen_rtx_SET (VOIDmode
, sub
,
4111 gen_rtx_AND (mode
, source
,
4113 emit_constant_insn (cond
,
4114 gen_rtx_SET (VOIDmode
, target
,
4115 gen_rtx_NOT (mode
, sub
)));
4122 /* See if two shifts will do 2 or more insn's worth of work. */
4123 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4125 HOST_WIDE_INT shift_mask
= ((0xffffffff
4126 << (32 - clear_sign_bit_copies
))
4129 if ((remainder
| shift_mask
) != 0xffffffff)
4133 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4134 insns
= arm_gen_constant (AND
, mode
, cond
,
4135 remainder
| shift_mask
,
4136 new_src
, source
, subtargets
, 1);
4141 rtx targ
= subtargets
? NULL_RTX
: target
;
4142 insns
= arm_gen_constant (AND
, mode
, cond
,
4143 remainder
| shift_mask
,
4144 targ
, source
, subtargets
, 0);
4150 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4151 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4153 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4154 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4160 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4162 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4164 if ((remainder
| shift_mask
) != 0xffffffff)
4168 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4170 insns
= arm_gen_constant (AND
, mode
, cond
,
4171 remainder
| shift_mask
,
4172 new_src
, source
, subtargets
, 1);
4177 rtx targ
= subtargets
? NULL_RTX
: target
;
4179 insns
= arm_gen_constant (AND
, mode
, cond
,
4180 remainder
| shift_mask
,
4181 targ
, source
, subtargets
, 0);
4187 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4188 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4190 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4191 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4203 /* Calculate what the instruction sequences would be if we generated it
4204 normally, negated, or inverted. */
4206 /* AND cannot be split into multiple insns, so invert and use BIC. */
4209 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4212 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4217 if (can_invert
|| final_invert
)
4218 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4223 immediates
= &pos_immediates
;
4225 /* Is the negated immediate sequence more efficient? */
4226 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4229 immediates
= &neg_immediates
;
4234 /* Is the inverted immediate sequence more efficient?
4235 We must allow for an extra NOT instruction for XOR operations, although
4236 there is some chance that the final 'mvn' will get optimized later. */
4237 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4240 immediates
= &inv_immediates
;
4248 /* Now output the chosen sequence as instructions. */
4251 for (i
= 0; i
< insns
; i
++)
4253 rtx new_src
, temp1_rtx
;
4255 temp1
= immediates
->i
[i
];
4257 if (code
== SET
|| code
== MINUS
)
4258 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4259 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4260 new_src
= gen_reg_rtx (mode
);
4266 else if (can_negate
)
4269 temp1
= trunc_int_for_mode (temp1
, mode
);
4270 temp1_rtx
= GEN_INT (temp1
);
4274 else if (code
== MINUS
)
4275 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4277 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4279 emit_constant_insn (cond
,
4280 gen_rtx_SET (VOIDmode
, new_src
,
4286 can_negate
= can_invert
;
4290 else if (code
== MINUS
)
4298 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
4299 gen_rtx_NOT (mode
, source
)));
4306 /* Canonicalize a comparison so that we are more likely to recognize it.
4307 This can be done for a few constant compares, where we can make the
4308 immediate value easier to load. */
4311 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4312 bool op0_preserve_value
)
4314 enum machine_mode mode
;
4315 unsigned HOST_WIDE_INT i
, maxval
;
4317 mode
= GET_MODE (*op0
);
4318 if (mode
== VOIDmode
)
4319 mode
= GET_MODE (*op1
);
4321 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4323 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4324 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4325 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4326 for GTU/LEU in Thumb mode. */
4331 if (*code
== GT
|| *code
== LE
4332 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4334 /* Missing comparison. First try to use an available
4336 if (CONST_INT_P (*op1
))
4344 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4346 *op1
= GEN_INT (i
+ 1);
4347 *code
= *code
== GT
? GE
: LT
;
4353 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4354 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4356 *op1
= GEN_INT (i
+ 1);
4357 *code
= *code
== GTU
? GEU
: LTU
;
4366 /* If that did not work, reverse the condition. */
4367 if (!op0_preserve_value
)
4372 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4378 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4379 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4380 to facilitate possible combining with a cmp into 'ands'. */
4382 && GET_CODE (*op0
) == ZERO_EXTEND
4383 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4384 && GET_MODE (XEXP (*op0
, 0)) == QImode
4385 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4386 && subreg_lowpart_p (XEXP (*op0
, 0))
4387 && *op1
== const0_rtx
)
4388 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4391 /* Comparisons smaller than DImode. Only adjust comparisons against
4392 an out-of-range constant. */
4393 if (!CONST_INT_P (*op1
)
4394 || const_ok_for_arm (INTVAL (*op1
))
4395 || const_ok_for_arm (- INTVAL (*op1
)))
4409 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4411 *op1
= GEN_INT (i
+ 1);
4412 *code
= *code
== GT
? GE
: LT
;
4420 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4422 *op1
= GEN_INT (i
- 1);
4423 *code
= *code
== GE
? GT
: LE
;
4430 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4431 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4433 *op1
= GEN_INT (i
+ 1);
4434 *code
= *code
== GTU
? GEU
: LTU
;
4442 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4444 *op1
= GEN_INT (i
- 1);
4445 *code
= *code
== GEU
? GTU
: LEU
;
4456 /* Define how to find the value returned by a function. */
4459 arm_function_value(const_tree type
, const_tree func
,
4460 bool outgoing ATTRIBUTE_UNUSED
)
4462 enum machine_mode mode
;
4463 int unsignedp ATTRIBUTE_UNUSED
;
4464 rtx r ATTRIBUTE_UNUSED
;
4466 mode
= TYPE_MODE (type
);
4468 if (TARGET_AAPCS_BASED
)
4469 return aapcs_allocate_return_reg (mode
, type
, func
);
4471 /* Promote integer types. */
4472 if (INTEGRAL_TYPE_P (type
))
4473 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
4475 /* Promotes small structs returned in a register to full-word size
4476 for big-endian AAPCS. */
4477 if (arm_return_in_msb (type
))
4479 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4480 if (size
% UNITS_PER_WORD
!= 0)
4482 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4483 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4487 return arm_libcall_value_1 (mode
);
4490 /* libcall hashtable helpers. */
4492 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
4494 typedef rtx_def value_type
;
4495 typedef rtx_def compare_type
;
4496 static inline hashval_t
hash (const value_type
*);
4497 static inline bool equal (const value_type
*, const compare_type
*);
4498 static inline void remove (value_type
*);
4502 libcall_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
4504 return rtx_equal_p (p1
, p2
);
4508 libcall_hasher::hash (const value_type
*p1
)
4510 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
4513 typedef hash_table
<libcall_hasher
> libcall_table_type
;
4516 add_libcall (libcall_table_type htab
, rtx libcall
)
4518 *htab
.find_slot (libcall
, INSERT
) = libcall
;
4522 arm_libcall_uses_aapcs_base (const_rtx libcall
)
4524 static bool init_done
= false;
4525 static libcall_table_type libcall_htab
;
4531 libcall_htab
.create (31);
4532 add_libcall (libcall_htab
,
4533 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
4534 add_libcall (libcall_htab
,
4535 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
4536 add_libcall (libcall_htab
,
4537 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
4538 add_libcall (libcall_htab
,
4539 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
4541 add_libcall (libcall_htab
,
4542 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
4543 add_libcall (libcall_htab
,
4544 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
4545 add_libcall (libcall_htab
,
4546 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
4547 add_libcall (libcall_htab
,
4548 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
4550 add_libcall (libcall_htab
,
4551 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
4552 add_libcall (libcall_htab
,
4553 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
4554 add_libcall (libcall_htab
,
4555 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
4556 add_libcall (libcall_htab
,
4557 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
4558 add_libcall (libcall_htab
,
4559 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
4560 add_libcall (libcall_htab
,
4561 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
4562 add_libcall (libcall_htab
,
4563 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
4564 add_libcall (libcall_htab
,
4565 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
4567 /* Values from double-precision helper functions are returned in core
4568 registers if the selected core only supports single-precision
4569 arithmetic, even if we are using the hard-float ABI. The same is
4570 true for single-precision helpers, but we will never be using the
4571 hard-float ABI on a CPU which doesn't support single-precision
4572 operations in hardware. */
4573 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
4574 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
4575 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
4576 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
4577 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
4578 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
4579 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
4580 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
4581 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
4582 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
4583 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
4584 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
4586 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
4590 return libcall
&& libcall_htab
.find (libcall
) != NULL
;
4594 arm_libcall_value_1 (enum machine_mode mode
)
4596 if (TARGET_AAPCS_BASED
)
4597 return aapcs_libcall_value (mode
);
4598 else if (TARGET_IWMMXT_ABI
4599 && arm_vector_mode_supported_p (mode
))
4600 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
4602 return gen_rtx_REG (mode
, ARG_REGISTER (1));
4605 /* Define how to find the value returned by a library function
4606 assuming the value has mode MODE. */
4609 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
4611 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
4612 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4614 /* The following libcalls return their result in integer registers,
4615 even though they return a floating point value. */
4616 if (arm_libcall_uses_aapcs_base (libcall
))
4617 return gen_rtx_REG (mode
, ARG_REGISTER(1));
4621 return arm_libcall_value_1 (mode
);
4624 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4627 arm_function_value_regno_p (const unsigned int regno
)
4629 if (regno
== ARG_REGISTER (1)
4631 && TARGET_AAPCS_BASED
4633 && TARGET_HARD_FLOAT
4634 && regno
== FIRST_VFP_REGNUM
)
4635 || (TARGET_IWMMXT_ABI
4636 && regno
== FIRST_IWMMXT_REGNUM
))
4642 /* Determine the amount of memory needed to store the possible return
4643 registers of an untyped call. */
4645 arm_apply_result_size (void)
4651 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
4653 if (TARGET_IWMMXT_ABI
)
4660 /* Decide whether TYPE should be returned in memory (true)
4661 or in a register (false). FNTYPE is the type of the function making
4664 arm_return_in_memory (const_tree type
, const_tree fntype
)
4668 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
4670 if (TARGET_AAPCS_BASED
)
4672 /* Simple, non-aggregate types (ie not including vectors and
4673 complex) are always returned in a register (or registers).
4674 We don't care about which register here, so we can short-cut
4675 some of the detail. */
4676 if (!AGGREGATE_TYPE_P (type
)
4677 && TREE_CODE (type
) != VECTOR_TYPE
4678 && TREE_CODE (type
) != COMPLEX_TYPE
)
4681 /* Any return value that is no larger than one word can be
4683 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
4686 /* Check any available co-processors to see if they accept the
4687 type as a register candidate (VFP, for example, can return
4688 some aggregates in consecutive registers). These aren't
4689 available if the call is variadic. */
4690 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
4693 /* Vector values should be returned using ARM registers, not
4694 memory (unless they're over 16 bytes, which will break since
4695 we only have four call-clobbered registers to play with). */
4696 if (TREE_CODE (type
) == VECTOR_TYPE
)
4697 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4699 /* The rest go in memory. */
4703 if (TREE_CODE (type
) == VECTOR_TYPE
)
4704 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4706 if (!AGGREGATE_TYPE_P (type
) &&
4707 (TREE_CODE (type
) != VECTOR_TYPE
))
4708 /* All simple types are returned in registers. */
4711 if (arm_abi
!= ARM_ABI_APCS
)
4713 /* ATPCS and later return aggregate types in memory only if they are
4714 larger than a word (or are variable size). */
4715 return (size
< 0 || size
> UNITS_PER_WORD
);
4718 /* For the arm-wince targets we choose to be compatible with Microsoft's
4719 ARM and Thumb compilers, which always return aggregates in memory. */
4721 /* All structures/unions bigger than one word are returned in memory.
4722 Also catch the case where int_size_in_bytes returns -1. In this case
4723 the aggregate is either huge or of variable size, and in either case
4724 we will want to return it via memory and not in a register. */
4725 if (size
< 0 || size
> UNITS_PER_WORD
)
4728 if (TREE_CODE (type
) == RECORD_TYPE
)
4732 /* For a struct the APCS says that we only return in a register
4733 if the type is 'integer like' and every addressable element
4734 has an offset of zero. For practical purposes this means
4735 that the structure can have at most one non bit-field element
4736 and that this element must be the first one in the structure. */
4738 /* Find the first field, ignoring non FIELD_DECL things which will
4739 have been created by C++. */
4740 for (field
= TYPE_FIELDS (type
);
4741 field
&& TREE_CODE (field
) != FIELD_DECL
;
4742 field
= DECL_CHAIN (field
))
4746 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4748 /* Check that the first field is valid for returning in a register. */
4750 /* ... Floats are not allowed */
4751 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4754 /* ... Aggregates that are not themselves valid for returning in
4755 a register are not allowed. */
4756 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4759 /* Now check the remaining fields, if any. Only bitfields are allowed,
4760 since they are not addressable. */
4761 for (field
= DECL_CHAIN (field
);
4763 field
= DECL_CHAIN (field
))
4765 if (TREE_CODE (field
) != FIELD_DECL
)
4768 if (!DECL_BIT_FIELD_TYPE (field
))
4775 if (TREE_CODE (type
) == UNION_TYPE
)
4779 /* Unions can be returned in registers if every element is
4780 integral, or can be returned in an integer register. */
4781 for (field
= TYPE_FIELDS (type
);
4783 field
= DECL_CHAIN (field
))
4785 if (TREE_CODE (field
) != FIELD_DECL
)
4788 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4791 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4797 #endif /* not ARM_WINCE */
4799 /* Return all other types in memory. */
4803 const struct pcs_attribute_arg
4807 } pcs_attribute_args
[] =
4809 {"aapcs", ARM_PCS_AAPCS
},
4810 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
4812 /* We could recognize these, but changes would be needed elsewhere
4813 * to implement them. */
4814 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
4815 {"atpcs", ARM_PCS_ATPCS
},
4816 {"apcs", ARM_PCS_APCS
},
4818 {NULL
, ARM_PCS_UNKNOWN
}
4822 arm_pcs_from_attribute (tree attr
)
4824 const struct pcs_attribute_arg
*ptr
;
4827 /* Get the value of the argument. */
4828 if (TREE_VALUE (attr
) == NULL_TREE
4829 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
4830 return ARM_PCS_UNKNOWN
;
4832 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
4834 /* Check it against the list of known arguments. */
4835 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4836 if (streq (arg
, ptr
->arg
))
4839 /* An unrecognized interrupt type. */
4840 return ARM_PCS_UNKNOWN
;
4843 /* Get the PCS variant to use for this call. TYPE is the function's type
4844 specification, DECL is the specific declartion. DECL may be null if
4845 the call could be indirect or if this is a library call. */
4847 arm_get_pcs_model (const_tree type
, const_tree decl
)
4849 bool user_convention
= false;
4850 enum arm_pcs user_pcs
= arm_pcs_default
;
4855 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
4858 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
4859 user_convention
= true;
4862 if (TARGET_AAPCS_BASED
)
4864 /* Detect varargs functions. These always use the base rules
4865 (no argument is ever a candidate for a co-processor
4867 bool base_rules
= stdarg_p (type
);
4869 if (user_convention
)
4871 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
4872 sorry ("non-AAPCS derived PCS variant");
4873 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
4874 error ("variadic functions must use the base AAPCS variant");
4878 return ARM_PCS_AAPCS
;
4879 else if (user_convention
)
4881 else if (decl
&& flag_unit_at_a_time
)
4883 /* Local functions never leak outside this compilation unit,
4884 so we are free to use whatever conventions are
4886 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4887 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
4889 return ARM_PCS_AAPCS_LOCAL
;
4892 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
4893 sorry ("PCS variant");
4895 /* For everything else we use the target's default. */
4896 return arm_pcs_default
;
4901 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4902 const_tree fntype ATTRIBUTE_UNUSED
,
4903 rtx libcall ATTRIBUTE_UNUSED
,
4904 const_tree fndecl ATTRIBUTE_UNUSED
)
4906 /* Record the unallocated VFP registers. */
4907 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
4908 pcum
->aapcs_vfp_reg_alloc
= 0;
4911 /* Walk down the type tree of TYPE counting consecutive base elements.
4912 If *MODEP is VOIDmode, then set it to the first valid floating point
4913 type. If a non-floating point type is found, or if a floating point
4914 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4915 otherwise return the count in the sub-tree. */
4917 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
4919 enum machine_mode mode
;
4922 switch (TREE_CODE (type
))
4925 mode
= TYPE_MODE (type
);
4926 if (mode
!= DFmode
&& mode
!= SFmode
)
4929 if (*modep
== VOIDmode
)
4938 mode
= TYPE_MODE (TREE_TYPE (type
));
4939 if (mode
!= DFmode
&& mode
!= SFmode
)
4942 if (*modep
== VOIDmode
)
4951 /* Use V2SImode and V4SImode as representatives of all 64-bit
4952 and 128-bit vector types, whether or not those modes are
4953 supported with the present options. */
4954 size
= int_size_in_bytes (type
);
4967 if (*modep
== VOIDmode
)
4970 /* Vector modes are considered to be opaque: two vectors are
4971 equivalent for the purposes of being homogeneous aggregates
4972 if they are the same size. */
4981 tree index
= TYPE_DOMAIN (type
);
4983 /* Can't handle incomplete types. */
4984 if (!COMPLETE_TYPE_P (type
))
4987 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
4990 || !TYPE_MAX_VALUE (index
)
4991 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
4992 || !TYPE_MIN_VALUE (index
)
4993 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
4997 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
4998 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5000 /* There must be no padding. */
5001 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
5002 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
5003 != count
* GET_MODE_BITSIZE (*modep
)))
5015 /* Can't handle incomplete types. */
5016 if (!COMPLETE_TYPE_P (type
))
5019 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5021 if (TREE_CODE (field
) != FIELD_DECL
)
5024 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5030 /* There must be no padding. */
5031 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
5032 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
5033 != count
* GET_MODE_BITSIZE (*modep
)))
5040 case QUAL_UNION_TYPE
:
5042 /* These aren't very interesting except in a degenerate case. */
5047 /* Can't handle incomplete types. */
5048 if (!COMPLETE_TYPE_P (type
))
5051 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5053 if (TREE_CODE (field
) != FIELD_DECL
)
5056 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5059 count
= count
> sub_count
? count
: sub_count
;
5062 /* There must be no padding. */
5063 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
5064 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
5065 != count
* GET_MODE_BITSIZE (*modep
)))
5078 /* Return true if PCS_VARIANT should use VFP registers. */
5080 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5082 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5084 static bool seen_thumb1_vfp
= false;
5086 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5088 sorry ("Thumb-1 hard-float VFP ABI");
5089 /* sorry() is not immediately fatal, so only display this once. */
5090 seen_thumb1_vfp
= true;
5096 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5099 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5100 (TARGET_VFP_DOUBLE
|| !is_double
));
5103 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5104 suitable for passing or returning in VFP registers for the PCS
5105 variant selected. If it is, then *BASE_MODE is updated to contain
5106 a machine mode describing each element of the argument's type and
5107 *COUNT to hold the number of such elements. */
5109 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5110 enum machine_mode mode
, const_tree type
,
5111 enum machine_mode
*base_mode
, int *count
)
5113 enum machine_mode new_mode
= VOIDmode
;
5115 /* If we have the type information, prefer that to working things
5116 out from the mode. */
5119 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5121 if (ag_count
> 0 && ag_count
<= 4)
5126 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5127 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5128 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5133 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5136 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5142 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5145 *base_mode
= new_mode
;
5150 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5151 enum machine_mode mode
, const_tree type
)
5153 int count ATTRIBUTE_UNUSED
;
5154 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
5156 if (!use_vfp_abi (pcs_variant
, false))
5158 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5163 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5166 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5169 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5170 &pcum
->aapcs_vfp_rmode
,
5171 &pcum
->aapcs_vfp_rcount
);
5175 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5176 const_tree type ATTRIBUTE_UNUSED
)
5178 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5179 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5182 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5183 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5185 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5187 || (mode
== TImode
&& ! TARGET_NEON
)
5188 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5191 int rcount
= pcum
->aapcs_vfp_rcount
;
5193 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5197 /* Avoid using unsupported vector modes. */
5198 if (rmode
== V2SImode
)
5200 else if (rmode
== V4SImode
)
5207 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5208 for (i
= 0; i
< rcount
; i
++)
5210 rtx tmp
= gen_rtx_REG (rmode
,
5211 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5212 tmp
= gen_rtx_EXPR_LIST
5214 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5215 XVECEXP (par
, 0, i
) = tmp
;
5218 pcum
->aapcs_reg
= par
;
5221 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5228 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5229 enum machine_mode mode
,
5230 const_tree type ATTRIBUTE_UNUSED
)
5232 if (!use_vfp_abi (pcs_variant
, false))
5235 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5238 enum machine_mode ag_mode
;
5243 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5248 if (ag_mode
== V2SImode
)
5250 else if (ag_mode
== V4SImode
)
5256 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5257 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5258 for (i
= 0; i
< count
; i
++)
5260 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5261 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5262 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5263 XVECEXP (par
, 0, i
) = tmp
;
5269 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5273 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5274 enum machine_mode mode ATTRIBUTE_UNUSED
,
5275 const_tree type ATTRIBUTE_UNUSED
)
5277 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5278 pcum
->aapcs_vfp_reg_alloc
= 0;
5282 #define AAPCS_CP(X) \
5284 aapcs_ ## X ## _cum_init, \
5285 aapcs_ ## X ## _is_call_candidate, \
5286 aapcs_ ## X ## _allocate, \
5287 aapcs_ ## X ## _is_return_candidate, \
5288 aapcs_ ## X ## _allocate_return_reg, \
5289 aapcs_ ## X ## _advance \
5292 /* Table of co-processors that can be used to pass arguments in
5293 registers. Idealy no arugment should be a candidate for more than
5294 one co-processor table entry, but the table is processed in order
5295 and stops after the first match. If that entry then fails to put
5296 the argument into a co-processor register, the argument will go on
5300 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5301 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5303 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5304 BLKmode) is a candidate for this co-processor's registers; this
5305 function should ignore any position-dependent state in
5306 CUMULATIVE_ARGS and only use call-type dependent information. */
5307 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5309 /* Return true if the argument does get a co-processor register; it
5310 should set aapcs_reg to an RTX of the register allocated as is
5311 required for a return from FUNCTION_ARG. */
5312 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5314 /* Return true if a result of mode MODE (or type TYPE if MODE is
5315 BLKmode) is can be returned in this co-processor's registers. */
5316 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5318 /* Allocate and return an RTX element to hold the return type of a
5319 call, this routine must not fail and will only be called if
5320 is_return_candidate returned true with the same parameters. */
5321 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5323 /* Finish processing this argument and prepare to start processing
5325 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5326 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5334 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5339 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5340 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5347 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5349 /* We aren't passed a decl, so we can't check that a call is local.
5350 However, it isn't clear that that would be a win anyway, since it
5351 might limit some tail-calling opportunities. */
5352 enum arm_pcs pcs_variant
;
5356 const_tree fndecl
= NULL_TREE
;
5358 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5361 fntype
= TREE_TYPE (fntype
);
5364 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5367 pcs_variant
= arm_pcs_default
;
5369 if (pcs_variant
!= ARM_PCS_AAPCS
)
5373 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5374 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
5383 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
5386 /* We aren't passed a decl, so we can't check that a call is local.
5387 However, it isn't clear that that would be a win anyway, since it
5388 might limit some tail-calling opportunities. */
5389 enum arm_pcs pcs_variant
;
5390 int unsignedp ATTRIBUTE_UNUSED
;
5394 const_tree fndecl
= NULL_TREE
;
5396 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5399 fntype
= TREE_TYPE (fntype
);
5402 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5405 pcs_variant
= arm_pcs_default
;
5407 /* Promote integer types. */
5408 if (type
&& INTEGRAL_TYPE_P (type
))
5409 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5411 if (pcs_variant
!= ARM_PCS_AAPCS
)
5415 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5416 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5418 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5422 /* Promotes small structs returned in a register to full-word size
5423 for big-endian AAPCS. */
5424 if (type
&& arm_return_in_msb (type
))
5426 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5427 if (size
% UNITS_PER_WORD
!= 0)
5429 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5430 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5434 return gen_rtx_REG (mode
, R0_REGNUM
);
5438 aapcs_libcall_value (enum machine_mode mode
)
5440 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5441 && GET_MODE_SIZE (mode
) <= 4)
5444 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
5447 /* Lay out a function argument using the AAPCS rules. The rule
5448 numbers referred to here are those in the AAPCS. */
5450 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5451 const_tree type
, bool named
)
5456 /* We only need to do this once per argument. */
5457 if (pcum
->aapcs_arg_processed
)
5460 pcum
->aapcs_arg_processed
= true;
5462 /* Special case: if named is false then we are handling an incoming
5463 anonymous argument which is on the stack. */
5467 /* Is this a potential co-processor register candidate? */
5468 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5470 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
5471 pcum
->aapcs_cprc_slot
= slot
;
5473 /* We don't have to apply any of the rules from part B of the
5474 preparation phase, these are handled elsewhere in the
5479 /* A Co-processor register candidate goes either in its own
5480 class of registers or on the stack. */
5481 if (!pcum
->aapcs_cprc_failed
[slot
])
5483 /* C1.cp - Try to allocate the argument to co-processor
5485 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
5488 /* C2.cp - Put the argument on the stack and note that we
5489 can't assign any more candidates in this slot. We also
5490 need to note that we have allocated stack space, so that
5491 we won't later try to split a non-cprc candidate between
5492 core registers and the stack. */
5493 pcum
->aapcs_cprc_failed
[slot
] = true;
5494 pcum
->can_split
= false;
5497 /* We didn't get a register, so this argument goes on the
5499 gcc_assert (pcum
->can_split
== false);
5504 /* C3 - For double-word aligned arguments, round the NCRN up to the
5505 next even number. */
5506 ncrn
= pcum
->aapcs_ncrn
;
5507 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
5510 nregs
= ARM_NUM_REGS2(mode
, type
);
5512 /* Sigh, this test should really assert that nregs > 0, but a GCC
5513 extension allows empty structs and then gives them empty size; it
5514 then allows such a structure to be passed by value. For some of
5515 the code below we have to pretend that such an argument has
5516 non-zero size so that we 'locate' it correctly either in
5517 registers or on the stack. */
5518 gcc_assert (nregs
>= 0);
5520 nregs2
= nregs
? nregs
: 1;
5522 /* C4 - Argument fits entirely in core registers. */
5523 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
5525 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5526 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
5530 /* C5 - Some core registers left and there are no arguments already
5531 on the stack: split this argument between the remaining core
5532 registers and the stack. */
5533 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
5535 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5536 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5537 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
5541 /* C6 - NCRN is set to 4. */
5542 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5544 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
5548 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5549 for a call to a function whose data type is FNTYPE.
5550 For a library call, FNTYPE is NULL. */
5552 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
5554 tree fndecl ATTRIBUTE_UNUSED
)
5556 /* Long call handling. */
5558 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5560 pcum
->pcs_variant
= arm_pcs_default
;
5562 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5564 if (arm_libcall_uses_aapcs_base (libname
))
5565 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
5567 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
5568 pcum
->aapcs_reg
= NULL_RTX
;
5569 pcum
->aapcs_partial
= 0;
5570 pcum
->aapcs_arg_processed
= false;
5571 pcum
->aapcs_cprc_slot
= -1;
5572 pcum
->can_split
= true;
5574 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5578 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5580 pcum
->aapcs_cprc_failed
[i
] = false;
5581 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
5589 /* On the ARM, the offset starts at 0. */
5591 pcum
->iwmmxt_nregs
= 0;
5592 pcum
->can_split
= true;
5594 /* Varargs vectors are treated the same as long long.
5595 named_count avoids having to change the way arm handles 'named' */
5596 pcum
->named_count
= 0;
5599 if (TARGET_REALLY_IWMMXT
&& fntype
)
5603 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
5605 fn_arg
= TREE_CHAIN (fn_arg
))
5606 pcum
->named_count
+= 1;
5608 if (! pcum
->named_count
)
5609 pcum
->named_count
= INT_MAX
;
5613 /* Return true if we use LRA instead of reload pass. */
5617 return arm_lra_flag
;
5620 /* Return true if mode/type need doubleword alignment. */
5622 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
5624 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
5625 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
5629 /* Determine where to put an argument to a function.
5630 Value is zero to push the argument on the stack,
5631 or a hard register in which to store the argument.
5633 MODE is the argument's machine mode.
5634 TYPE is the data type of the argument (as a tree).
5635 This is null for libcalls where that information may
5637 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5638 the preceding args and about the function being called.
5639 NAMED is nonzero if this argument is a named parameter
5640 (otherwise it is an extra parameter matching an ellipsis).
5642 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5643 other arguments are passed on the stack. If (NAMED == 0) (which happens
5644 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5645 defined), say it is passed in the stack (function_prologue will
5646 indeed make it pass in the stack if necessary). */
5649 arm_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
5650 const_tree type
, bool named
)
5652 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5655 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5656 a call insn (op3 of a call_value insn). */
5657 if (mode
== VOIDmode
)
5660 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5662 aapcs_layout_arg (pcum
, mode
, type
, named
);
5663 return pcum
->aapcs_reg
;
5666 /* Varargs vectors are treated the same as long long.
5667 named_count avoids having to change the way arm handles 'named' */
5668 if (TARGET_IWMMXT_ABI
5669 && arm_vector_mode_supported_p (mode
)
5670 && pcum
->named_count
> pcum
->nargs
+ 1)
5672 if (pcum
->iwmmxt_nregs
<= 9)
5673 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
5676 pcum
->can_split
= false;
5681 /* Put doubleword aligned quantities in even register pairs. */
5683 && ARM_DOUBLEWORD_ALIGN
5684 && arm_needs_doubleword_align (mode
, type
))
5687 /* Only allow splitting an arg between regs and memory if all preceding
5688 args were allocated to regs. For args passed by reference we only count
5689 the reference pointer. */
5690 if (pcum
->can_split
)
5693 nregs
= ARM_NUM_REGS2 (mode
, type
);
5695 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
5698 return gen_rtx_REG (mode
, pcum
->nregs
);
5702 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
5704 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
5705 ? DOUBLEWORD_ALIGNMENT
5710 arm_arg_partial_bytes (cumulative_args_t pcum_v
, enum machine_mode mode
,
5711 tree type
, bool named
)
5713 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5714 int nregs
= pcum
->nregs
;
5716 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5718 aapcs_layout_arg (pcum
, mode
, type
, named
);
5719 return pcum
->aapcs_partial
;
5722 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
5725 if (NUM_ARG_REGS
> nregs
5726 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
5728 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
5733 /* Update the data in PCUM to advance over an argument
5734 of mode MODE and data type TYPE.
5735 (TYPE is null for libcalls where that information may not be available.) */
5738 arm_function_arg_advance (cumulative_args_t pcum_v
, enum machine_mode mode
,
5739 const_tree type
, bool named
)
5741 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5743 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5745 aapcs_layout_arg (pcum
, mode
, type
, named
);
5747 if (pcum
->aapcs_cprc_slot
>= 0)
5749 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
5751 pcum
->aapcs_cprc_slot
= -1;
5754 /* Generic stuff. */
5755 pcum
->aapcs_arg_processed
= false;
5756 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
5757 pcum
->aapcs_reg
= NULL_RTX
;
5758 pcum
->aapcs_partial
= 0;
5763 if (arm_vector_mode_supported_p (mode
)
5764 && pcum
->named_count
> pcum
->nargs
5765 && TARGET_IWMMXT_ABI
)
5766 pcum
->iwmmxt_nregs
+= 1;
5768 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
5772 /* Variable sized types are passed by reference. This is a GCC
5773 extension to the ARM ABI. */
5776 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
5777 enum machine_mode mode ATTRIBUTE_UNUSED
,
5778 const_tree type
, bool named ATTRIBUTE_UNUSED
)
5780 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
  LONG,         /* #pragma long_calls is in effect.  */
  SHORT         /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;
5794 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5796 arm_pragma_long_calls
= LONG
;
5800 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5802 arm_pragma_long_calls
= SHORT
;
5806 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5808 arm_pragma_long_calls
= OFF
;
5811 /* Handle an attribute requiring a FUNCTION_DECL;
5812 arguments as in struct attribute_spec.handler. */
5814 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
5815 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5817 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5819 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5821 *no_add_attrs
= true;
5827 /* Handle an "interrupt" or "isr" attribute;
5828 arguments as in struct attribute_spec.handler. */
5830 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
5835 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5837 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5839 *no_add_attrs
= true;
5841 /* FIXME: the argument if any is checked for type attributes;
5842 should it be checked for decl ones? */
5846 if (TREE_CODE (*node
) == FUNCTION_TYPE
5847 || TREE_CODE (*node
) == METHOD_TYPE
)
5849 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
5851 warning (OPT_Wattributes
, "%qE attribute ignored",
5853 *no_add_attrs
= true;
5856 else if (TREE_CODE (*node
) == POINTER_TYPE
5857 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
5858 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
5859 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
5861 *node
= build_variant_type_copy (*node
);
5862 TREE_TYPE (*node
) = build_type_attribute_variant
5864 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
5865 *no_add_attrs
= true;
5869 /* Possibly pass this attribute on from the type to a decl. */
5870 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
5871 | (int) ATTR_FLAG_FUNCTION_NEXT
5872 | (int) ATTR_FLAG_ARRAY_NEXT
))
5874 *no_add_attrs
= true;
5875 return tree_cons (name
, args
, NULL_TREE
);
5879 warning (OPT_Wattributes
, "%qE attribute ignored",
5888 /* Handle a "pcs" attribute; arguments as in struct
5889 attribute_spec.handler. */
5891 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
5892 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5894 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
5896 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
5897 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      /* Keep the attribute off the node; visibility has been recorded.  */
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
5927 /* Return 0 if the attributes for two types are incompatible, 1 if they
5928 are compatible, and 2 if they are nearly compatible (which causes a
5929 warning to be generated). */
5931 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
5935 /* Check for mismatch of non-default calling convention. */
5936 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
5939 /* Check for mismatched call attributes. */
5940 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5941 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5942 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5943 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5945 /* Only bother to check if an attribute is defined. */
5946 if (l1
| l2
| s1
| s2
)
5948 /* If one type has an attribute, the other must have the same attribute. */
5949 if ((l1
!= l2
) || (s1
!= s2
))
5952 /* Disallow mixed attributes. */
5953 if ((l1
& s2
) || (l2
& s1
))
5957 /* Check for mismatched ISR attribute. */
5958 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
5960 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
5961 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
5963 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
5970 /* Assigns default attributes to newly defined type. This is used to
5971 set short_call/long_call attributes for function types of
5972 functions defined inside corresponding #pragma scopes. */
5974 arm_set_default_type_attributes (tree type
)
5976 /* Add __attribute__ ((long_call)) to all functions, when
5977 inside #pragma long_calls or __attribute__ ((short_call)),
5978 when inside #pragma no_long_calls. */
5979 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
5981 tree type_attr_list
, attr_name
;
5982 type_attr_list
= TYPE_ATTRIBUTES (type
);
5984 if (arm_pragma_long_calls
== LONG
)
5985 attr_name
= get_identifier ("long_call");
5986 else if (arm_pragma_long_calls
== SHORT
)
5987 attr_name
= get_identifier ("short_call");
5991 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
5992 TYPE_ATTRIBUTES (type
) = type_attr_list
;
5996 /* Return true if DECL is known to be linked into section SECTION. */
5999 arm_function_in_section_p (tree decl
, section
*section
)
6001 /* We can only be certain about functions defined in the same
6002 compilation unit. */
6003 if (!TREE_STATIC (decl
))
6006 /* Make sure that SYMBOL always binds to the definition in this
6007 compilation unit. */
6008 if (!targetm
.binds_local_p (decl
))
6011 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6012 if (!DECL_SECTION_NAME (decl
))
6014 /* Make sure that we will not create a unique section for DECL. */
6015 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
6019 return function_section (decl
) == section
;
6022 /* Return nonzero if a 32-bit "long_call" should be generated for
6023 a call from the current function to DECL. We generate a long_call
6026 a. has an __attribute__((long call))
6027 or b. is within the scope of a #pragma long_calls
6028 or c. the -mlong-calls command line switch has been specified
6030 However we do not generate a long call if the function:
6032 d. has an __attribute__ ((short_call))
6033 or e. is inside the scope of a #pragma no_long_calls
6034 or f. is defined in the same section as the current function. */
6037 arm_is_long_call_p (tree decl
)
6042 return TARGET_LONG_CALLS
;
6044 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6045 if (lookup_attribute ("short_call", attrs
))
6048 /* For "f", be conservative, and only cater for cases in which the
6049 whole of the current function is placed in the same section. */
6050 if (!flag_reorder_blocks_and_partition
6051 && TREE_CODE (decl
) == FUNCTION_DECL
6052 && arm_function_in_section_p (decl
, current_function_section ()))
6055 if (lookup_attribute ("long_call", attrs
))
6058 return TARGET_LONG_CALLS
;
6061 /* Return nonzero if it is ok to make a tail-call to DECL. */
6063 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6065 unsigned long func_type
;
6067 if (cfun
->machine
->sibcall_blocked
)
6070 /* Never tailcall something if we are generating code for Thumb-1. */
6074 /* The PIC register is live on entry to VxWorks PLT entries, so we
6075 must make the call before restoring the PIC register. */
6076 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6079 /* Cannot tail-call to long calls, since these are out of range of
6080 a branch instruction. */
6081 if (decl
&& arm_is_long_call_p (decl
))
6084 /* If we are interworking and the function is not declared static
6085 then we can't tail-call it unless we know that it exists in this
6086 compilation unit (since it might be a Thumb routine). */
6087 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6088 && !TREE_ASM_WRITTEN (decl
))
6091 func_type
= arm_current_func_type ();
6092 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6093 if (IS_INTERRUPT (func_type
))
6096 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6098 /* Check that the return value locations are the same. For
6099 example that we aren't returning a value from the sibling in
6100 a VFP register but then need to transfer it to a core
6104 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
6105 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6107 if (!rtx_equal_p (a
, b
))
6111 /* Never tailcall if function may be called with a misaligned SP. */
6112 if (IS_STACKALIGN (func_type
))
6115 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6116 references should become a NOP. Don't convert such calls into
6118 if (TARGET_AAPCS_BASED
6119 && arm_abi
== ARM_ABI_AAPCS
6121 && DECL_WEAK (decl
))
6124 /* Everything else is ok. */
6129 /* Addressing mode support functions. */
6131 /* Return nonzero if X is a legitimate immediate operand when compiling
6132 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6134 legitimate_pic_operand_p (rtx x
)
6136 if (GET_CODE (x
) == SYMBOL_REF
6137 || (GET_CODE (x
) == CONST
6138 && GET_CODE (XEXP (x
, 0)) == PLUS
6139 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6145 /* Record that the current function needs a PIC register. Initialize
6146 cfun->machine->pic_reg if we have not already done so. */
6149 require_pic_register (void)
6151 /* A lot of the logic here is made obscure by the fact that this
6152 routine gets called as part of the rtx cost estimation process.
6153 We don't want those calls to affect any assumptions about the real
6154 function; and further, we can't call entry_of_function() until we
6155 start the real expansion process. */
6156 if (!crtl
->uses_pic_offset_table
)
6158 gcc_assert (can_create_pseudo_p ());
6159 if (arm_pic_register
!= INVALID_REGNUM
6160 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6162 if (!cfun
->machine
->pic_reg
)
6163 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6165 /* Play games to avoid marking the function as needing pic
6166 if we are being called as part of the cost-estimation
6168 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6169 crtl
->uses_pic_offset_table
= 1;
6175 if (!cfun
->machine
->pic_reg
)
6176 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6178 /* Play games to avoid marking the function as needing pic
6179 if we are being called as part of the cost-estimation
6181 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6183 crtl
->uses_pic_offset_table
= 1;
6186 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6187 && arm_pic_register
> LAST_LO_REGNUM
)
6188 emit_move_insn (cfun
->machine
->pic_reg
,
6189 gen_rtx_REG (Pmode
, arm_pic_register
));
6191 arm_load_pic_register (0UL);
6196 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6198 INSN_LOCATION (insn
) = prologue_location
;
6200 /* We can be called during expansion of PHI nodes, where
6201 we can't yet emit instructions directly in the final
6202 insn stream. Queue the insns on the entry edge, they will
6203 be committed after everything else is expanded. */
6204 insert_insn_on_edge (seq
,
6205 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6212 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
6214 if (GET_CODE (orig
) == SYMBOL_REF
6215 || GET_CODE (orig
) == LABEL_REF
)
6221 gcc_assert (can_create_pseudo_p ());
6222 reg
= gen_reg_rtx (Pmode
);
6225 /* VxWorks does not impose a fixed gap between segments; the run-time
6226 gap can be different from the object-file gap. We therefore can't
6227 use GOTOFF unless we are absolutely sure that the symbol is in the
6228 same segment as the GOT. Unfortunately, the flexibility of linker
6229 scripts means that we can't be sure of that in general, so assume
6230 that GOTOFF is never valid on VxWorks. */
6231 if ((GET_CODE (orig
) == LABEL_REF
6232 || (GET_CODE (orig
) == SYMBOL_REF
&&
6233 SYMBOL_REF_LOCAL_P (orig
)))
6235 && arm_pic_data_is_text_relative
)
6236 insn
= arm_pic_static_addr (orig
, reg
);
6242 /* If this function doesn't have a pic register, create one now. */
6243 require_pic_register ();
6245 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6247 /* Make the MEM as close to a constant as possible. */
6248 mem
= SET_SRC (pat
);
6249 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6250 MEM_READONLY_P (mem
) = 1;
6251 MEM_NOTRAP_P (mem
) = 1;
6253 insn
= emit_insn (pat
);
6256 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6258 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6262 else if (GET_CODE (orig
) == CONST
)
6266 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6267 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6270 /* Handle the case where we have: const (UNSPEC_TLS). */
6271 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6272 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6275 /* Handle the case where we have:
6276 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6278 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6279 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6280 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6282 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6288 gcc_assert (can_create_pseudo_p ());
6289 reg
= gen_reg_rtx (Pmode
);
6292 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6294 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6295 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6296 base
== reg
? 0 : reg
);
6298 if (CONST_INT_P (offset
))
6300 /* The base register doesn't really matter, we only want to
6301 test the index for the appropriate mode. */
6302 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6304 gcc_assert (can_create_pseudo_p ());
6305 offset
= force_reg (Pmode
, offset
);
6308 if (CONST_INT_P (offset
))
6309 return plus_constant (Pmode
, base
, INTVAL (offset
));
6312 if (GET_MODE_SIZE (mode
) > 4
6313 && (GET_MODE_CLASS (mode
) == MODE_INT
6314 || TARGET_SOFT_FLOAT
))
6316 emit_insn (gen_addsi3 (reg
, base
, offset
));
6320 return gen_rtx_PLUS (Pmode
, base
, offset
);
6327 /* Find a spare register to use during the prolog of a function. */
6330 thumb_find_work_register (unsigned long pushed_regs_mask
)
6334 /* Check the argument registers first as these are call-used. The
6335 register allocation order means that sometimes r3 might be used
6336 but earlier argument registers might not, so check them all. */
6337 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6338 if (!df_regs_ever_live_p (reg
))
6341 /* Before going on to check the call-saved registers we can try a couple
6342 more ways of deducing that r3 is available. The first is when we are
6343 pushing anonymous arguments onto the stack and we have less than 4
6344 registers worth of fixed arguments(*). In this case r3 will be part of
6345 the variable argument list and so we can be sure that it will be
6346 pushed right at the start of the function. Hence it will be available
6347 for the rest of the prologue.
6348 (*): ie crtl->args.pretend_args_size is greater than 0. */
6349 if (cfun
->machine
->uses_anonymous_args
6350 && crtl
->args
.pretend_args_size
> 0)
6351 return LAST_ARG_REGNUM
;
6353 /* The other case is when we have fixed arguments but less than 4 registers
6354 worth. In this case r3 might be used in the body of the function, but
6355 it is not being used to convey an argument into the function. In theory
6356 we could just check crtl->args.size to see how many bytes are
6357 being passed in argument registers, but it seems that it is unreliable.
6358 Sometimes it will have the value 0 when in fact arguments are being
6359 passed. (See testcase execute/20021111-1.c for an example). So we also
6360 check the args_info.nregs field as well. The problem with this field is
6361 that it makes no allowances for arguments that are passed to the
6362 function but which are not used. Hence we could miss an opportunity
6363 when a function has an unused argument in r3. But it is better to be
6364 safe than to be sorry. */
6365 if (! cfun
->machine
->uses_anonymous_args
6366 && crtl
->args
.size
>= 0
6367 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6368 && (TARGET_AAPCS_BASED
6369 ? crtl
->args
.info
.aapcs_ncrn
< 4
6370 : crtl
->args
.info
.nregs
< 4))
6371 return LAST_ARG_REGNUM
;
6373 /* Otherwise look for a call-saved register that is going to be pushed. */
6374 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6375 if (pushed_regs_mask
& (1 << reg
))
6380 /* Thumb-2 can use high regs. */
6381 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6382 if (pushed_regs_mask
& (1 << reg
))
6385 /* Something went wrong - thumb_compute_save_reg_mask()
6386 should have arranged for a suitable register to be pushed. */
6390 static GTY(()) int pic_labelno
;
6392 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6396 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6398 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6400 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6403 gcc_assert (flag_pic
);
6405 pic_reg
= cfun
->machine
->pic_reg
;
6406 if (TARGET_VXWORKS_RTP
)
6408 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6409 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6410 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6412 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6414 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6415 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6419 /* We use an UNSPEC rather than a LABEL_REF because this label
6420 never appears in the code stream. */
6422 labelno
= GEN_INT (pic_labelno
++);
6423 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6424 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6426 /* On the ARM the PC register contains 'dot + 8' at the time of the
6427 addition, on the Thumb it is 'dot + 4'. */
6428 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6429 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6431 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6435 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6437 else /* TARGET_THUMB1 */
6439 if (arm_pic_register
!= INVALID_REGNUM
6440 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6442 /* We will have pushed the pic register, so we should always be
6443 able to find a work register. */
6444 pic_tmp
= gen_rtx_REG (SImode
,
6445 thumb_find_work_register (saved_regs
));
6446 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6447 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6448 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6450 else if (arm_pic_register
!= INVALID_REGNUM
6451 && arm_pic_register
> LAST_LO_REGNUM
6452 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
6454 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6455 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
6456 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
6459 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6463 /* Need to emit this whether or not we obey regdecls,
6464 since setjmp/longjmp can cause life info to screw up. */
6468 /* Generate code to load the address of a static var when flag_pic is set. */
6470 arm_pic_static_addr (rtx orig
, rtx reg
)
6472 rtx l1
, labelno
, offset_rtx
, insn
;
6474 gcc_assert (flag_pic
);
6476 /* We use an UNSPEC rather than a LABEL_REF because this label
6477 never appears in the code stream. */
6478 labelno
= GEN_INT (pic_labelno
++);
6479 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6480 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6482 /* On the ARM the PC register contains 'dot + 8' at the time of the
6483 addition, on the Thumb it is 'dot + 4'. */
6484 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6485 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
6486 UNSPEC_SYMBOL_OFFSET
);
6487 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
6489 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
6493 /* Return nonzero if X is valid as an ARM state addressing register. */
6495 arm_address_register_rtx_p (rtx x
, int strict_p
)
6505 return ARM_REGNO_OK_FOR_BASE_P (regno
);
6507 return (regno
<= LAST_ARM_REGNUM
6508 || regno
>= FIRST_PSEUDO_REGISTER
6509 || regno
== FRAME_POINTER_REGNUM
6510 || regno
== ARG_POINTER_REGNUM
);
6513 /* Return TRUE if this rtx is the difference of a symbol and a label,
6514 and will reduce to a PC-relative relocation in the object file.
6515 Expressions like this can be left alone when generating PIC, rather
6516 than forced through the GOT. */
6518 pcrel_constant_p (rtx x
)
6520 if (GET_CODE (x
) == MINUS
)
6521 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
6526 /* Return true if X will surely end up in an index register after next
6529 will_be_in_index_register (const_rtx x
)
6531 /* arm.md: calculate_pic_address will split this into a register. */
6532 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
6535 /* Return nonzero if X is a valid ARM state address operand. */
6537 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
6541 enum rtx_code code
= GET_CODE (x
);
6543 if (arm_address_register_rtx_p (x
, strict_p
))
6546 use_ldrd
= (TARGET_LDRD
6548 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6550 if (code
== POST_INC
|| code
== PRE_DEC
6551 || ((code
== PRE_INC
|| code
== POST_DEC
)
6552 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6553 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6555 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6556 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6557 && GET_CODE (XEXP (x
, 1)) == PLUS
6558 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6560 rtx addend
= XEXP (XEXP (x
, 1), 1);
6562 /* Don't allow ldrd post increment by register because it's hard
6563 to fixup invalid register choices. */
6565 && GET_CODE (x
) == POST_MODIFY
6569 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
6570 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
6573 /* After reload constants split into minipools will have addresses
6574 from a LABEL_REF. */
6575 else if (reload_completed
6576 && (code
== LABEL_REF
6578 && GET_CODE (XEXP (x
, 0)) == PLUS
6579 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6580 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6583 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6586 else if (code
== PLUS
)
6588 rtx xop0
= XEXP (x
, 0);
6589 rtx xop1
= XEXP (x
, 1);
6591 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6592 && ((CONST_INT_P (xop1
)
6593 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
6594 || (!strict_p
&& will_be_in_index_register (xop1
))))
6595 || (arm_address_register_rtx_p (xop1
, strict_p
)
6596 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
6600 /* Reload currently can't handle MINUS, so disable this for now */
6601 else if (GET_CODE (x
) == MINUS
)
6603 rtx xop0
= XEXP (x
, 0);
6604 rtx xop1
= XEXP (x
, 1);
6606 return (arm_address_register_rtx_p (xop0
, strict_p
)
6607 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
6611 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6612 && code
== SYMBOL_REF
6613 && CONSTANT_POOL_ADDRESS_P (x
)
6615 && symbol_mentioned_p (get_pool_constant (x
))
6616 && ! pcrel_constant_p (get_pool_constant (x
))))
6622 /* Return nonzero if X is a valid Thumb-2 address operand. */
6624 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6627 enum rtx_code code
= GET_CODE (x
);
6629 if (arm_address_register_rtx_p (x
, strict_p
))
6632 use_ldrd
= (TARGET_LDRD
6634 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6636 if (code
== POST_INC
|| code
== PRE_DEC
6637 || ((code
== PRE_INC
|| code
== POST_DEC
)
6638 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6639 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6641 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6642 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6643 && GET_CODE (XEXP (x
, 1)) == PLUS
6644 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6646 /* Thumb-2 only has autoincrement by constant. */
6647 rtx addend
= XEXP (XEXP (x
, 1), 1);
6648 HOST_WIDE_INT offset
;
6650 if (!CONST_INT_P (addend
))
6653 offset
= INTVAL(addend
);
6654 if (GET_MODE_SIZE (mode
) <= 4)
6655 return (offset
> -256 && offset
< 256);
6657 return (use_ldrd
&& offset
> -1024 && offset
< 1024
6658 && (offset
& 3) == 0);
6661 /* After reload constants split into minipools will have addresses
6662 from a LABEL_REF. */
6663 else if (reload_completed
6664 && (code
== LABEL_REF
6666 && GET_CODE (XEXP (x
, 0)) == PLUS
6667 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6668 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6671 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6674 else if (code
== PLUS
)
6676 rtx xop0
= XEXP (x
, 0);
6677 rtx xop1
= XEXP (x
, 1);
6679 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6680 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
6681 || (!strict_p
&& will_be_in_index_register (xop1
))))
6682 || (arm_address_register_rtx_p (xop1
, strict_p
)
6683 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
6686 /* Normally we can assign constant values to target registers without
6687 the help of constant pool. But there are cases we have to use constant
6689 1) assign a label to register.
6690 2) sign-extend a 8bit value to 32bit and then assign to register.
6692 Constant pool access in format:
6693 (set (reg r0) (mem (symbol_ref (".LC0"))))
6694 will cause the use of literal pool (later in function arm_reorg).
6695 So here we mark such format as an invalid format, then the compiler
6696 will adjust it into:
6697 (set (reg r0) (symbol_ref (".LC0")))
6698 (set (reg r0) (mem (reg r0))).
6699 No extra register is required, and (mem (reg r0)) won't cause the use
6700 of literal pools. */
6701 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
6702 && CONSTANT_POOL_ADDRESS_P (x
))
6705 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6706 && code
== SYMBOL_REF
6707 && CONSTANT_POOL_ADDRESS_P (x
)
6709 && symbol_mentioned_p (get_pool_constant (x
))
6710 && ! pcrel_constant_p (get_pool_constant (x
))))
6716 /* Return nonzero if INDEX is valid for an address index operand in
6719 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
6722 HOST_WIDE_INT range
;
6723 enum rtx_code code
= GET_CODE (index
);
6725 /* Standard coprocessor addressing modes. */
6726 if (TARGET_HARD_FLOAT
6728 && (mode
== SFmode
|| mode
== DFmode
))
6729 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6730 && INTVAL (index
) > -1024
6731 && (INTVAL (index
) & 3) == 0);
6733 /* For quad modes, we restrict the constant offset to be slightly less
6734 than what the instruction format permits. We do this because for
6735 quad mode moves, we will actually decompose them into two separate
6736 double-mode reads or writes. INDEX must therefore be a valid
6737 (double-mode) offset and so should INDEX+8. */
6738 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6739 return (code
== CONST_INT
6740 && INTVAL (index
) < 1016
6741 && INTVAL (index
) > -1024
6742 && (INTVAL (index
) & 3) == 0);
6744 /* We have no such constraint on double mode offsets, so we permit the
6745 full range of the instruction format. */
6746 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6747 return (code
== CONST_INT
6748 && INTVAL (index
) < 1024
6749 && INTVAL (index
) > -1024
6750 && (INTVAL (index
) & 3) == 0);
6752 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6753 return (code
== CONST_INT
6754 && INTVAL (index
) < 1024
6755 && INTVAL (index
) > -1024
6756 && (INTVAL (index
) & 3) == 0);
6758 if (arm_address_register_rtx_p (index
, strict_p
)
6759 && (GET_MODE_SIZE (mode
) <= 4))
6762 if (mode
== DImode
|| mode
== DFmode
)
6764 if (code
== CONST_INT
)
6766 HOST_WIDE_INT val
= INTVAL (index
);
6769 return val
> -256 && val
< 256;
6771 return val
> -4096 && val
< 4092;
6774 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
6777 if (GET_MODE_SIZE (mode
) <= 4
6781 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
6785 rtx xiop0
= XEXP (index
, 0);
6786 rtx xiop1
= XEXP (index
, 1);
6788 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6789 && power_of_two_operand (xiop1
, SImode
))
6790 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6791 && power_of_two_operand (xiop0
, SImode
)));
6793 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
6794 || code
== ASHIFT
|| code
== ROTATERT
)
6796 rtx op
= XEXP (index
, 1);
6798 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6801 && INTVAL (op
) <= 31);
6805 /* For ARM v4 we may be doing a sign-extend operation during the
6811 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
6817 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
6819 return (code
== CONST_INT
6820 && INTVAL (index
) < range
6821 && INTVAL (index
) > -range
);
6824 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6825 index operand. i.e. 1, 2, 4 or 8. */
6827 thumb2_index_mul_operand (rtx op
)
6831 if (!CONST_INT_P (op
))
6835 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
6838 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6840 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
6842 enum rtx_code code
= GET_CODE (index
);
6844 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6845 /* Standard coprocessor addressing modes. */
6846 if (TARGET_HARD_FLOAT
6848 && (mode
== SFmode
|| mode
== DFmode
))
6849 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6850 /* Thumb-2 allows only > -256 index range for it's core register
6851 load/stores. Since we allow SF/DF in core registers, we have
6852 to use the intersection between -256~4096 (core) and -1024~1024
6854 && INTVAL (index
) > -256
6855 && (INTVAL (index
) & 3) == 0);
6857 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6859 /* For DImode assume values will usually live in core regs
6860 and only allow LDRD addressing modes. */
6861 if (!TARGET_LDRD
|| mode
!= DImode
)
6862 return (code
== CONST_INT
6863 && INTVAL (index
) < 1024
6864 && INTVAL (index
) > -1024
6865 && (INTVAL (index
) & 3) == 0);
6868 /* For quad modes, we restrict the constant offset to be slightly less
6869 than what the instruction format permits. We do this because for
6870 quad mode moves, we will actually decompose them into two separate
6871 double-mode reads or writes. INDEX must therefore be a valid
6872 (double-mode) offset and so should INDEX+8. */
6873 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6874 return (code
== CONST_INT
6875 && INTVAL (index
) < 1016
6876 && INTVAL (index
) > -1024
6877 && (INTVAL (index
) & 3) == 0);
6879 /* We have no such constraint on double mode offsets, so we permit the
6880 full range of the instruction format. */
6881 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6882 return (code
== CONST_INT
6883 && INTVAL (index
) < 1024
6884 && INTVAL (index
) > -1024
6885 && (INTVAL (index
) & 3) == 0);
6887 if (arm_address_register_rtx_p (index
, strict_p
)
6888 && (GET_MODE_SIZE (mode
) <= 4))
6891 if (mode
== DImode
|| mode
== DFmode
)
6893 if (code
== CONST_INT
)
6895 HOST_WIDE_INT val
= INTVAL (index
);
6896 /* ??? Can we assume ldrd for thumb2? */
6897 /* Thumb-2 ldrd only has reg+const addressing modes. */
6898 /* ldrd supports offsets of +-1020.
6899 However the ldr fallback does not. */
6900 return val
> -256 && val
< 256 && (val
& 3) == 0;
6908 rtx xiop0
= XEXP (index
, 0);
6909 rtx xiop1
= XEXP (index
, 1);
6911 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6912 && thumb2_index_mul_operand (xiop1
))
6913 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6914 && thumb2_index_mul_operand (xiop0
)));
6916 else if (code
== ASHIFT
)
6918 rtx op
= XEXP (index
, 1);
6920 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6923 && INTVAL (op
) <= 3);
6926 return (code
== CONST_INT
6927 && INTVAL (index
) < 4096
6928 && INTVAL (index
) > -256);
6931 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6933 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
6943 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
6945 return (regno
<= LAST_LO_REGNUM
6946 || regno
> LAST_VIRTUAL_REGISTER
6947 || regno
== FRAME_POINTER_REGNUM
6948 || (GET_MODE_SIZE (mode
) >= 4
6949 && (regno
== STACK_POINTER_REGNUM
6950 || regno
>= FIRST_PSEUDO_REGISTER
6951 || x
== hard_frame_pointer_rtx
6952 || x
== arg_pointer_rtx
)));
6955 /* Return nonzero if x is a legitimate index register. This is the case
6956 for any base register that can access a QImode object. */
6958 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
6960 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
6963 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6965 The AP may be eliminated to either the SP or the FP, so we use the
6966 least common denominator, e.g. SImode, and offsets from 0 to 64.
6968 ??? Verify whether the above is the right approach.
6970 ??? Also, the FP may be eliminated to the SP, so perhaps that
6971 needs special handling also.
6973 ??? Look at how the mips16 port solves this problem. It probably uses
6974 better ways to solve some of these problems.
6976 Although it is not incorrect, we don't accept QImode and HImode
6977 addresses based on the frame pointer or arg pointer until the
6978 reload pass starts. This is so that eliminating such addresses
6979 into stack based ones won't produce impossible code. */
6981 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6983 /* ??? Not clear if this is right. Experiment. */
6984 if (GET_MODE_SIZE (mode
) < 4
6985 && !(reload_in_progress
|| reload_completed
)
6986 && (reg_mentioned_p (frame_pointer_rtx
, x
)
6987 || reg_mentioned_p (arg_pointer_rtx
, x
)
6988 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
6989 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
6990 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
6991 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
6994 /* Accept any base register. SP only in SImode or larger. */
6995 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
6998 /* This is PC relative data before arm_reorg runs. */
6999 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7000 && GET_CODE (x
) == SYMBOL_REF
7001 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7004 /* This is PC relative data after arm_reorg runs. */
7005 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7007 && (GET_CODE (x
) == LABEL_REF
7008 || (GET_CODE (x
) == CONST
7009 && GET_CODE (XEXP (x
, 0)) == PLUS
7010 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7011 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7014 /* Post-inc indexing only supported for SImode and larger. */
7015 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7016 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7019 else if (GET_CODE (x
) == PLUS
)
7021 /* REG+REG address can be any two index registers. */
7022 /* We disallow FRAME+REG addressing since we know that FRAME
7023 will be replaced with STACK, and SP relative addressing only
7024 permits SP+OFFSET. */
7025 if (GET_MODE_SIZE (mode
) <= 4
7026 && XEXP (x
, 0) != frame_pointer_rtx
7027 && XEXP (x
, 1) != frame_pointer_rtx
7028 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7029 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7030 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7033 /* REG+const has 5-7 bit offset for non-SP registers. */
7034 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7035 || XEXP (x
, 0) == arg_pointer_rtx
)
7036 && CONST_INT_P (XEXP (x
, 1))
7037 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7040 /* REG+const has 10-bit offset for SP, but only SImode and
7041 larger is supported. */
7042 /* ??? Should probably check for DI/DFmode overflow here
7043 just like GO_IF_LEGITIMATE_OFFSET does. */
7044 else if (REG_P (XEXP (x
, 0))
7045 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7046 && GET_MODE_SIZE (mode
) >= 4
7047 && CONST_INT_P (XEXP (x
, 1))
7048 && INTVAL (XEXP (x
, 1)) >= 0
7049 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7050 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7053 else if (REG_P (XEXP (x
, 0))
7054 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7055 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7056 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7057 && REGNO (XEXP (x
, 0))
7058 <= LAST_VIRTUAL_POINTER_REGISTER
))
7059 && GET_MODE_SIZE (mode
) >= 4
7060 && CONST_INT_P (XEXP (x
, 1))
7061 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7065 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7066 && GET_MODE_SIZE (mode
) == 4
7067 && GET_CODE (x
) == SYMBOL_REF
7068 && CONSTANT_POOL_ADDRESS_P (x
)
7070 && symbol_mentioned_p (get_pool_constant (x
))
7071 && ! pcrel_constant_p (get_pool_constant (x
))))
7077 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7078 instruction of mode MODE. */
7080 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
7082 switch (GET_MODE_SIZE (mode
))
7085 return val
>= 0 && val
< 32;
7088 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7092 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7098 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
7101 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7102 else if (TARGET_THUMB2
)
7103 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7104 else /* if (TARGET_THUMB1) */
7105 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7108 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7110 Given an rtx X being reloaded into a reg required to be
7111 in class CLASS, return the class of reg to actually use.
7112 In general this is just CLASS, but for the Thumb core registers and
7113 immediate constants we prefer a LO_REGS class or a subset. */
7116 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7122 if (rclass
== GENERAL_REGS
)
7129 /* Build the SYMBOL_REF for __tls_get_addr. */
7131 static GTY(()) rtx tls_get_addr_libfunc
;
7134 get_tls_get_addr (void)
7136 if (!tls_get_addr_libfunc
)
7137 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7138 return tls_get_addr_libfunc
;
7142 arm_load_tp (rtx target
)
7145 target
= gen_reg_rtx (SImode
);
7149 /* Can return in any reg. */
7150 emit_insn (gen_load_tp_hard (target
));
7154 /* Always returned in r0. Immediately copy the result into a pseudo,
7155 otherwise other uses of r0 (e.g. setting up function arguments) may
7156 clobber the value. */
7160 emit_insn (gen_load_tp_soft ());
7162 tmp
= gen_rtx_REG (SImode
, 0);
7163 emit_move_insn (target
, tmp
);
7169 load_tls_operand (rtx x
, rtx reg
)
7173 if (reg
== NULL_RTX
)
7174 reg
= gen_reg_rtx (SImode
);
7176 tmp
= gen_rtx_CONST (SImode
, x
);
7178 emit_move_insn (reg
, tmp
);
7184 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7186 rtx insns
, label
, labelno
, sum
;
7188 gcc_assert (reloc
!= TLS_DESCSEQ
);
7191 labelno
= GEN_INT (pic_labelno
++);
7192 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7193 label
= gen_rtx_CONST (VOIDmode
, label
);
7195 sum
= gen_rtx_UNSPEC (Pmode
,
7196 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7197 GEN_INT (TARGET_ARM
? 8 : 4)),
7199 reg
= load_tls_operand (sum
, reg
);
7202 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7204 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7206 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7207 LCT_PURE
, /* LCT_CONST? */
7208 Pmode
, 1, reg
, Pmode
);
7210 insns
= get_insns ();
7217 arm_tls_descseq_addr (rtx x
, rtx reg
)
7219 rtx labelno
= GEN_INT (pic_labelno
++);
7220 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7221 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7222 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7223 gen_rtx_CONST (VOIDmode
, label
),
7224 GEN_INT (!TARGET_ARM
)),
7226 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
7228 emit_insn (gen_tlscall (x
, labelno
));
7230 reg
= gen_reg_rtx (SImode
);
7232 gcc_assert (REGNO (reg
) != 0);
7234 emit_move_insn (reg
, reg0
);
7240 legitimize_tls_address (rtx x
, rtx reg
)
7242 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7243 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7247 case TLS_MODEL_GLOBAL_DYNAMIC
:
7248 if (TARGET_GNU2_TLS
)
7250 reg
= arm_tls_descseq_addr (x
, reg
);
7252 tp
= arm_load_tp (NULL_RTX
);
7254 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7258 /* Original scheme */
7259 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7260 dest
= gen_reg_rtx (Pmode
);
7261 emit_libcall_block (insns
, dest
, ret
, x
);
7265 case TLS_MODEL_LOCAL_DYNAMIC
:
7266 if (TARGET_GNU2_TLS
)
7268 reg
= arm_tls_descseq_addr (x
, reg
);
7270 tp
= arm_load_tp (NULL_RTX
);
7272 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7276 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7278 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7279 share the LDM result with other LD model accesses. */
7280 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7282 dest
= gen_reg_rtx (Pmode
);
7283 emit_libcall_block (insns
, dest
, ret
, eqv
);
7285 /* Load the addend. */
7286 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7287 GEN_INT (TLS_LDO32
)),
7289 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7290 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7294 case TLS_MODEL_INITIAL_EXEC
:
7295 labelno
= GEN_INT (pic_labelno
++);
7296 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7297 label
= gen_rtx_CONST (VOIDmode
, label
);
7298 sum
= gen_rtx_UNSPEC (Pmode
,
7299 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7300 GEN_INT (TARGET_ARM
? 8 : 4)),
7302 reg
= load_tls_operand (sum
, reg
);
7305 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7306 else if (TARGET_THUMB2
)
7307 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7310 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7311 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7314 tp
= arm_load_tp (NULL_RTX
);
7316 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7318 case TLS_MODEL_LOCAL_EXEC
:
7319 tp
= arm_load_tp (NULL_RTX
);
7321 reg
= gen_rtx_UNSPEC (Pmode
,
7322 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7324 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7326 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7333 /* Try machine-dependent ways of modifying an illegitimate address
7334 to be legitimate. If we find one, return the new, valid address. */
7336 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7338 if (arm_tls_referenced_p (x
))
7342 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
7344 addend
= XEXP (XEXP (x
, 0), 1);
7345 x
= XEXP (XEXP (x
, 0), 0);
7348 if (GET_CODE (x
) != SYMBOL_REF
)
7351 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
7353 x
= legitimize_tls_address (x
, NULL_RTX
);
7357 x
= gen_rtx_PLUS (SImode
, x
, addend
);
7366 /* TODO: legitimize_address for Thumb2. */
7369 return thumb_legitimize_address (x
, orig_x
, mode
);
7372 if (GET_CODE (x
) == PLUS
)
7374 rtx xop0
= XEXP (x
, 0);
7375 rtx xop1
= XEXP (x
, 1);
7377 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7378 xop0
= force_reg (SImode
, xop0
);
7380 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7381 && !symbol_mentioned_p (xop1
))
7382 xop1
= force_reg (SImode
, xop1
);
7384 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7385 && CONST_INT_P (xop1
))
7387 HOST_WIDE_INT n
, low_n
;
7391 /* VFP addressing modes actually allow greater offsets, but for
7392 now we just stick with the lowest common denominator. */
7394 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7406 low_n
= ((mode
) == TImode
? 0
7407 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7411 base_reg
= gen_reg_rtx (SImode
);
7412 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7413 emit_move_insn (base_reg
, val
);
7414 x
= plus_constant (Pmode
, base_reg
, low_n
);
7416 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7417 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7420 /* XXX We don't allow MINUS any more -- see comment in
7421 arm_legitimate_address_outer_p (). */
7422 else if (GET_CODE (x
) == MINUS
)
7424 rtx xop0
= XEXP (x
, 0);
7425 rtx xop1
= XEXP (x
, 1);
7427 if (CONSTANT_P (xop0
))
7428 xop0
= force_reg (SImode
, xop0
);
7430 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
7431 xop1
= force_reg (SImode
, xop1
);
7433 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7434 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
7437 /* Make sure to take full advantage of the pre-indexed addressing mode
7438 with absolute addresses which often allows for the base register to
7439 be factorized for multiple adjacent memory references, and it might
7440 even allows for the mini pool to be avoided entirely. */
7441 else if (CONST_INT_P (x
) && optimize
> 0)
7444 HOST_WIDE_INT mask
, base
, index
;
7447 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7448 use a 8-bit index. So let's use a 12-bit index for SImode only and
7449 hope that arm_gen_constant will enable ldrb to use more bits. */
7450 bits
= (mode
== SImode
) ? 12 : 8;
7451 mask
= (1 << bits
) - 1;
7452 base
= INTVAL (x
) & ~mask
;
7453 index
= INTVAL (x
) & mask
;
7454 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
7456 /* It'll most probably be more efficient to generate the base
7457 with more bits set and use a negative index instead. */
7461 base_reg
= force_reg (SImode
, GEN_INT (base
));
7462 x
= plus_constant (Pmode
, base_reg
, index
);
7467 /* We need to find and carefully transform any SYMBOL and LABEL
7468 references; so go back to the original address expression. */
7469 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7471 if (new_x
!= orig_x
)
7479 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7480 to be legitimate. If we find one, return the new, valid address. */
7482 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7484 if (GET_CODE (x
) == PLUS
7485 && CONST_INT_P (XEXP (x
, 1))
7486 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
7487 || INTVAL (XEXP (x
, 1)) < 0))
7489 rtx xop0
= XEXP (x
, 0);
7490 rtx xop1
= XEXP (x
, 1);
7491 HOST_WIDE_INT offset
= INTVAL (xop1
);
7493 /* Try and fold the offset into a biasing of the base register and
7494 then offsetting that. Don't do this when optimizing for space
7495 since it can cause too many CSEs. */
7496 if (optimize_size
&& offset
>= 0
7497 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
7499 HOST_WIDE_INT delta
;
7502 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
7503 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
7504 delta
= 31 * GET_MODE_SIZE (mode
);
7506 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
7508 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
7510 x
= plus_constant (Pmode
, xop0
, delta
);
7512 else if (offset
< 0 && offset
> -256)
7513 /* Small negative offsets are best done with a subtract before the
7514 dereference, forcing these into a register normally takes two
7516 x
= force_operand (x
, NULL_RTX
);
7519 /* For the remaining cases, force the constant into a register. */
7520 xop1
= force_reg (SImode
, xop1
);
7521 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7524 else if (GET_CODE (x
) == PLUS
7525 && s_register_operand (XEXP (x
, 1), SImode
)
7526 && !s_register_operand (XEXP (x
, 0), SImode
))
7528 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
7530 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
7535 /* We need to find and carefully transform any SYMBOL and LABEL
7536 references; so go back to the original address expression. */
7537 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7539 if (new_x
!= orig_x
)
7547 arm_legitimize_reload_address (rtx
*p
,
7548 enum machine_mode mode
,
7549 int opnum
, int type
,
7550 int ind_levels ATTRIBUTE_UNUSED
)
7552 /* We must recognize output that we have already generated ourselves. */
7553 if (GET_CODE (*p
) == PLUS
7554 && GET_CODE (XEXP (*p
, 0)) == PLUS
7555 && REG_P (XEXP (XEXP (*p
, 0), 0))
7556 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
7557 && CONST_INT_P (XEXP (*p
, 1)))
7559 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7560 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7561 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7565 if (GET_CODE (*p
) == PLUS
7566 && REG_P (XEXP (*p
, 0))
7567 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
7568 /* If the base register is equivalent to a constant, let the generic
7569 code handle it. Otherwise we will run into problems if a future
7570 reload pass decides to rematerialize the constant. */
7571 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
7572 && CONST_INT_P (XEXP (*p
, 1)))
7574 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
7575 HOST_WIDE_INT low
, high
;
7577 /* Detect coprocessor load/stores. */
7578 bool coproc_p
= ((TARGET_HARD_FLOAT
7580 && (mode
== SFmode
|| mode
== DFmode
))
7581 || (TARGET_REALLY_IWMMXT
7582 && VALID_IWMMXT_REG_MODE (mode
))
7584 && (VALID_NEON_DREG_MODE (mode
)
7585 || VALID_NEON_QREG_MODE (mode
))));
7587 /* For some conditions, bail out when lower two bits are unaligned. */
7588 if ((val
& 0x3) != 0
7589 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7591 /* For DI, and DF under soft-float: */
7592 || ((mode
== DImode
|| mode
== DFmode
)
7593 /* Without ldrd, we use stm/ldm, which does not
7594 fair well with unaligned bits. */
7596 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7597 || TARGET_THUMB2
))))
7600 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7601 of which the (reg+high) gets turned into a reload add insn,
7602 we try to decompose the index into high/low values that can often
7603 also lead to better reload CSE.
7605 ldr r0, [r2, #4100] // Offset too large
7606 ldr r1, [r2, #4104] // Offset too large
7608 is best reloaded as:
7614 which post-reload CSE can simplify in most cases to eliminate the
7615 second add instruction:
7620 The idea here is that we want to split out the bits of the constant
7621 as a mask, rather than as subtracting the maximum offset that the
7622 respective type of load/store used can handle.
7624 When encountering negative offsets, we can still utilize it even if
7625 the overall offset is positive; sometimes this may lead to an immediate
7626 that can be constructed with fewer instructions.
7628 ldr r0, [r2, #0x3FFFFC]
7630 This is best reloaded as:
7631 add t1, r2, #0x400000
7634 The trick for spotting this for a load insn with N bits of offset
7635 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
7636 negative offset that is going to make bit N and all the bits below
7637 it become zero in the remainder part.
7639 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7640 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7641 used in most cases of ARM load/store instructions. */
7643 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7644 (((VAL) & ((1 << (N)) - 1)) \
7645 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7650 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
7652 /* NEON quad-word load/stores are made of two double-word accesses,
7653 so the valid index range is reduced by 8. Treat as 9-bit range if
7655 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
7656 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
7658 else if (GET_MODE_SIZE (mode
) == 8)
7661 low
= (TARGET_THUMB2
7662 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
7663 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
7665 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7666 to access doublewords. The supported load/store offsets are
7667 -8, -4, and 4, which we try to produce here. */
7668 low
= ((val
& 0xf) ^ 0x8) - 0x8;
7670 else if (GET_MODE_SIZE (mode
) < 8)
7672 /* NEON element load/stores do not have an offset. */
7673 if (TARGET_NEON_FP16
&& mode
== HFmode
)
7678 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7679 Try the wider 12-bit range first, and re-try if the result
7681 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7683 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7687 if (mode
== HImode
|| mode
== HFmode
)
7690 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7693 /* The storehi/movhi_bytes fallbacks can use only
7694 [-4094,+4094] of the full ldrb/strb index range. */
7695 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7696 if (low
== 4095 || low
== -4095)
7701 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7707 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
7708 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
7709 - (unsigned HOST_WIDE_INT
) 0x80000000);
7710 /* Check for overflow or zero */
7711 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
7714 /* Reload the high part into a base reg; leave the low part
7716 Note that replacing this gen_rtx_PLUS with plus_constant is
7717 wrong in this case because we rely on the
7718 (plus (plus reg c1) c2) structure being preserved so that
7719 XEXP (*p, 0) in push_reload below uses the correct term. */
7720 *p
= gen_rtx_PLUS (GET_MODE (*p
),
7721 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
7724 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7725 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7726 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7734 thumb_legitimize_reload_address (rtx
*x_p
,
7735 enum machine_mode mode
,
7736 int opnum
, int type
,
7737 int ind_levels ATTRIBUTE_UNUSED
)
7741 if (GET_CODE (x
) == PLUS
7742 && GET_MODE_SIZE (mode
) < 4
7743 && REG_P (XEXP (x
, 0))
7744 && XEXP (x
, 0) == stack_pointer_rtx
7745 && CONST_INT_P (XEXP (x
, 1))
7746 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7751 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7752 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7756 /* If both registers are hi-regs, then it's better to reload the
7757 entire expression rather than each register individually. That
7758 only requires one reload register rather than two. */
7759 if (GET_CODE (x
) == PLUS
7760 && REG_P (XEXP (x
, 0))
7761 && REG_P (XEXP (x
, 1))
7762 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
7763 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
7768 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7769 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7776 /* Test for various thread-local symbols. */
7778 /* Helper for arm_tls_referenced_p. */
7781 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
7783 if (GET_CODE (*x
) == SYMBOL_REF
)
7784 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
7786 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7787 TLS offsets, not real symbol references. */
7788 if (GET_CODE (*x
) == UNSPEC
7789 && XINT (*x
, 1) == UNSPEC_TLS
)
7795 /* Return TRUE if X contains any TLS symbol references. */
7798 arm_tls_referenced_p (rtx x
)
7800 if (! TARGET_HAVE_TLS
)
7803 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
7806 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7808 On the ARM, allow any integer (invalid ones are removed later by insn
7809 patterns), nice doubles and symbol_refs which refer to the function's
7812 When generating pic allow anything. */
7815 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
7817 /* At present, we have no support for Neon structure constants, so forbid
7818 them here. It might be possible to handle simple cases like 0 and -1
7820 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
7823 return flag_pic
|| !label_mentioned_p (x
);
7827 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7829 return (CONST_INT_P (x
)
7830 || CONST_DOUBLE_P (x
)
7831 || CONSTANT_ADDRESS_P (x
)
7836 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
7838 return (!arm_cannot_force_const_mem (mode
, x
)
7840 ? arm_legitimate_constant_p_1 (mode
, x
)
7841 : thumb_legitimate_constant_p (mode
, x
)));
7844 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7847 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7851 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
7853 split_const (x
, &base
, &offset
);
7854 if (GET_CODE (base
) == SYMBOL_REF
7855 && !offset_within_block_p (base
, INTVAL (offset
)))
7858 return arm_tls_referenced_p (x
);
/* True if X is a (hard or pseudo) register, possibly wrapped in a SUBREG.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* Strip a SUBREG wrapper (if any) off X, yielding the underlying reg.  */
#define REG_OR_SUBREG_RTX(X)						\
  (REG_P (X) ? (X) : SUBREG_REG (X))
7869 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7871 enum machine_mode mode
= GET_MODE (x
);
7880 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7887 return COSTS_N_INSNS (1);
7890 if (CONST_INT_P (XEXP (x
, 1)))
7893 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7900 return COSTS_N_INSNS (2) + cycles
;
7902 return COSTS_N_INSNS (1) + 16;
7905 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7907 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
7908 return (COSTS_N_INSNS (words
)
7909 + 4 * ((MEM_P (SET_SRC (x
)))
7910 + MEM_P (SET_DEST (x
))));
7915 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7917 if (thumb_shiftable_const (INTVAL (x
)))
7918 return COSTS_N_INSNS (2);
7919 return COSTS_N_INSNS (3);
7921 else if ((outer
== PLUS
|| outer
== COMPARE
)
7922 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7924 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7925 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7926 return COSTS_N_INSNS (1);
7927 else if (outer
== AND
)
7930 /* This duplicates the tests in the andsi3 expander. */
7931 for (i
= 9; i
<= 31; i
++)
7932 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7933 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7934 return COSTS_N_INSNS (2);
7936 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7937 || outer
== LSHIFTRT
)
7939 return COSTS_N_INSNS (2);
7945 return COSTS_N_INSNS (3);
7963 /* XXX another guess. */
7964 /* Memory costs quite a lot for the first word, but subsequent words
7965 load at the equivalent of a single insn each. */
7966 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7967 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7972 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7978 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
7979 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
7985 return total
+ COSTS_N_INSNS (1);
7987 /* Assume a two-shift sequence. Increase the cost slightly so
7988 we prefer actual shifts over an extend operation. */
7989 return total
+ 1 + COSTS_N_INSNS (2);
7997 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
7999 enum machine_mode mode
= GET_MODE (x
);
8000 enum rtx_code subcode
;
8002 enum rtx_code code
= GET_CODE (x
);
8008 /* Memory costs quite a lot for the first word, but subsequent words
8009 load at the equivalent of a single insn each. */
8010 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8017 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8018 *total
= COSTS_N_INSNS (2);
8019 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8020 *total
= COSTS_N_INSNS (4);
8022 *total
= COSTS_N_INSNS (20);
8026 if (REG_P (XEXP (x
, 1)))
8027 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8028 else if (!CONST_INT_P (XEXP (x
, 1)))
8029 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8035 *total
+= COSTS_N_INSNS (4);
8040 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8041 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8044 *total
+= COSTS_N_INSNS (3);
8048 *total
+= COSTS_N_INSNS (1);
8049 /* Increase the cost of complex shifts because they aren't any faster,
8050 and reduce dual issue opportunities. */
8051 if (arm_tune_cortex_a9
8052 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8060 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8061 if (CONST_INT_P (XEXP (x
, 0))
8062 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8064 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8068 if (CONST_INT_P (XEXP (x
, 1))
8069 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8071 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8078 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8080 if (TARGET_HARD_FLOAT
8082 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8084 *total
= COSTS_N_INSNS (1);
8085 if (CONST_DOUBLE_P (XEXP (x
, 0))
8086 && arm_const_double_rtx (XEXP (x
, 0)))
8088 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8092 if (CONST_DOUBLE_P (XEXP (x
, 1))
8093 && arm_const_double_rtx (XEXP (x
, 1)))
8095 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8101 *total
= COSTS_N_INSNS (20);
8105 *total
= COSTS_N_INSNS (1);
8106 if (CONST_INT_P (XEXP (x
, 0))
8107 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8109 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8113 subcode
= GET_CODE (XEXP (x
, 1));
8114 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8115 || subcode
== LSHIFTRT
8116 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8118 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8119 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8123 /* A shift as a part of RSB costs no more than RSB itself. */
8124 if (GET_CODE (XEXP (x
, 0)) == MULT
8125 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8127 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8128 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8133 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8135 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8136 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8140 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8141 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8143 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8144 if (REG_P (XEXP (XEXP (x
, 1), 0))
8145 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8146 *total
+= COSTS_N_INSNS (1);
8154 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8155 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8156 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8158 *total
= COSTS_N_INSNS (1);
8159 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8161 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8165 /* MLA: All arguments must be registers. We filter out
8166 multiplication by a power of two, so that we fall down into
8168 if (GET_CODE (XEXP (x
, 0)) == MULT
8169 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8171 /* The cost comes from the cost of the multiply. */
8175 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8177 if (TARGET_HARD_FLOAT
8179 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8181 *total
= COSTS_N_INSNS (1);
8182 if (CONST_DOUBLE_P (XEXP (x
, 1))
8183 && arm_const_double_rtx (XEXP (x
, 1)))
8185 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8192 *total
= COSTS_N_INSNS (20);
8196 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8197 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8199 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8200 if (REG_P (XEXP (XEXP (x
, 0), 0))
8201 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8202 *total
+= COSTS_N_INSNS (1);
8208 case AND
: case XOR
: case IOR
:
8210 /* Normally the frame registers will be spilt into reg+const during
8211 reload, so it is a bad idea to combine them with other instructions,
8212 since then they might not be moved outside of loops. As a compromise
8213 we allow integration with ops that have a constant as their second
8215 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8216 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8217 && !CONST_INT_P (XEXP (x
, 1)))
8218 *total
= COSTS_N_INSNS (1);
8222 *total
+= COSTS_N_INSNS (2);
8223 if (CONST_INT_P (XEXP (x
, 1))
8224 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8226 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8233 *total
+= COSTS_N_INSNS (1);
8234 if (CONST_INT_P (XEXP (x
, 1))
8235 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8237 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8240 subcode
= GET_CODE (XEXP (x
, 0));
8241 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8242 || subcode
== LSHIFTRT
8243 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8245 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8246 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8251 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8253 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8254 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8258 if (subcode
== UMIN
|| subcode
== UMAX
8259 || subcode
== SMIN
|| subcode
== SMAX
)
8261 *total
= COSTS_N_INSNS (3);
8268 /* This should have been handled by the CPU specific routines. */
8272 if (arm_arch3m
&& mode
== SImode
8273 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8274 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8275 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8276 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8277 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8278 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8280 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8283 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8287 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8289 if (TARGET_HARD_FLOAT
8291 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8293 *total
= COSTS_N_INSNS (1);
8296 *total
= COSTS_N_INSNS (2);
8302 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8303 if (mode
== SImode
&& code
== NOT
)
8305 subcode
= GET_CODE (XEXP (x
, 0));
8306 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8307 || subcode
== LSHIFTRT
8308 || subcode
== ROTATE
|| subcode
== ROTATERT
8310 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8312 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8313 /* Register shifts cost an extra cycle. */
8314 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8315 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8324 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8326 *total
= COSTS_N_INSNS (4);
8330 operand
= XEXP (x
, 0);
8332 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8333 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8334 && REG_P (XEXP (operand
, 0))
8335 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8336 *total
+= COSTS_N_INSNS (1);
8337 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8338 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8342 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8344 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8350 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8351 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8353 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8359 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8360 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8362 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8382 /* SCC insns. In the case where the comparison has already been
8383 performed, then they cost 2 instructions. Otherwise they need
8384 an additional comparison before them. */
8385 *total
= COSTS_N_INSNS (2);
8386 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8393 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8399 *total
+= COSTS_N_INSNS (1);
8400 if (CONST_INT_P (XEXP (x
, 1))
8401 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8403 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8407 subcode
= GET_CODE (XEXP (x
, 0));
8408 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8409 || subcode
== LSHIFTRT
8410 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8412 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8413 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8418 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8420 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8421 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8431 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8432 if (!CONST_INT_P (XEXP (x
, 1))
8433 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8434 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8438 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8440 if (TARGET_HARD_FLOAT
8442 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8444 *total
= COSTS_N_INSNS (1);
8447 *total
= COSTS_N_INSNS (20);
8450 *total
= COSTS_N_INSNS (1);
8452 *total
+= COSTS_N_INSNS (3);
8458 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8460 rtx op
= XEXP (x
, 0);
8461 enum machine_mode opmode
= GET_MODE (op
);
8464 *total
+= COSTS_N_INSNS (1);
8466 if (opmode
!= SImode
)
8470 /* If !arm_arch4, we use one of the extendhisi2_mem
8471 or movhi_bytes patterns for HImode. For a QImode
8472 sign extension, we first zero-extend from memory
8473 and then perform a shift sequence. */
8474 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8475 *total
+= COSTS_N_INSNS (2);
8478 *total
+= COSTS_N_INSNS (1);
8480 /* We don't have the necessary insn, so we need to perform some
8482 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8483 /* An and with constant 255. */
8484 *total
+= COSTS_N_INSNS (1);
8486 /* A shift sequence. Increase costs slightly to avoid
8487 combining two shifts into an extend operation. */
8488 *total
+= COSTS_N_INSNS (2) + 1;
8494 switch (GET_MODE (XEXP (x
, 0)))
8501 *total
= COSTS_N_INSNS (1);
8511 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8515 if (const_ok_for_arm (INTVAL (x
))
8516 || const_ok_for_arm (~INTVAL (x
)))
8517 *total
= COSTS_N_INSNS (1);
8519 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8520 INTVAL (x
), NULL_RTX
,
8527 *total
= COSTS_N_INSNS (3);
8531 *total
= COSTS_N_INSNS (1);
8535 *total
= COSTS_N_INSNS (1);
8536 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8540 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8541 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8542 *total
= COSTS_N_INSNS (1);
8544 *total
= COSTS_N_INSNS (4);
8548 /* The vec_extract patterns accept memory operands that require an
8549 address reload. Account for the cost of that reload to give the
8550 auto-inc-dec pass an incentive to try to replace them. */
8551 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8552 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8554 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8555 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8556 *total
+= COSTS_N_INSNS (1);
8559 /* Likewise for the vec_set patterns. */
8560 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8561 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8562 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8564 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8565 *total
= rtx_cost (mem
, code
, 0, speed
);
8566 if (!neon_vector_mem_operand (mem
, 2, true))
8567 *total
+= COSTS_N_INSNS (1);
8573 /* We cost this as high as our memory costs to allow this to
8574 be hoisted from loops. */
8575 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8577 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8583 && TARGET_HARD_FLOAT
8585 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8586 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8587 *total
= COSTS_N_INSNS (1);
8589 *total
= COSTS_N_INSNS (4);
8593 *total
= COSTS_N_INSNS (4);
8598 /* Estimates the size cost of thumb1 instructions.
8599 For now most of the code is copied from thumb1_rtx_costs. We need more
8600 fine grain tuning when we have more related test cases. */
8602 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8604 enum machine_mode mode
= GET_MODE (x
);
8613 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8617 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8618 defined by RTL expansion, especially for the expansion of
8620 if ((GET_CODE (XEXP (x
, 0)) == MULT
8621 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8622 || (GET_CODE (XEXP (x
, 1)) == MULT
8623 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8624 return COSTS_N_INSNS (2);
8625 /* On purpose fall through for normal RTX. */
8629 return COSTS_N_INSNS (1);
8632 if (CONST_INT_P (XEXP (x
, 1)))
8634 /* Thumb1 mul instruction can't operate on const. We must Load it
8635 into a register first. */
8636 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8637 return COSTS_N_INSNS (1) + const_size
;
8639 return COSTS_N_INSNS (1);
8642 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8644 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8645 return (COSTS_N_INSNS (words
)
8646 + 4 * ((MEM_P (SET_SRC (x
)))
8647 + MEM_P (SET_DEST (x
))));
8652 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8653 return COSTS_N_INSNS (1);
8654 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8655 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8656 return COSTS_N_INSNS (2);
8657 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8658 if (thumb_shiftable_const (INTVAL (x
)))
8659 return COSTS_N_INSNS (2);
8660 return COSTS_N_INSNS (3);
8662 else if ((outer
== PLUS
|| outer
== COMPARE
)
8663 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8665 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8666 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8667 return COSTS_N_INSNS (1);
8668 else if (outer
== AND
)
8671 /* This duplicates the tests in the andsi3 expander. */
8672 for (i
= 9; i
<= 31; i
++)
8673 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8674 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8675 return COSTS_N_INSNS (2);
8677 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8678 || outer
== LSHIFTRT
)
8680 return COSTS_N_INSNS (2);
8686 return COSTS_N_INSNS (3);
8704 /* XXX another guess. */
8705 /* Memory costs quite a lot for the first word, but subsequent words
8706 load at the equivalent of a single insn each. */
8707 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8708 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8713 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8718 /* XXX still guessing. */
8719 switch (GET_MODE (XEXP (x
, 0)))
8722 return (1 + (mode
== DImode
? 4 : 0)
8723 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8726 return (4 + (mode
== DImode
? 4 : 0)
8727 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8730 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8741 /* RTX costs when optimizing for size. */
8743 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8746 enum machine_mode mode
= GET_MODE (x
);
8749 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
8753 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8757 /* A memory access costs 1 insn if the mode is small, or the address is
8758 a single register, otherwise it costs one insn per word. */
8759 if (REG_P (XEXP (x
, 0)))
8760 *total
= COSTS_N_INSNS (1);
8762 && GET_CODE (XEXP (x
, 0)) == PLUS
8763 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
8764 /* This will be split into two instructions.
8765 See arm.md:calculate_pic_address. */
8766 *total
= COSTS_N_INSNS (2);
8768 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8775 /* Needs a libcall, so it costs about this. */
8776 *total
= COSTS_N_INSNS (2);
8780 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
8782 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8790 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
8792 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8795 else if (mode
== SImode
)
8797 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8798 /* Slightly disparage register shifts, but not by much. */
8799 if (!CONST_INT_P (XEXP (x
, 1)))
8800 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
8804 /* Needs a libcall. */
8805 *total
= COSTS_N_INSNS (2);
8809 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8810 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8812 *total
= COSTS_N_INSNS (1);
8818 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
8819 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
8821 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
8822 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
8823 || subcode1
== ROTATE
|| subcode1
== ROTATERT
8824 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
8825 || subcode1
== ASHIFTRT
)
8827 /* It's just the cost of the two operands. */
8832 *total
= COSTS_N_INSNS (1);
8836 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8840 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8841 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8843 *total
= COSTS_N_INSNS (1);
8847 /* A shift as a part of ADD costs nothing. */
8848 if (GET_CODE (XEXP (x
, 0)) == MULT
8849 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8851 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
8852 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
8853 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
8858 case AND
: case XOR
: case IOR
:
8861 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
8863 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
8864 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
8865 || (code
== AND
&& subcode
== NOT
))
8867 /* It's just the cost of the two operands. */
8873 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8877 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8881 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8882 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8884 *total
= COSTS_N_INSNS (1);
8890 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8899 if (cc_register (XEXP (x
, 0), VOIDmode
))
8902 *total
= COSTS_N_INSNS (1);
8906 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8907 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8908 *total
= COSTS_N_INSNS (1);
8910 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
8915 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
8918 if (const_ok_for_arm (INTVAL (x
)))
8919 /* A multiplication by a constant requires another instruction
8920 to load the constant to a register. */
8921 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
8923 else if (const_ok_for_arm (~INTVAL (x
)))
8924 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
8925 else if (const_ok_for_arm (-INTVAL (x
)))
8927 if (outer_code
== COMPARE
|| outer_code
== PLUS
8928 || outer_code
== MINUS
)
8931 *total
= COSTS_N_INSNS (1);
8934 *total
= COSTS_N_INSNS (2);
8940 *total
= COSTS_N_INSNS (2);
8944 *total
= COSTS_N_INSNS (4);
8949 && TARGET_HARD_FLOAT
8950 && outer_code
== SET
8951 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8952 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8953 *total
= COSTS_N_INSNS (1);
8955 *total
= COSTS_N_INSNS (4);
8960 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8961 cost of these slightly. */
8962 *total
= COSTS_N_INSNS (1) + 1;
8969 if (mode
!= VOIDmode
)
8970 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8972 *total
= COSTS_N_INSNS (4); /* How knows? */
8977 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8978 operand, then return the operand that is being shifted. If the shift
8979 is not by a constant, then set SHIFT_REG to point to the operand.
8980 Return NULL if OP is not a shifter operand. */
8982 shifter_op_p (rtx op
, rtx
*shift_reg
)
8984 enum rtx_code code
= GET_CODE (op
);
8986 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
8987 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
8988 return XEXP (op
, 0);
8989 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
8990 return XEXP (op
, 0);
8991 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
8992 || code
== ASHIFTRT
)
8994 if (!CONST_INT_P (XEXP (op
, 1)))
8995 *shift_reg
= XEXP (op
, 1);
8996 return XEXP (op
, 0);
9003 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9005 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9006 gcc_assert (GET_CODE (x
) == UNSPEC
);
9008 switch (XINT (x
, 1))
9010 case UNSPEC_UNALIGNED_LOAD
:
9011 /* We can only do unaligned loads into the integer unit, and we can't
9013 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9015 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9016 + extra_cost
->ldst
.load_unaligned
);
9019 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9020 ADDR_SPACE_GENERIC
, speed_p
);
9024 case UNSPEC_UNALIGNED_STORE
:
9025 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9027 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9028 + extra_cost
->ldst
.store_unaligned
);
9030 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
9032 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9033 ADDR_SPACE_GENERIC
, speed_p
);
9043 *cost
= COSTS_N_INSNS (1);
9045 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9049 *cost
= COSTS_N_INSNS (2);
9055 /* Cost of a libcall. We assume one insn per argument, an amount for the
9056 call (one insn for -Os) and then one for processing the result. */
9057 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9059 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9062 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9063 if (shift_op != NULL \
9064 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9069 *cost += extra_cost->alu.arith_shift_reg; \
9070 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9073 *cost += extra_cost->alu.arith_shift; \
9075 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9076 + rtx_cost (XEXP (x, 1 - IDX), \
9083 /* RTX costs. Make an estimate of the cost of executing the operation
9084 X, which is contained with an operation with code OUTER_CODE.
9085 SPEED_P indicates whether the cost desired is the performance cost,
9086 or the size cost. The estimate is stored in COST and the return
9087 value is TRUE if the cost calculation is final, or FALSE if the
9088 caller should recurse through the operands of X to add additional
9091 We currently make no attempt to model the size savings of Thumb-2
9092 16-bit instructions. At the normal points in compilation where
9093 this code is called we have no measure of whether the condition
9094 flags are live or not, and thus no realistic way to determine what
9095 the size will eventually be. */
9097 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9098 const struct cpu_cost_table
*extra_cost
,
9099 int *cost
, bool speed_p
)
9101 enum machine_mode mode
= GET_MODE (x
);
9106 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9108 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9116 /* SET RTXs don't have a mode so we get it from the destination. */
9117 mode
= GET_MODE (SET_DEST (x
));
9119 if (REG_P (SET_SRC (x
))
9120 && REG_P (SET_DEST (x
)))
9122 /* Assume that most copies can be done with a single insn,
9123 unless we don't have HW FP, in which case everything
9124 larger than word mode will require two insns. */
9125 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9126 && GET_MODE_SIZE (mode
) > 4)
9129 /* Conditional register moves can be encoded
9130 in 16 bits in Thumb mode. */
9131 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9137 if (CONST_INT_P (SET_SRC (x
)))
9139 /* Handle CONST_INT here, since the value doesn't have a mode
9140 and we would otherwise be unable to work out the true cost. */
9141 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9143 /* Slightly lower the cost of setting a core reg to a constant.
9144 This helps break up chains and allows for better scheduling. */
9145 if (REG_P (SET_DEST (x
))
9146 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9149 /* Immediate moves with an immediate in the range [0, 255] can be
9150 encoded in 16 bits in Thumb mode. */
9151 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9152 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9154 goto const_int_cost
;
9160 /* A memory access costs 1 insn if the mode is small, or the address is
9161 a single register, otherwise it costs one insn per word. */
9162 if (REG_P (XEXP (x
, 0)))
9163 *cost
= COSTS_N_INSNS (1);
9165 && GET_CODE (XEXP (x
, 0)) == PLUS
9166 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9167 /* This will be split into two instructions.
9168 See arm.md:calculate_pic_address. */
9169 *cost
= COSTS_N_INSNS (2);
9171 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9173 /* For speed optimizations, add the costs of the address and
9174 accessing memory. */
9177 *cost
+= (extra_cost
->ldst
.load
9178 + arm_address_cost (XEXP (x
, 0), mode
,
9179 ADDR_SPACE_GENERIC
, speed_p
));
9181 *cost
+= extra_cost
->ldst
.load
;
9187 /* Calculations of LDM costs are complex. We assume an initial cost
9188 (ldm_1st) which will load the number of registers mentioned in
9189 ldm_regs_per_insn_1st registers; then each additional
9190 ldm_regs_per_insn_subsequent registers cost one more insn. The
9191 formula for N regs is thus:
9193 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9194 + ldm_regs_per_insn_subsequent - 1)
9195 / ldm_regs_per_insn_subsequent).
9197 Additional costs may also be added for addressing. A similar
9198 formula is used for STM. */
9200 bool is_ldm
= load_multiple_operation (x
, SImode
);
9201 bool is_stm
= store_multiple_operation (x
, SImode
);
9203 *cost
= COSTS_N_INSNS (1);
9205 if (is_ldm
|| is_stm
)
9209 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9210 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9211 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9212 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9213 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9214 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9215 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9217 *cost
+= regs_per_insn_1st
9218 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9219 + regs_per_insn_sub
- 1)
9220 / regs_per_insn_sub
);
9229 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9230 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9231 *cost
= COSTS_N_INSNS (speed_p
9232 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9233 else if (mode
== SImode
&& TARGET_IDIV
)
9234 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9236 *cost
= LIBCALL_COST (2);
9237 return false; /* All arguments must be in registers. */
9241 *cost
= LIBCALL_COST (2);
9242 return false; /* All arguments must be in registers. */
9245 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9247 *cost
= (COSTS_N_INSNS (2)
9248 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9250 *cost
+= extra_cost
->alu
.shift_reg
;
9258 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9260 *cost
= (COSTS_N_INSNS (3)
9261 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9263 *cost
+= 2 * extra_cost
->alu
.shift
;
9266 else if (mode
== SImode
)
9268 *cost
= (COSTS_N_INSNS (1)
9269 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9270 /* Slightly disparage register shifts at -Os, but not by much. */
9271 if (!CONST_INT_P (XEXP (x
, 1)))
9272 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9273 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9276 else if (GET_MODE_CLASS (mode
) == MODE_INT
9277 && GET_MODE_SIZE (mode
) < 4)
9281 *cost
= (COSTS_N_INSNS (1)
9282 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9283 /* Slightly disparage register shifts at -Os, but not by
9285 if (!CONST_INT_P (XEXP (x
, 1)))
9286 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9287 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9289 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9291 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9293 /* Can use SBFX/UBFX. */
9294 *cost
= COSTS_N_INSNS (1);
9296 *cost
+= extra_cost
->alu
.bfx
;
9297 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9301 *cost
= COSTS_N_INSNS (2);
9302 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9305 if (CONST_INT_P (XEXP (x
, 1)))
9306 *cost
+= 2 * extra_cost
->alu
.shift
;
9308 *cost
+= (extra_cost
->alu
.shift
9309 + extra_cost
->alu
.shift_reg
);
9312 /* Slightly disparage register shifts. */
9313 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9318 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9319 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9322 if (CONST_INT_P (XEXP (x
, 1)))
9323 *cost
+= (2 * extra_cost
->alu
.shift
9324 + extra_cost
->alu
.log_shift
);
9326 *cost
+= (extra_cost
->alu
.shift
9327 + extra_cost
->alu
.shift_reg
9328 + extra_cost
->alu
.log_shift_reg
);
9334 *cost
= LIBCALL_COST (2);
9338 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9339 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9341 *cost
= COSTS_N_INSNS (1);
9342 if (GET_CODE (XEXP (x
, 0)) == MULT
9343 || GET_CODE (XEXP (x
, 1)) == MULT
)
9345 rtx mul_op0
, mul_op1
, sub_op
;
9348 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9350 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9352 mul_op0
= XEXP (XEXP (x
, 0), 0);
9353 mul_op1
= XEXP (XEXP (x
, 0), 1);
9354 sub_op
= XEXP (x
, 1);
9358 mul_op0
= XEXP (XEXP (x
, 1), 0);
9359 mul_op1
= XEXP (XEXP (x
, 1), 1);
9360 sub_op
= XEXP (x
, 0);
9363 /* The first operand of the multiply may be optionally
9365 if (GET_CODE (mul_op0
) == NEG
)
9366 mul_op0
= XEXP (mul_op0
, 0);
9368 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9369 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9370 + rtx_cost (sub_op
, code
, 0, speed_p
));
9376 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9382 rtx shift_by_reg
= NULL
;
9386 *cost
= COSTS_N_INSNS (1);
9388 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9389 if (shift_op
== NULL
)
9391 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9392 non_shift_op
= XEXP (x
, 0);
9395 non_shift_op
= XEXP (x
, 1);
9397 if (shift_op
!= NULL
)
9399 if (shift_by_reg
!= NULL
)
9402 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9403 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9406 *cost
+= extra_cost
->alu
.arith_shift
;
9408 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9409 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9414 && GET_CODE (XEXP (x
, 1)) == MULT
)
9418 *cost
+= extra_cost
->mult
[0].add
;
9419 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9420 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9421 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9425 if (CONST_INT_P (XEXP (x
, 0)))
9427 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9428 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9430 *cost
= COSTS_N_INSNS (insns
);
9432 *cost
+= insns
* extra_cost
->alu
.arith
;
9433 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9440 if (GET_MODE_CLASS (mode
) == MODE_INT
9441 && GET_MODE_SIZE (mode
) < 4)
9443 rtx shift_op
, shift_reg
;
9446 /* We check both sides of the MINUS for shifter operands since,
9447 unlike PLUS, it's not commutative. */
9449 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9450 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9452 /* Slightly disparage, as we might need to widen the result. */
9453 *cost
= 1 + COSTS_N_INSNS (1);
9455 *cost
+= extra_cost
->alu
.arith
;
9457 if (CONST_INT_P (XEXP (x
, 0)))
9459 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9468 *cost
= COSTS_N_INSNS (2);
9470 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9472 rtx op1
= XEXP (x
, 1);
9475 *cost
+= 2 * extra_cost
->alu
.arith
;
9477 if (GET_CODE (op1
) == ZERO_EXTEND
)
9478 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9480 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9481 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9485 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9488 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9489 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9491 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9494 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9495 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9498 *cost
+= (extra_cost
->alu
.arith
9499 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9500 ? extra_cost
->alu
.arith
9501 : extra_cost
->alu
.arith_shift
));
9502 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9503 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9504 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9509 *cost
+= 2 * extra_cost
->alu
.arith
;
9515 *cost
= LIBCALL_COST (2);
9519 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9520 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9522 *cost
= COSTS_N_INSNS (1);
9523 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9525 rtx mul_op0
, mul_op1
, add_op
;
9528 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9530 mul_op0
= XEXP (XEXP (x
, 0), 0);
9531 mul_op1
= XEXP (XEXP (x
, 0), 1);
9532 add_op
= XEXP (x
, 1);
9534 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9535 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9536 + rtx_cost (add_op
, code
, 0, speed_p
));
9542 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9545 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9547 *cost
= LIBCALL_COST (2);
9551 /* Narrow modes can be synthesized in SImode, but the range
9552 of useful sub-operations is limited. Check for shift operations
9553 on one of the operands. Only left shifts can be used in the
9555 if (GET_MODE_CLASS (mode
) == MODE_INT
9556 && GET_MODE_SIZE (mode
) < 4)
9558 rtx shift_op
, shift_reg
;
9561 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9563 if (CONST_INT_P (XEXP (x
, 1)))
9565 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9566 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9568 *cost
= COSTS_N_INSNS (insns
);
9570 *cost
+= insns
* extra_cost
->alu
.arith
;
9571 /* Slightly penalize a narrow operation as the result may
9573 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9577 /* Slightly penalize a narrow operation as the result may
9579 *cost
= 1 + COSTS_N_INSNS (1);
9581 *cost
+= extra_cost
->alu
.arith
;
9588 rtx shift_op
, shift_reg
;
9590 *cost
= COSTS_N_INSNS (1);
9592 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9593 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9595 /* UXTA[BH] or SXTA[BH]. */
9597 *cost
+= extra_cost
->alu
.extend_arith
;
9598 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9600 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9605 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9606 if (shift_op
!= NULL
)
9611 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9612 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9615 *cost
+= extra_cost
->alu
.arith_shift
;
9617 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9618 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9621 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9623 rtx mul_op
= XEXP (x
, 0);
9625 *cost
= COSTS_N_INSNS (1);
9627 if (TARGET_DSP_MULTIPLY
9628 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9629 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9630 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9631 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9632 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9633 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9634 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9635 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9636 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9637 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9638 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9639 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9644 *cost
+= extra_cost
->mult
[0].extend_add
;
9645 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9646 SIGN_EXTEND
, 0, speed_p
)
9647 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9648 SIGN_EXTEND
, 0, speed_p
)
9649 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9654 *cost
+= extra_cost
->mult
[0].add
;
9655 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
9656 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
9657 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9660 if (CONST_INT_P (XEXP (x
, 1)))
9662 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9663 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9665 *cost
= COSTS_N_INSNS (insns
);
9667 *cost
+= insns
* extra_cost
->alu
.arith
;
9668 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9677 && GET_CODE (XEXP (x
, 0)) == MULT
9678 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9679 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9680 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9681 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9683 *cost
= COSTS_N_INSNS (1);
9685 *cost
+= extra_cost
->mult
[1].extend_add
;
9686 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
9687 ZERO_EXTEND
, 0, speed_p
)
9688 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
9689 ZERO_EXTEND
, 0, speed_p
)
9690 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9694 *cost
= COSTS_N_INSNS (2);
9696 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9697 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9700 *cost
+= (extra_cost
->alu
.arith
9701 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9702 ? extra_cost
->alu
.arith
9703 : extra_cost
->alu
.arith_shift
));
9705 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9707 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9712 *cost
+= 2 * extra_cost
->alu
.arith
;
9717 *cost
= LIBCALL_COST (2);
9720 case AND
: case XOR
: case IOR
:
9723 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9724 rtx op0
= XEXP (x
, 0);
9725 rtx shift_op
, shift_reg
;
9727 *cost
= COSTS_N_INSNS (1);
9731 || (code
== IOR
&& TARGET_THUMB2
)))
9732 op0
= XEXP (op0
, 0);
9735 shift_op
= shifter_op_p (op0
, &shift_reg
);
9736 if (shift_op
!= NULL
)
9741 *cost
+= extra_cost
->alu
.log_shift_reg
;
9742 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9745 *cost
+= extra_cost
->alu
.log_shift
;
9747 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9748 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9752 if (CONST_INT_P (XEXP (x
, 1)))
9754 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9755 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9758 *cost
= COSTS_N_INSNS (insns
);
9760 *cost
+= insns
* extra_cost
->alu
.logical
;
9761 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
9766 *cost
+= extra_cost
->alu
.logical
;
9767 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
9768 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9774 rtx op0
= XEXP (x
, 0);
9775 enum rtx_code subcode
= GET_CODE (op0
);
9777 *cost
= COSTS_N_INSNS (2);
9781 || (code
== IOR
&& TARGET_THUMB2
)))
9782 op0
= XEXP (op0
, 0);
9784 if (GET_CODE (op0
) == ZERO_EXTEND
)
9787 *cost
+= 2 * extra_cost
->alu
.logical
;
9789 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
9790 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9793 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9796 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9798 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
9799 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9804 *cost
+= 2 * extra_cost
->alu
.logical
;
9810 *cost
= LIBCALL_COST (2);
9814 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9815 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9817 rtx op0
= XEXP (x
, 0);
9819 *cost
= COSTS_N_INSNS (1);
9821 if (GET_CODE (op0
) == NEG
)
9822 op0
= XEXP (op0
, 0);
9825 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9827 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
9828 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
9831 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9833 *cost
= LIBCALL_COST (2);
9839 *cost
= COSTS_N_INSNS (1);
9840 if (TARGET_DSP_MULTIPLY
9841 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9842 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9843 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9844 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9845 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9846 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
9847 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
9848 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
9849 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9850 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9851 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9852 && (INTVAL (XEXP (XEXP (x
, 1), 1))
9857 *cost
+= extra_cost
->mult
[0].extend
;
9858 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
9859 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
9863 *cost
+= extra_cost
->mult
[0].simple
;
9870 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9871 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
9872 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9873 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
9875 *cost
= COSTS_N_INSNS (1);
9877 *cost
+= extra_cost
->mult
[1].extend
;
9878 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
9879 ZERO_EXTEND
, 0, speed_p
)
9880 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9881 ZERO_EXTEND
, 0, speed_p
));
9885 *cost
= LIBCALL_COST (2);
9890 *cost
= LIBCALL_COST (2);
9894 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9895 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9897 *cost
= COSTS_N_INSNS (1);
9899 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9903 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9905 *cost
= LIBCALL_COST (1);
9911 if (GET_CODE (XEXP (x
, 0)) == ABS
)
9913 *cost
= COSTS_N_INSNS (2);
9914 /* Assume the non-flag-changing variant. */
9916 *cost
+= (extra_cost
->alu
.log_shift
9917 + extra_cost
->alu
.arith_shift
);
9918 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
9922 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
9923 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
9925 *cost
= COSTS_N_INSNS (2);
9926 /* No extra cost for MOV imm and MVN imm. */
9927 /* If the comparison op is using the flags, there's no further
9928 cost, otherwise we need to add the cost of the comparison. */
9929 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
9930 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
9931 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
9933 *cost
+= (COSTS_N_INSNS (1)
9934 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
9936 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
9939 *cost
+= extra_cost
->alu
.arith
;
9943 *cost
= COSTS_N_INSNS (1);
9945 *cost
+= extra_cost
->alu
.arith
;
9949 if (GET_MODE_CLASS (mode
) == MODE_INT
9950 && GET_MODE_SIZE (mode
) < 4)
9952 /* Slightly disparage, as we might need an extend operation. */
9953 *cost
= 1 + COSTS_N_INSNS (1);
9955 *cost
+= extra_cost
->alu
.arith
;
9961 *cost
= COSTS_N_INSNS (2);
9963 *cost
+= 2 * extra_cost
->alu
.arith
;
9968 *cost
= LIBCALL_COST (1);
9975 rtx shift_reg
= NULL
;
9977 *cost
= COSTS_N_INSNS (1);
9978 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9982 if (shift_reg
!= NULL
)
9985 *cost
+= extra_cost
->alu
.log_shift_reg
;
9986 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9989 *cost
+= extra_cost
->alu
.log_shift
;
9990 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
9995 *cost
+= extra_cost
->alu
.logical
;
10000 *cost
= COSTS_N_INSNS (2);
10006 *cost
+= LIBCALL_COST (1);
10011 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10013 *cost
= COSTS_N_INSNS (4);
10016 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
10017 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
10019 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
10020 /* Assume that if one arm of the if_then_else is a register,
10021 that it will be tied with the result and eliminate the
10022 conditional insn. */
10023 if (REG_P (XEXP (x
, 1)))
10025 else if (REG_P (XEXP (x
, 2)))
10031 if (extra_cost
->alu
.non_exec_costs_exec
)
10032 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10034 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10037 *cost
+= op1cost
+ op2cost
;
10043 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10047 enum machine_mode op0mode
;
10048 /* We'll mostly assume that the cost of a compare is the cost of the
10049 LHS. However, there are some notable exceptions. */
10051 /* Floating point compares are never done as side-effects. */
10052 op0mode
= GET_MODE (XEXP (x
, 0));
10053 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10054 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10056 *cost
= COSTS_N_INSNS (1);
10058 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10060 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10062 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10068 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10070 *cost
= LIBCALL_COST (2);
10074 /* DImode compares normally take two insns. */
10075 if (op0mode
== DImode
)
10077 *cost
= COSTS_N_INSNS (2);
10079 *cost
+= 2 * extra_cost
->alu
.arith
;
10083 if (op0mode
== SImode
)
10088 if (XEXP (x
, 1) == const0_rtx
10089 && !(REG_P (XEXP (x
, 0))
10090 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10091 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10093 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10095 /* Multiply operations that set the flags are often
10096 significantly more expensive. */
10098 && GET_CODE (XEXP (x
, 0)) == MULT
10099 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10100 *cost
+= extra_cost
->mult
[0].flag_setting
;
10103 && GET_CODE (XEXP (x
, 0)) == PLUS
10104 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10105 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10107 *cost
+= extra_cost
->mult
[0].flag_setting
;
10112 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10113 if (shift_op
!= NULL
)
10115 *cost
= COSTS_N_INSNS (1);
10116 if (shift_reg
!= NULL
)
10118 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10120 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10123 *cost
+= extra_cost
->alu
.arith_shift
;
10124 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10125 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10129 *cost
= COSTS_N_INSNS (1);
10131 *cost
+= extra_cost
->alu
.arith
;
10132 if (CONST_INT_P (XEXP (x
, 1))
10133 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10135 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10143 *cost
= LIBCALL_COST (2);
10166 if (outer_code
== SET
)
10168 /* Is it a store-flag operation? */
10169 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10170 && XEXP (x
, 1) == const0_rtx
)
10172 /* Thumb also needs an IT insn. */
10173 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10176 if (XEXP (x
, 1) == const0_rtx
)
10181 /* LSR Rd, Rn, #31. */
10182 *cost
= COSTS_N_INSNS (1);
10184 *cost
+= extra_cost
->alu
.shift
;
10194 *cost
= COSTS_N_INSNS (2);
10198 /* RSBS T1, Rn, Rn, LSR #31
10200 *cost
= COSTS_N_INSNS (2);
10202 *cost
+= extra_cost
->alu
.arith_shift
;
10206 /* RSB Rd, Rn, Rn, ASR #1
10207 LSR Rd, Rd, #31. */
10208 *cost
= COSTS_N_INSNS (2);
10210 *cost
+= (extra_cost
->alu
.arith_shift
10211 + extra_cost
->alu
.shift
);
10217 *cost
= COSTS_N_INSNS (2);
10219 *cost
+= extra_cost
->alu
.shift
;
10223 /* Remaining cases are either meaningless or would take
10224 three insns anyway. */
10225 *cost
= COSTS_N_INSNS (3);
10228 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10233 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10234 if (CONST_INT_P (XEXP (x
, 1))
10235 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10237 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10244 /* Not directly inside a set. If it involves the condition code
10245 register it must be the condition for a branch, cond_exec or
10246 I_T_E operation. Since the comparison is performed elsewhere
10247 this is just the control part which has no additional
10249 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10250 && XEXP (x
, 1) == const0_rtx
)
10258 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10259 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10261 *cost
= COSTS_N_INSNS (1);
10263 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10267 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10269 *cost
= LIBCALL_COST (1);
10273 if (mode
== SImode
)
10275 *cost
= COSTS_N_INSNS (1);
10277 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10281 *cost
= LIBCALL_COST (1);
10285 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10286 && MEM_P (XEXP (x
, 0)))
10288 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10290 if (mode
== DImode
)
10291 *cost
+= COSTS_N_INSNS (1);
10296 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10297 *cost
+= extra_cost
->ldst
.load
;
10299 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10301 if (mode
== DImode
)
10302 *cost
+= extra_cost
->alu
.shift
;
10307 /* Widening from less than 32-bits requires an extend operation. */
10308 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10310 /* We have SXTB/SXTH. */
10311 *cost
= COSTS_N_INSNS (1);
10312 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10314 *cost
+= extra_cost
->alu
.extend
;
10316 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10318 /* Needs two shifts. */
10319 *cost
= COSTS_N_INSNS (2);
10320 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10322 *cost
+= 2 * extra_cost
->alu
.shift
;
10325 /* Widening beyond 32-bits requires one more insn. */
10326 if (mode
== DImode
)
10328 *cost
+= COSTS_N_INSNS (1);
10330 *cost
+= extra_cost
->alu
.shift
;
10337 || GET_MODE (XEXP (x
, 0)) == SImode
10338 || GET_MODE (XEXP (x
, 0)) == QImode
)
10339 && MEM_P (XEXP (x
, 0)))
10341 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10343 if (mode
== DImode
)
10344 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10349 /* Widening from less than 32-bits requires an extend operation. */
10350 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10352 /* UXTB can be a shorter instruction in Thumb2, but it might
10353 be slower than the AND Rd, Rn, #255 alternative. When
10354 optimizing for speed it should never be slower to use
10355 AND, and we don't really model 16-bit vs 32-bit insns
10357 *cost
= COSTS_N_INSNS (1);
10359 *cost
+= extra_cost
->alu
.logical
;
10361 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10363 /* We have UXTB/UXTH. */
10364 *cost
= COSTS_N_INSNS (1);
10365 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10367 *cost
+= extra_cost
->alu
.extend
;
10369 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10371 /* Needs two shifts. It's marginally preferable to use
10372 shifts rather than two BIC instructions as the second
10373 shift may merge with a subsequent insn as a shifter
10375 *cost
= COSTS_N_INSNS (2);
10376 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10378 *cost
+= 2 * extra_cost
->alu
.shift
;
10380 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10381 *cost
= COSTS_N_INSNS (1);
10383 /* Widening beyond 32-bits requires one more insn. */
10384 if (mode
== DImode
)
10386 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10393 /* CONST_INT has no mode, so we cannot tell for sure how many
10394 insns are really going to be needed. The best we can do is
10395 look at the value passed. If it fits in SImode, then assume
10396 that's the mode it will be used for. Otherwise assume it
10397 will be used in DImode. */
10398 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10403 /* Avoid blowing up in arm_gen_constant (). */
10404 if (!(outer_code
== PLUS
10405 || outer_code
== AND
10406 || outer_code
== IOR
10407 || outer_code
== XOR
10408 || outer_code
== MINUS
))
10412 if (mode
== SImode
)
10414 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10415 INTVAL (x
), NULL
, NULL
,
10421 *cost
+= COSTS_N_INSNS (arm_gen_constant
10422 (outer_code
, SImode
, NULL
,
10423 trunc_int_for_mode (INTVAL (x
), SImode
),
10425 + arm_gen_constant (outer_code
, SImode
, NULL
,
10426 INTVAL (x
) >> 32, NULL
,
10438 if (arm_arch_thumb2
&& !flag_pic
)
10439 *cost
= COSTS_N_INSNS (2);
10441 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10444 *cost
= COSTS_N_INSNS (2);
10448 *cost
+= COSTS_N_INSNS (1);
10450 *cost
+= extra_cost
->alu
.arith
;
10456 *cost
= COSTS_N_INSNS (4);
10461 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10462 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10464 if (vfp3_const_double_rtx (x
))
10466 *cost
= COSTS_N_INSNS (1);
10468 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10474 *cost
= COSTS_N_INSNS (1);
10475 if (mode
== DFmode
)
10476 *cost
+= extra_cost
->ldst
.loadd
;
10478 *cost
+= extra_cost
->ldst
.loadf
;
10481 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10485 *cost
= COSTS_N_INSNS (4);
10491 && TARGET_HARD_FLOAT
10492 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10493 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10494 *cost
= COSTS_N_INSNS (1);
10496 *cost
= COSTS_N_INSNS (4);
10501 *cost
= COSTS_N_INSNS (1);
10502 /* When optimizing for size, we prefer constant pool entries to
10503 MOVW/MOVT pairs, so bump the cost of these slightly. */
10509 *cost
= COSTS_N_INSNS (1);
10511 *cost
+= extra_cost
->alu
.clz
;
10515 if (XEXP (x
, 1) == const0_rtx
)
10517 *cost
= COSTS_N_INSNS (1);
10519 *cost
+= extra_cost
->alu
.log_shift
;
10520 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10523 /* Fall through. */
10527 *cost
= COSTS_N_INSNS (2);
10531 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10532 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10533 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10534 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10535 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10536 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10537 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10538 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10541 *cost
= COSTS_N_INSNS (1);
10543 *cost
+= extra_cost
->mult
[1].extend
;
10544 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10546 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10550 *cost
= LIBCALL_COST (1);
10554 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10557 /* Reading the PC is like reading any other register. Writing it
10558 is more expensive, but we take that into account elsewhere. */
10563 /* TODO: Simple zero_extract of bottom bits using AND. */
10564 /* Fall through. */
10568 && CONST_INT_P (XEXP (x
, 1))
10569 && CONST_INT_P (XEXP (x
, 2)))
10571 *cost
= COSTS_N_INSNS (1);
10573 *cost
+= extra_cost
->alu
.bfx
;
10574 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10577 /* Without UBFX/SBFX, need to resort to shift operations. */
10578 *cost
= COSTS_N_INSNS (2);
10580 *cost
+= 2 * extra_cost
->alu
.shift
;
10581 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10585 if (TARGET_HARD_FLOAT
)
10587 *cost
= COSTS_N_INSNS (1);
10589 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10590 if (!TARGET_FPU_ARMV8
10591 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10593 /* Pre v8, widening HF->DF is a two-step process, first
10594 widening to SFmode. */
10595 *cost
+= COSTS_N_INSNS (1);
10597 *cost
+= extra_cost
->fp
[0].widen
;
10599 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10603 *cost
= LIBCALL_COST (1);
10606 case FLOAT_TRUNCATE
:
10607 if (TARGET_HARD_FLOAT
)
10609 *cost
= COSTS_N_INSNS (1);
10611 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10612 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10614 /* Vector modes? */
10616 *cost
= LIBCALL_COST (1);
10621 if (TARGET_HARD_FLOAT
)
10623 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10625 *cost
= COSTS_N_INSNS (1);
10627 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
10628 /* Strip of the 'cost' of rounding towards zero. */
10629 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10630 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
10632 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10633 /* ??? Increase the cost to deal with transferring from
10634 FP -> CORE registers? */
10637 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10638 && TARGET_FPU_ARMV8
)
10640 *cost
= COSTS_N_INSNS (1);
10642 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10645 /* Vector costs? */
10647 *cost
= LIBCALL_COST (1);
10651 case UNSIGNED_FLOAT
:
10652 if (TARGET_HARD_FLOAT
)
10654 /* ??? Increase the cost to deal with transferring from CORE
10655 -> FP registers? */
10656 *cost
= COSTS_N_INSNS (1);
10658 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10661 *cost
= LIBCALL_COST (1);
10665 *cost
= COSTS_N_INSNS (1);
10669 /* Just a guess. Cost one insn per input. */
10670 *cost
= COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x
));
10674 if (mode
!= VOIDmode
)
10675 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10677 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10682 #undef HANDLE_NARROW_SHIFT_ARITH
10684 /* RTX costs when optimizing for size. */
10686 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
10687 int *total
, bool speed
)
10691 if (TARGET_OLD_RTX_COSTS
10692 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
10694 /* Old way. (Deprecated.) */
10696 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
10697 (enum rtx_code
) outer_code
, total
);
10699 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
10700 (enum rtx_code
) outer_code
, total
,
10706 if (current_tune
->insn_extra_cost
)
10707 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10708 (enum rtx_code
) outer_code
,
10709 current_tune
->insn_extra_cost
,
10711 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10712 && current_tune->insn_extra_cost != NULL */
10714 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10715 (enum rtx_code
) outer_code
,
10716 &generic_extra_costs
, total
, speed
);
10719 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10721 print_rtl_single (dump_file
, x
);
10722 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10723 *total
, result
? "final" : "partial");
10728 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10729 supported on any "slowmul" cores, so it can be ignored. */
10732 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10733 int *total
, bool speed
)
10735 enum machine_mode mode
= GET_MODE (x
);
10739 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10746 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10749 *total
= COSTS_N_INSNS (20);
10753 if (CONST_INT_P (XEXP (x
, 1)))
10755 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
10756 & (unsigned HOST_WIDE_INT
) 0xffffffff);
10757 int cost
, const_ok
= const_ok_for_arm (i
);
10758 int j
, booth_unit_size
;
10760 /* Tune as appropriate. */
10761 cost
= const_ok
? 4 : 8;
10762 booth_unit_size
= 2;
10763 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
10765 i
>>= booth_unit_size
;
10769 *total
= COSTS_N_INSNS (cost
);
10770 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
10774 *total
= COSTS_N_INSNS (20);
10778 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
10783 /* RTX cost for cores with a fast multiply unit (M variants). */
10786 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10787 int *total
, bool speed
)
10789 enum machine_mode mode
= GET_MODE (x
);
10793 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10797 /* ??? should thumb2 use different costs? */
10801 /* There is no point basing this on the tuning, since it is always the
10802 fast variant if it exists at all. */
10804 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10805 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10806 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10808 *total
= COSTS_N_INSNS(2);
10813 if (mode
== DImode
)
10815 *total
= COSTS_N_INSNS (5);
10819 if (CONST_INT_P (XEXP (x
, 1)))
10821 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
10822 & (unsigned HOST_WIDE_INT
) 0xffffffff);
10823 int cost
, const_ok
= const_ok_for_arm (i
);
10824 int j
, booth_unit_size
;
10826 /* Tune as appropriate. */
10827 cost
= const_ok
? 4 : 8;
10828 booth_unit_size
= 8;
10829 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
10831 i
>>= booth_unit_size
;
10835 *total
= COSTS_N_INSNS(cost
);
10839 if (mode
== SImode
)
10841 *total
= COSTS_N_INSNS (4);
10845 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10847 if (TARGET_HARD_FLOAT
10849 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
10851 *total
= COSTS_N_INSNS (1);
10856 /* Requires a lib call */
10857 *total
= COSTS_N_INSNS (20);
10861 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10866 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10867 so it can be ignored. */
10870 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10871 int *total
, bool speed
)
10873 enum machine_mode mode
= GET_MODE (x
);
10877 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10884 if (GET_CODE (XEXP (x
, 0)) != MULT
)
10885 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10887 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10888 will stall until the multiplication is complete. */
10889 *total
= COSTS_N_INSNS (3);
10893 /* There is no point basing this on the tuning, since it is always the
10894 fast variant if it exists at all. */
10896 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10897 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10898 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10900 *total
= COSTS_N_INSNS (2);
10905 if (mode
== DImode
)
10907 *total
= COSTS_N_INSNS (5);
10911 if (CONST_INT_P (XEXP (x
, 1)))
10913 /* If operand 1 is a constant we can more accurately
10914 calculate the cost of the multiply. The multiplier can
10915 retire 15 bits on the first cycle and a further 12 on the
10916 second. We do, of course, have to load the constant into
10917 a register first. */
10918 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
10919 /* There's a general overhead of one cycle. */
10921 unsigned HOST_WIDE_INT masked_const
;
10923 if (i
& 0x80000000)
10926 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
10928 masked_const
= i
& 0xffff8000;
10929 if (masked_const
!= 0)
10932 masked_const
= i
& 0xf8000000;
10933 if (masked_const
!= 0)
10936 *total
= COSTS_N_INSNS (cost
);
10940 if (mode
== SImode
)
10942 *total
= COSTS_N_INSNS (3);
10946 /* Requires a lib call */
10947 *total
= COSTS_N_INSNS (20);
10951 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10956 /* RTX costs for 9e (and later) cores. */
10959 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10960 int *total
, bool speed
)
10962 enum machine_mode mode
= GET_MODE (x
);
10969 *total
= COSTS_N_INSNS (3);
10973 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10981 /* There is no point basing this on the tuning, since it is always the
10982 fast variant if it exists at all. */
10984 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10985 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10986 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10988 *total
= COSTS_N_INSNS (2);
10993 if (mode
== DImode
)
10995 *total
= COSTS_N_INSNS (5);
10999 if (mode
== SImode
)
11001 *total
= COSTS_N_INSNS (2);
11005 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11007 if (TARGET_HARD_FLOAT
11009 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11011 *total
= COSTS_N_INSNS (1);
11016 *total
= COSTS_N_INSNS (20);
11020 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11023 /* All address computations that can be done are free, but rtx cost returns
11024 the same for practically all of them. So we weight the different types
11025 of address here in the order (most pref first):
11026 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11028 arm_arm_address_cost (rtx x
)
11030 enum rtx_code c
= GET_CODE (x
);
11032 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11034 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11039 if (CONST_INT_P (XEXP (x
, 1)))
11042 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11052 arm_thumb_address_cost (rtx x
)
11054 enum rtx_code c
= GET_CODE (x
);
11059 && REG_P (XEXP (x
, 0))
11060 && CONST_INT_P (XEXP (x
, 1)))
11067 arm_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11068 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11070 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11073 /* Adjust cost hook for XScale. */
11075 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11077 /* Some true dependencies can have a higher cost depending
11078 on precisely how certain input operands are used. */
11079 if (REG_NOTE_KIND(link
) == 0
11080 && recog_memoized (insn
) >= 0
11081 && recog_memoized (dep
) >= 0)
11083 int shift_opnum
= get_attr_shift (insn
);
11084 enum attr_type attr_type
= get_attr_type (dep
);
11086 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11087 operand for INSN. If we have a shifted input operand and the
11088 instruction we depend on is another ALU instruction, then we may
11089 have to account for an additional stall. */
11090 if (shift_opnum
!= 0
11091 && (attr_type
== TYPE_ALU_SHIFT_IMM
11092 || attr_type
== TYPE_ALUS_SHIFT_IMM
11093 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11094 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11095 || attr_type
== TYPE_ALU_SHIFT_REG
11096 || attr_type
== TYPE_ALUS_SHIFT_REG
11097 || attr_type
== TYPE_LOGIC_SHIFT_REG
11098 || attr_type
== TYPE_LOGICS_SHIFT_REG
11099 || attr_type
== TYPE_MOV_SHIFT
11100 || attr_type
== TYPE_MVN_SHIFT
11101 || attr_type
== TYPE_MOV_SHIFT_REG
11102 || attr_type
== TYPE_MVN_SHIFT_REG
))
11104 rtx shifted_operand
;
11107 /* Get the shifted operand. */
11108 extract_insn (insn
);
11109 shifted_operand
= recog_data
.operand
[shift_opnum
];
11111 /* Iterate over all the operands in DEP. If we write an operand
11112 that overlaps with SHIFTED_OPERAND, then we have increase the
11113 cost of this dependency. */
11114 extract_insn (dep
);
11115 preprocess_constraints ();
11116 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11118 /* We can ignore strict inputs. */
11119 if (recog_data
.operand_type
[opno
] == OP_IN
)
11122 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11134 /* Adjust cost hook for Cortex A9. */
11136 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11138 switch (REG_NOTE_KIND (link
))
11145 case REG_DEP_OUTPUT
:
11146 if (recog_memoized (insn
) >= 0
11147 && recog_memoized (dep
) >= 0)
11149 if (GET_CODE (PATTERN (insn
)) == SET
)
11152 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11154 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11156 enum attr_type attr_type_insn
= get_attr_type (insn
);
11157 enum attr_type attr_type_dep
= get_attr_type (dep
);
11159 /* By default all dependencies of the form
11162 have an extra latency of 1 cycle because
11163 of the input and output dependency in this
11164 case. However this gets modeled as an true
11165 dependency and hence all these checks. */
11166 if (REG_P (SET_DEST (PATTERN (insn
)))
11167 && REG_P (SET_DEST (PATTERN (dep
)))
11168 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11169 SET_DEST (PATTERN (dep
))))
11171 /* FMACS is a special case where the dependent
11172 instruction can be issued 3 cycles before
11173 the normal latency in case of an output
11175 if ((attr_type_insn
== TYPE_FMACS
11176 || attr_type_insn
== TYPE_FMACD
)
11177 && (attr_type_dep
== TYPE_FMACS
11178 || attr_type_dep
== TYPE_FMACD
))
11180 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11181 *cost
= insn_default_latency (dep
) - 3;
11183 *cost
= insn_default_latency (dep
);
11188 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11189 *cost
= insn_default_latency (dep
) + 1;
11191 *cost
= insn_default_latency (dep
);
11201 gcc_unreachable ();
11207 /* Adjust cost hook for FA726TE. */
11209 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11211 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11212 have penalty of 3. */
11213 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11214 && recog_memoized (insn
) >= 0
11215 && recog_memoized (dep
) >= 0
11216 && get_attr_conds (dep
) == CONDS_SET
)
11218 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11219 if (get_attr_conds (insn
) == CONDS_USE
11220 && get_attr_type (insn
) != TYPE_BRANCH
)
11226 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11227 || get_attr_conds (insn
) == CONDS_USE
)
11237 /* Implement TARGET_REGISTER_MOVE_COST.
11239 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11240 it is typically more expensive than a single memory access. We set
11241 the cost to less than two memory accesses so that floating
11242 point to integer conversion does not go through memory. */
11245 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
11246 reg_class_t from
, reg_class_t to
)
11250 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11251 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11253 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11254 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11256 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11263 if (from
== HI_REGS
|| to
== HI_REGS
)
11270 /* Implement TARGET_MEMORY_MOVE_COST. */
11273 arm_memory_move_cost (enum machine_mode mode
, reg_class_t rclass
,
11274 bool in ATTRIBUTE_UNUSED
)
11280 if (GET_MODE_SIZE (mode
) < 4)
11283 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11287 /* Vectorizer cost model implementation. */
11289 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11291 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11293 int misalign ATTRIBUTE_UNUSED
)
11297 switch (type_of_cost
)
11300 return current_tune
->vec_costs
->scalar_stmt_cost
;
11303 return current_tune
->vec_costs
->scalar_load_cost
;
11306 return current_tune
->vec_costs
->scalar_store_cost
;
11309 return current_tune
->vec_costs
->vec_stmt_cost
;
11312 return current_tune
->vec_costs
->vec_align_load_cost
;
11315 return current_tune
->vec_costs
->vec_store_cost
;
11317 case vec_to_scalar
:
11318 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11320 case scalar_to_vec
:
11321 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11323 case unaligned_load
:
11324 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11326 case unaligned_store
:
11327 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11329 case cond_branch_taken
:
11330 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11332 case cond_branch_not_taken
:
11333 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11336 case vec_promote_demote
:
11337 return current_tune
->vec_costs
->vec_stmt_cost
;
11339 case vec_construct
:
11340 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11341 return elements
/ 2 + 1;
11344 gcc_unreachable ();
11348 /* Implement targetm.vectorize.add_stmt_cost. */
11351 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11352 struct _stmt_vec_info
*stmt_info
, int misalign
,
11353 enum vect_cost_model_location where
)
11355 unsigned *cost
= (unsigned *) data
;
11356 unsigned retval
= 0;
11358 if (flag_vect_cost_model
)
11360 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11361 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11363 /* Statements in an inner loop relative to the loop being
11364 vectorized are weighted more heavily. The value here is
11365 arbitrary and could potentially be improved with analysis. */
11366 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11367 count
*= 50; /* FIXME. */
11369 retval
= (unsigned) (count
* stmt_cost
);
11370 cost
[where
] += retval
;
11376 /* Return true if and only if this insn can dual-issue only as older. */
11378 cortexa7_older_only (rtx insn
)
11380 if (recog_memoized (insn
) < 0)
11383 switch (get_attr_type (insn
))
11386 case TYPE_ALUS_REG
:
11387 case TYPE_LOGIC_REG
:
11388 case TYPE_LOGICS_REG
:
11390 case TYPE_ADCS_REG
:
11395 case TYPE_SHIFT_IMM
:
11396 case TYPE_SHIFT_REG
:
11397 case TYPE_LOAD_BYTE
:
11400 case TYPE_FFARITHS
:
11402 case TYPE_FFARITHD
:
11420 case TYPE_F_STORES
:
11427 /* Return true if and only if this insn can dual-issue as younger. */
11429 cortexa7_younger (FILE *file
, int verbose
, rtx insn
)
11431 if (recog_memoized (insn
) < 0)
11434 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11438 switch (get_attr_type (insn
))
11441 case TYPE_ALUS_IMM
:
11442 case TYPE_LOGIC_IMM
:
11443 case TYPE_LOGICS_IMM
:
11448 case TYPE_MOV_SHIFT
:
11449 case TYPE_MOV_SHIFT_REG
:
11459 /* Look for an instruction that can dual issue only as an older
11460 instruction, and move it in front of any instructions that can
11461 dual-issue as younger, while preserving the relative order of all
11462 other instructions in the ready list. This is a hueuristic to help
11463 dual-issue in later cycles, by postponing issue of more flexible
11464 instructions. This heuristic may affect dual issue opportunities
11465 in the current cycle. */
11467 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11471 int first_older_only
= -1, first_younger
= -1;
11475 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11479 /* Traverse the ready list from the head (the instruction to issue
11480 first), and looking for the first instruction that can issue as
11481 younger and the first instruction that can dual-issue only as
11483 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11485 rtx insn
= ready
[i
];
11486 if (cortexa7_older_only (insn
))
11488 first_older_only
= i
;
11490 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11493 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11497 /* Nothing to reorder because either no younger insn found or insn
11498 that can dual-issue only as older appears before any insn that
11499 can dual-issue as younger. */
11500 if (first_younger
== -1)
11503 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11507 /* Nothing to reorder because no older-only insn in the ready list. */
11508 if (first_older_only
== -1)
11511 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11515 /* Move first_older_only insn before first_younger. */
11517 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11518 INSN_UID(ready
[first_older_only
]),
11519 INSN_UID(ready
[first_younger
]));
11520 rtx first_older_only_insn
= ready
[first_older_only
];
11521 for (i
= first_older_only
; i
< first_younger
; i
++)
11523 ready
[i
] = ready
[i
+1];
11526 ready
[i
] = first_older_only_insn
;
11530 /* Implement TARGET_SCHED_REORDER. */
11532 arm_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11538 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11541 /* Do nothing for other cores. */
11545 return arm_issue_rate ();
11548 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11549 It corrects the value of COST based on the relationship between
11550 INSN and DEP through the dependence LINK. It returns the new
11551 value. There is a per-core adjust_cost hook to adjust scheduler costs
11552 and the per-core hook can choose to completely override the generic
11553 adjust_cost function. Only put bits of code into arm_adjust_cost that
11554 are common across all cores. */
11556 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
11560 /* When generating Thumb-1 code, we want to place flag-setting operations
11561 close to a conditional branch which depends on them, so that we can
11562 omit the comparison. */
11564 && REG_NOTE_KIND (link
) == 0
11565 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11566 && recog_memoized (dep
) >= 0
11567 && get_attr_conds (dep
) == CONDS_SET
)
11570 if (current_tune
->sched_adjust_cost
!= NULL
)
11572 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
11576 /* XXX Is this strictly true? */
11577 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
11578 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11581 /* Call insns don't incur a stall, even if they follow a load. */
11582 if (REG_NOTE_KIND (link
) == 0
11586 if ((i_pat
= single_set (insn
)) != NULL
11587 && MEM_P (SET_SRC (i_pat
))
11588 && (d_pat
= single_set (dep
)) != NULL
11589 && MEM_P (SET_DEST (d_pat
)))
11591 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11592 /* This is a load after a store, there is no conflict if the load reads
11593 from a cached area. Assume that loads from the stack, and from the
11594 constant pool are cached, and that others will miss. This is a
11597 if ((GET_CODE (src_mem
) == SYMBOL_REF
11598 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11599 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11600 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11601 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11609 arm_max_conditional_execute (void)
11611 return max_insns_skipped
;
11615 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11618 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11620 return (optimize
> 0) ? 2 : 0;
11624 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11626 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11629 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11630 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11631 sequences of non-executed instructions in IT blocks probably take the same
11632 amount of time as executed instructions (and the IT instruction itself takes
11633 space in icache). This function was experimentally determined to give good
11634 results on a popular embedded benchmark. */
11637 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11639 return (TARGET_32BIT
&& speed_p
) ? 1
11640 : arm_default_branch_cost (speed_p
, predictable_p
);
11643 static bool fp_consts_inited
= false;
11645 static REAL_VALUE_TYPE value_fp0
;
11648 init_fp_table (void)
11652 r
= REAL_VALUE_ATOF ("0", DFmode
);
11654 fp_consts_inited
= true;
11657 /* Return TRUE if rtx X is a valid immediate FP constant. */
11659 arm_const_double_rtx (rtx x
)
11663 if (!fp_consts_inited
)
11666 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11667 if (REAL_VALUE_MINUS_ZERO (r
))
11670 if (REAL_VALUES_EQUAL (r
, value_fp0
))
11676 /* VFPv3 has a fairly wide range of representable immediates, formed from
11677 "quarter-precision" floating-point values. These can be evaluated using this
11678 formula (with ^ for exponentiation):
11682 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11683 16 <= n <= 31 and 0 <= r <= 7.
11685 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11687 - A (most-significant) is the sign bit.
11688 - BCD are the exponent (encoded as r XOR 3).
11689 - EFGH are the mantissa (encoded as n - 16).
11692 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11693 fconst[sd] instruction, or -1 if X isn't suitable. */
11695 vfp3_const_double_index (rtx x
)
11697 REAL_VALUE_TYPE r
, m
;
11698 int sign
, exponent
;
11699 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11700 unsigned HOST_WIDE_INT mask
;
11701 HOST_WIDE_INT m1
, m2
;
11702 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11704 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11707 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11709 /* We can't represent these things, so detect them first. */
11710 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11713 /* Extract sign, exponent and mantissa. */
11714 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11715 r
= real_value_abs (&r
);
11716 exponent
= REAL_EXP (&r
);
11717 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11718 highest (sign) bit, with a fixed binary point at bit point_pos.
11719 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11720 bits for the mantissa, this may fail (low bits would be lost). */
11721 real_ldexp (&m
, &r
, point_pos
- exponent
);
11722 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
11726 /* If there are bits set in the low part of the mantissa, we can't
11727 represent this value. */
11731 /* Now make it so that mantissa contains the most-significant bits, and move
11732 the point_pos to indicate that the least-significant bits have been
11734 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11735 mantissa
= mant_hi
;
11737 /* We can permit four significant bits of mantissa only, plus a high bit
11738 which is always 1. */
11739 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
11740 if ((mantissa
& mask
) != 0)
11743 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11744 mantissa
>>= point_pos
- 5;
11746 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11747 floating-point immediate zero with Neon using an integer-zero load, but
11748 that case is handled elsewhere.) */
11752 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11754 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11755 normalized significands are in the range [1, 2). (Our mantissa is shifted
11756 left 4 places at this point relative to normalized IEEE754 values). GCC
11757 internally uses [0.5, 1) (see real.c), so the exponent returned from
11758 REAL_EXP must be altered. */
11759 exponent
= 5 - exponent
;
11761 if (exponent
< 0 || exponent
> 7)
11764 /* Sign, mantissa and exponent are now in the correct form to plug into the
11765 formula described in the comment above. */
11766 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11769 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11771 vfp3_const_double_rtx (rtx x
)
11776 return vfp3_const_double_index (x
) != -1;
11779 /* Recognize immediates which can be used in various Neon instructions. Legal
11780 immediates are described by the following table (for VMVN variants, the
11781 bitwise inverse of the constant shown is recognized. In either case, VMOV
11782 is output and the correct instruction to use for a given constant is chosen
11783 by the assembler). The constant shown is replicated across all elements of
11784 the destination vector.
11786 insn elems variant constant (binary)
11787 ---- ----- ------- -----------------
11788 vmov i32 0 00000000 00000000 00000000 abcdefgh
11789 vmov i32 1 00000000 00000000 abcdefgh 00000000
11790 vmov i32 2 00000000 abcdefgh 00000000 00000000
11791 vmov i32 3 abcdefgh 00000000 00000000 00000000
11792 vmov i16 4 00000000 abcdefgh
11793 vmov i16 5 abcdefgh 00000000
11794 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11795 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11796 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11797 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11798 vmvn i16 10 00000000 abcdefgh
11799 vmvn i16 11 abcdefgh 00000000
11800 vmov i32 12 00000000 00000000 abcdefgh 11111111
11801 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11802 vmov i32 14 00000000 abcdefgh 11111111 11111111
11803 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11804 vmov i8 16 abcdefgh
11805 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11806 eeeeeeee ffffffff gggggggg hhhhhhhh
11807 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11808 vmov f32 19 00000000 00000000 00000000 00000000
11810 For case 18, B = !b. Representable values are exactly those accepted by
11811 vfp3_const_double_index, but are output as floating-point numbers rather
11814 For case 19, we will change it to vmov.i32 when assembling.
11816 Variants 0-5 (inclusive) may also be used as immediates for the second
11817 operand of VORR/VBIC instructions.
11819 The INVERSE argument causes the bitwise inverse of the given operand to be
11820 recognized instead (used for recognizing legal immediates for the VAND/VORN
11821 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11822 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11823 output, rather than the real insns vbic/vorr).
11825 INVERSE makes no difference to the recognition of float vectors.
11827 The return value is the variant of immediate as shown in the above table, or
11828 -1 if the given value doesn't match any of the listed patterns.
11831 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
11832 rtx
*modconst
, int *elementwidth
)
11834 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11836 for (i = 0; i < idx; i += (STRIDE)) \
11841 immtype = (CLASS); \
11842 elsize = (ELSIZE); \
11846 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11847 unsigned int innersize
;
11848 unsigned char bytes
[16];
11849 int immtype
= -1, matches
;
11850 unsigned int invmask
= inverse
? 0xff : 0;
11851 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11855 n_elts
= CONST_VECTOR_NUNITS (op
);
11856 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
11861 if (mode
== VOIDmode
)
11863 innersize
= GET_MODE_SIZE (mode
);
11866 /* Vectors of float constants. */
11867 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11869 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11870 REAL_VALUE_TYPE r0
;
11872 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11875 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
11877 for (i
= 1; i
< n_elts
; i
++)
11879 rtx elt
= CONST_VECTOR_ELT (op
, i
);
11880 REAL_VALUE_TYPE re
;
11882 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
11884 if (!REAL_VALUES_EQUAL (r0
, re
))
11889 *modconst
= CONST_VECTOR_ELT (op
, 0);
11894 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11900 /* Splat vector constant out into a byte vector. */
11901 for (i
= 0; i
< n_elts
; i
++)
11903 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11904 unsigned HOST_WIDE_INT elpart
;
11905 unsigned int part
, parts
;
11907 if (CONST_INT_P (el
))
11909 elpart
= INTVAL (el
);
11912 else if (CONST_DOUBLE_P (el
))
11914 elpart
= CONST_DOUBLE_LOW (el
);
11918 gcc_unreachable ();
11920 for (part
= 0; part
< parts
; part
++)
11923 for (byte
= 0; byte
< innersize
; byte
++)
11925 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11926 elpart
>>= BITS_PER_UNIT
;
11928 if (CONST_DOUBLE_P (el
))
11929 elpart
= CONST_DOUBLE_HIGH (el
);
11933 /* Sanity check. */
11934 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11938 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11939 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11941 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11942 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11944 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11945 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11947 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11948 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11950 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11952 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11954 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11955 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11957 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11958 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11960 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11961 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11963 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11964 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11966 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11968 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11970 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11971 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11973 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11974 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11976 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11977 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11979 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11980 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11982 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11984 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11985 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11993 *elementwidth
= elsize
;
11997 unsigned HOST_WIDE_INT imm
= 0;
11999 /* Un-invert bytes of recognized vector, if necessary. */
12001 for (i
= 0; i
< idx
; i
++)
12002 bytes
[i
] ^= invmask
;
12006 /* FIXME: Broken on 32-bit H_W_I hosts. */
12007 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12009 for (i
= 0; i
< 8; i
++)
12010 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12011 << (i
* BITS_PER_UNIT
);
12013 *modconst
= GEN_INT (imm
);
12017 unsigned HOST_WIDE_INT imm
= 0;
12019 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12020 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12022 *modconst
= GEN_INT (imm
);
12030 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12031 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12032 float elements), and a modified constant (whatever should be output for a
12033 VMOV) in *MODCONST. */
12036 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
12037 rtx
*modconst
, int *elementwidth
)
12041 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12047 *modconst
= tmpconst
;
12050 *elementwidth
= tmpwidth
;
12055 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12056 the immediate is valid, write a constant suitable for using as an operand
12057 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12058 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12061 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
12062 rtx
*modconst
, int *elementwidth
)
12066 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12068 if (retval
< 0 || retval
> 5)
12072 *modconst
= tmpconst
;
12075 *elementwidth
= tmpwidth
;
12080 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12081 the immediate is valid, write a constant suitable for using as an operand
12082 to VSHR/VSHL to *MODCONST and the corresponding element width to
12083 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12084 because they have different limitations. */
12087 neon_immediate_valid_for_shift (rtx op
, enum machine_mode mode
,
12088 rtx
*modconst
, int *elementwidth
,
12091 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12092 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12093 unsigned HOST_WIDE_INT last_elt
= 0;
12094 unsigned HOST_WIDE_INT maxshift
;
12096 /* Split vector constant out into a byte vector. */
12097 for (i
= 0; i
< n_elts
; i
++)
12099 rtx el
= CONST_VECTOR_ELT (op
, i
);
12100 unsigned HOST_WIDE_INT elpart
;
12102 if (CONST_INT_P (el
))
12103 elpart
= INTVAL (el
);
12104 else if (CONST_DOUBLE_P (el
))
12107 gcc_unreachable ();
12109 if (i
!= 0 && elpart
!= last_elt
)
12115 /* Shift less than element size. */
12116 maxshift
= innersize
* 8;
12120 /* Left shift immediate value can be from 0 to <size>-1. */
12121 if (last_elt
>= maxshift
)
12126 /* Right shift immediate value can be from 1 to <size>. */
12127 if (last_elt
== 0 || last_elt
> maxshift
)
12132 *elementwidth
= innersize
* 8;
12135 *modconst
= CONST_VECTOR_ELT (op
, 0);
12140 /* Return a string suitable for output of Neon immediate logic operation
12144 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
12145 int inverse
, int quad
)
12147 int width
, is_valid
;
12148 static char templ
[40];
12150 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12152 gcc_assert (is_valid
!= 0);
12155 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12157 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12162 /* Return a string suitable for output of Neon immediate shift operation
12163 (VSHR or VSHL) MNEM. */
12166 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12167 enum machine_mode mode
, int quad
,
12170 int width
, is_valid
;
12171 static char templ
[40];
12173 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12174 gcc_assert (is_valid
!= 0);
12177 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12179 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12184 /* Output a sequence of pairwise operations to implement a reduction.
12185 NOTE: We do "too much work" here, because pairwise operations work on two
12186 registers-worth of operands in one go. Unfortunately we can't exploit those
12187 extra calculations to do the full operation in fewer steps, I don't think.
12188 Although all vector elements of the result but the first are ignored, we
12189 actually calculate the same result in each of the elements. An alternative
12190 such as initially loading a vector with zero to use as each of the second
12191 operands would use up an additional register and take an extra instruction,
12192 for no particular gain. */
12195 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
12196 rtx (*reduc
) (rtx
, rtx
, rtx
))
12198 enum machine_mode inner
= GET_MODE_INNER (mode
);
12199 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
12202 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12204 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12205 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12210 /* If VALS is a vector constant that can be loaded into a register
12211 using VDUP, generate instructions to do so and return an RTX to
12212 assign to the register. Otherwise return NULL_RTX. */
12215 neon_vdup_constant (rtx vals
)
12217 enum machine_mode mode
= GET_MODE (vals
);
12218 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12219 int n_elts
= GET_MODE_NUNITS (mode
);
12220 bool all_same
= true;
12224 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12227 for (i
= 0; i
< n_elts
; ++i
)
12229 x
= XVECEXP (vals
, 0, i
);
12230 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12235 /* The elements are not all the same. We could handle repeating
12236 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12237 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12241 /* We can load this constant by using VDUP and a constant in a
12242 single ARM register. This will be cheaper than a vector
12245 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12246 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12249 /* Generate code to load VALS, which is a PARALLEL containing only
12250 constants (for vec_init) or CONST_VECTOR, efficiently into a
12251 register. Returns an RTX to copy into the register, or NULL_RTX
12252 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12255 neon_make_constant (rtx vals
)
12257 enum machine_mode mode
= GET_MODE (vals
);
12259 rtx const_vec
= NULL_RTX
;
12260 int n_elts
= GET_MODE_NUNITS (mode
);
12264 if (GET_CODE (vals
) == CONST_VECTOR
)
12266 else if (GET_CODE (vals
) == PARALLEL
)
12268 /* A CONST_VECTOR must contain only CONST_INTs and
12269 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12270 Only store valid constants in a CONST_VECTOR. */
12271 for (i
= 0; i
< n_elts
; ++i
)
12273 rtx x
= XVECEXP (vals
, 0, i
);
12274 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12277 if (n_const
== n_elts
)
12278 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12281 gcc_unreachable ();
12283 if (const_vec
!= NULL
12284 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12285 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12287 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12288 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12289 pipeline cycle; creating the constant takes one or two ARM
12290 pipeline cycles. */
12292 else if (const_vec
!= NULL_RTX
)
12293 /* Load from constant pool. On Cortex-A8 this takes two cycles
12294 (for either double or quad vectors). We can not take advantage
12295 of single-cycle VLD1 because we need a PC-relative addressing
12299 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12300 We can not construct an initializer. */
12304 /* Initialize vector TARGET to VALS. */
12307 neon_expand_vector_init (rtx target
, rtx vals
)
12309 enum machine_mode mode
= GET_MODE (target
);
12310 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12311 int n_elts
= GET_MODE_NUNITS (mode
);
12312 int n_var
= 0, one_var
= -1;
12313 bool all_same
= true;
12317 for (i
= 0; i
< n_elts
; ++i
)
12319 x
= XVECEXP (vals
, 0, i
);
12320 if (!CONSTANT_P (x
))
12321 ++n_var
, one_var
= i
;
12323 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12329 rtx constant
= neon_make_constant (vals
);
12330 if (constant
!= NULL_RTX
)
12332 emit_move_insn (target
, constant
);
12337 /* Splat a single non-constant element if we can. */
12338 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12340 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12341 emit_insn (gen_rtx_SET (VOIDmode
, target
,
12342 gen_rtx_VEC_DUPLICATE (mode
, x
)));
12346 /* One field is non-constant. Load constant then overwrite varying
12347 field. This is more efficient than using the stack. */
12350 rtx copy
= copy_rtx (vals
);
12351 rtx index
= GEN_INT (one_var
);
12353 /* Load constant part of vector, substitute neighboring value for
12354 varying element. */
12355 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12356 neon_expand_vector_init (target
, copy
);
12358 /* Insert variable. */
12359 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12363 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12366 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12369 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12372 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12375 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12378 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12381 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12384 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12387 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12390 gcc_unreachable ();
12395 /* Construct the vector in memory one field at a time
12396 and load the whole vector. */
12397 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12398 for (i
= 0; i
< n_elts
; i
++)
12399 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12400 i
* GET_MODE_SIZE (inner_mode
)),
12401 XVECEXP (vals
, 0, i
));
12402 emit_move_insn (target
, mem
);
12405 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12406 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12407 reported source locations are bogus. */
12410 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12413 HOST_WIDE_INT lane
;
12415 gcc_assert (CONST_INT_P (operand
));
12417 lane
= INTVAL (operand
);
12419 if (lane
< low
|| lane
>= high
)
12423 /* Bounds-check lanes. */
12426 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12428 bounds_check (operand
, low
, high
, "lane out of range");
12431 /* Bounds-check constants. */
12434 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12436 bounds_check (operand
, low
, high
, "constant out of range");
12440 neon_element_bits (enum machine_mode mode
)
12442 if (mode
== DImode
)
12443 return GET_MODE_BITSIZE (mode
);
12445 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
12449 /* Predicates for `match_operand' and `match_operator'. */
12451 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12452 WB is true if full writeback address modes are allowed and is false
12453 if limited writeback address modes (POST_INC and PRE_DEC) are
12457 arm_coproc_mem_operand (rtx op
, bool wb
)
12461 /* Reject eliminable registers. */
12462 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12463 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12464 || reg_mentioned_p (arg_pointer_rtx
, op
)
12465 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12466 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12467 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12468 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12471 /* Constants are converted into offsets from labels. */
12475 ind
= XEXP (op
, 0);
12477 if (reload_completed
12478 && (GET_CODE (ind
) == LABEL_REF
12479 || (GET_CODE (ind
) == CONST
12480 && GET_CODE (XEXP (ind
, 0)) == PLUS
12481 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12482 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12485 /* Match: (mem (reg)). */
12487 return arm_address_register_rtx_p (ind
, 0);
12489 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12490 acceptable in any case (subject to verification by
12491 arm_address_register_rtx_p). We need WB to be true to accept
12492 PRE_INC and POST_DEC. */
12493 if (GET_CODE (ind
) == POST_INC
12494 || GET_CODE (ind
) == PRE_DEC
12496 && (GET_CODE (ind
) == PRE_INC
12497 || GET_CODE (ind
) == POST_DEC
)))
12498 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12501 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12502 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12503 && GET_CODE (XEXP (ind
, 1)) == PLUS
12504 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12505 ind
= XEXP (ind
, 1);
12510 if (GET_CODE (ind
) == PLUS
12511 && REG_P (XEXP (ind
, 0))
12512 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12513 && CONST_INT_P (XEXP (ind
, 1))
12514 && INTVAL (XEXP (ind
, 1)) > -1024
12515 && INTVAL (XEXP (ind
, 1)) < 1024
12516 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12522 /* Return TRUE if OP is a memory operand which we can load or store a vector
12523 to/from. TYPE is one of the following values:
12524 0 - Vector load/stor (vldr)
12525 1 - Core registers (ldm)
12526 2 - Element/structure loads (vld1)
12529 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12533 /* Reject eliminable registers. */
12534 if (! (reload_in_progress
|| reload_completed
)
12535 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12536 || reg_mentioned_p (arg_pointer_rtx
, op
)
12537 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12538 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12539 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12540 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12543 /* Constants are converted into offsets from labels. */
12547 ind
= XEXP (op
, 0);
12549 if (reload_completed
12550 && (GET_CODE (ind
) == LABEL_REF
12551 || (GET_CODE (ind
) == CONST
12552 && GET_CODE (XEXP (ind
, 0)) == PLUS
12553 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12554 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12557 /* Match: (mem (reg)). */
12559 return arm_address_register_rtx_p (ind
, 0);
12561 /* Allow post-increment with Neon registers. */
12562 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12563 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12564 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12566 /* FIXME: vld1 allows register post-modify. */
12572 && GET_CODE (ind
) == PLUS
12573 && REG_P (XEXP (ind
, 0))
12574 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12575 && CONST_INT_P (XEXP (ind
, 1))
12576 && INTVAL (XEXP (ind
, 1)) > -1024
12577 /* For quad modes, we restrict the constant offset to be slightly less
12578 than what the instruction format permits. We have no such constraint
12579 on double mode offsets. (This must match arm_legitimate_index_p.) */
12580 && (INTVAL (XEXP (ind
, 1))
12581 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12582 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12588 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12591 neon_struct_mem_operand (rtx op
)
12595 /* Reject eliminable registers. */
12596 if (! (reload_in_progress
|| reload_completed
)
12597 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12598 || reg_mentioned_p (arg_pointer_rtx
, op
)
12599 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12600 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12601 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12602 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12605 /* Constants are converted into offsets from labels. */
12609 ind
= XEXP (op
, 0);
12611 if (reload_completed
12612 && (GET_CODE (ind
) == LABEL_REF
12613 || (GET_CODE (ind
) == CONST
12614 && GET_CODE (XEXP (ind
, 0)) == PLUS
12615 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12616 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12619 /* Match: (mem (reg)). */
12621 return arm_address_register_rtx_p (ind
, 0);
12623 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12624 if (GET_CODE (ind
) == POST_INC
12625 || GET_CODE (ind
) == PRE_DEC
)
12626 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12631 /* Return true if X is a register that will be eliminated later on. */
12633 arm_eliminable_register (rtx x
)
12635 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12636 || REGNO (x
) == ARG_POINTER_REGNUM
12637 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12638 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12641 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12642 coprocessor registers. Otherwise return NO_REGS. */
12645 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
12647 if (mode
== HFmode
)
12649 if (!TARGET_NEON_FP16
)
12650 return GENERAL_REGS
;
12651 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12653 return GENERAL_REGS
;
12656 /* The neon move patterns handle all legitimate vector and struct
12659 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12660 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12661 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12662 || VALID_NEON_STRUCT_MODE (mode
)))
12665 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12668 return GENERAL_REGS
;
12671 /* Values which must be returned in the most-significant end of the return
12675 arm_return_in_msb (const_tree valtype
)
12677 return (TARGET_AAPCS_BASED
12678 && BYTES_BIG_ENDIAN
12679 && (AGGREGATE_TYPE_P (valtype
)
12680 || TREE_CODE (valtype
) == COMPLEX_TYPE
12681 || FIXED_POINT_TYPE_P (valtype
)));
12684 /* Return TRUE if X references a SYMBOL_REF. */
12686 symbol_mentioned_p (rtx x
)
12691 if (GET_CODE (x
) == SYMBOL_REF
)
12694 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12695 are constant offsets, not symbols. */
12696 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12699 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12701 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12707 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12708 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12711 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12718 /* Return TRUE if X references a LABEL_REF. */
12720 label_mentioned_p (rtx x
)
12725 if (GET_CODE (x
) == LABEL_REF
)
12728 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12729 instruction, but they are constant offsets, not symbols. */
12730 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12733 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12734 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12740 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12741 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12744 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12752 tls_mentioned_p (rtx x
)
12754 switch (GET_CODE (x
))
12757 return tls_mentioned_p (XEXP (x
, 0));
12760 if (XINT (x
, 1) == UNSPEC_TLS
)
12768 /* Must not copy any rtx that uses a pc-relative address. */
12771 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
12773 if (GET_CODE (*x
) == UNSPEC
12774 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
12775 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
12781 arm_cannot_copy_insn_p (rtx insn
)
12783 /* The tls call insn cannot be copied, as it is paired with a data
12785 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12788 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
12792 minmax_code (rtx x
)
12794 enum rtx_code code
= GET_CODE (x
);
12807 gcc_unreachable ();
12811 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12814 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12815 int *mask
, bool *signed_sat
)
12817 /* The high bound must be a power of two minus one. */
12818 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12822 /* The low bound is either zero (for usat) or one less than the
12823 negation of the high bound (for ssat). */
12824 if (INTVAL (lo_bound
) == 0)
12829 *signed_sat
= false;
12834 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12839 *signed_sat
= true;
12847 /* Return 1 if memory locations are adjacent. */
12849 adjacent_mem_locations (rtx a
, rtx b
)
12851 /* We don't guarantee to preserve the order of these memory refs. */
12852 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12855 if ((REG_P (XEXP (a
, 0))
12856 || (GET_CODE (XEXP (a
, 0)) == PLUS
12857 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12858 && (REG_P (XEXP (b
, 0))
12859 || (GET_CODE (XEXP (b
, 0)) == PLUS
12860 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12862 HOST_WIDE_INT val0
= 0, val1
= 0;
12866 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12868 reg0
= XEXP (XEXP (a
, 0), 0);
12869 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12872 reg0
= XEXP (a
, 0);
12874 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12876 reg1
= XEXP (XEXP (b
, 0), 0);
12877 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12880 reg1
= XEXP (b
, 0);
12882 /* Don't accept any offset that will require multiple
12883 instructions to handle, since this would cause the
12884 arith_adjacentmem pattern to output an overlong sequence. */
12885 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12888 /* Don't allow an eliminable register: register elimination can make
12889 the offset too large. */
12890 if (arm_eliminable_register (reg0
))
12893 val_diff
= val1
- val0
;
12897 /* If the target has load delay slots, then there's no benefit
12898 to using an ldm instruction unless the offset is zero and
12899 we are optimizing for size. */
12900 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12901 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12902 && (val_diff
== 4 || val_diff
== -4));
12905 return ((REGNO (reg0
) == REGNO (reg1
))
12906 && (val_diff
== 4 || val_diff
== -4));
12912 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12913 for load operations, false for store operations. CONSECUTIVE is true
12914 if the register numbers in the operation must be consecutive in the register
12915 bank. RETURN_PC is true if value is to be loaded in PC.
12916 The pattern we are trying to match for load is:
12917 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12918 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12921 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12924 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12925 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12926 3. If consecutive is TRUE, then for kth register being loaded,
12927 REGNO (R_dk) = REGNO (R_d0) + k.
12928 The pattern for store is similar. */
12930 ldm_stm_operation_p (rtx op
, bool load
, enum machine_mode mode
,
12931 bool consecutive
, bool return_pc
)
12933 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12934 rtx reg
, mem
, addr
;
12936 unsigned first_regno
;
12937 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12939 bool addr_reg_in_reglist
= false;
12940 bool update
= false;
12945 /* If not in SImode, then registers must be consecutive
12946 (e.g., VLDM instructions for DFmode). */
12947 gcc_assert ((mode
== SImode
) || consecutive
);
12948 /* Setting return_pc for stores is illegal. */
12949 gcc_assert (!return_pc
|| load
);
12951 /* Set up the increments and the regs per val based on the mode. */
12952 reg_increment
= GET_MODE_SIZE (mode
);
12953 regs_per_val
= reg_increment
/ 4;
12954 offset_adj
= return_pc
? 1 : 0;
12957 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12958 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12961 /* Check if this is a write-back. */
12962 elt
= XVECEXP (op
, 0, offset_adj
);
12963 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12969 /* The offset adjustment must be the number of registers being
12970 popped times the size of a single register. */
12971 if (!REG_P (SET_DEST (elt
))
12972 || !REG_P (XEXP (SET_SRC (elt
), 0))
12973 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12974 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12975 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12976 ((count
- 1 - offset_adj
) * reg_increment
))
12980 i
= i
+ offset_adj
;
12981 base
= base
+ offset_adj
;
12982 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12983 success depends on the type: VLDM can do just one reg,
12984 LDM must do at least two. */
12985 if ((count
<= i
) && (mode
== SImode
))
12988 elt
= XVECEXP (op
, 0, i
- 1);
12989 if (GET_CODE (elt
) != SET
)
12994 reg
= SET_DEST (elt
);
12995 mem
= SET_SRC (elt
);
12999 reg
= SET_SRC (elt
);
13000 mem
= SET_DEST (elt
);
13003 if (!REG_P (reg
) || !MEM_P (mem
))
13006 regno
= REGNO (reg
);
13007 first_regno
= regno
;
13008 addr
= XEXP (mem
, 0);
13009 if (GET_CODE (addr
) == PLUS
)
13011 if (!CONST_INT_P (XEXP (addr
, 1)))
13014 offset
= INTVAL (XEXP (addr
, 1));
13015 addr
= XEXP (addr
, 0);
13021 /* Don't allow SP to be loaded unless it is also the base register. It
13022 guarantees that SP is reset correctly when an LDM instruction
13023 is interrupted. Otherwise, we might end up with a corrupt stack. */
13024 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13027 for (; i
< count
; i
++)
13029 elt
= XVECEXP (op
, 0, i
);
13030 if (GET_CODE (elt
) != SET
)
13035 reg
= SET_DEST (elt
);
13036 mem
= SET_SRC (elt
);
13040 reg
= SET_SRC (elt
);
13041 mem
= SET_DEST (elt
);
13045 || GET_MODE (reg
) != mode
13046 || REGNO (reg
) <= regno
13049 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13050 /* Don't allow SP to be loaded unless it is also the base register. It
13051 guarantees that SP is reset correctly when an LDM instruction
13052 is interrupted. Otherwise, we might end up with a corrupt stack. */
13053 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13055 || GET_MODE (mem
) != mode
13056 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13057 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13058 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13059 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13060 offset
+ (i
- base
) * reg_increment
))
13061 && (!REG_P (XEXP (mem
, 0))
13062 || offset
+ (i
- base
) * reg_increment
!= 0)))
13065 regno
= REGNO (reg
);
13066 if (regno
== REGNO (addr
))
13067 addr_reg_in_reglist
= true;
13072 if (update
&& addr_reg_in_reglist
)
13075 /* For Thumb-1, address register is always modified - either by write-back
13076 or by explicit load. If the pattern does not describe an update,
13077 then the address register must be in the list of loaded registers. */
13079 return update
|| addr_reg_in_reglist
;
13085 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13086 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13087 instruction. ADD_OFFSET is nonzero if the base address register needs
13088 to be modified with an add instruction before we can use it. */
13091 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13092 int nops
, HOST_WIDE_INT add_offset
)
13094 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13095 if the offset isn't small enough. The reason 2 ldrs are faster
13096 is because these ARMs are able to do more than one cache access
13097 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13098 whilst the ARM8 has a double bandwidth cache. This means that
13099 these cores can do both an instruction fetch and a data fetch in
13100 a single cycle, so the trick of calculating the address into a
13101 scratch register (one of the result regs) and then doing a load
13102 multiple actually becomes slower (and no smaller in code size).
13103 That is the transformation
13105 ldr rd1, [rbase + offset]
13106 ldr rd2, [rbase + offset + 4]
13110 add rd1, rbase, offset
13111 ldmia rd1, {rd1, rd2}
13113 produces worse code -- '3 cycles + any stalls on rd2' instead of
13114 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13115 access per cycle, the first sequence could never complete in less
13116 than 6 cycles, whereas the ldm sequence would only take 5 and
13117 would make better use of sequential accesses if not hitting the
13120 We cheat here and test 'arm_ld_sched' which we currently know to
13121 only be true for the ARM8, ARM9 and StrongARM. If this ever
13122 changes, then the test below needs to be reworked. */
13123 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13126 /* XScale has load-store double instructions, but they have stricter
13127 alignment requirements than load-store multiple, so we cannot
13130 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13131 the pipeline until completion.
13139 An ldr instruction takes 1-3 cycles, but does not block the
13148 Best case ldr will always win. However, the more ldr instructions
13149 we issue, the less likely we are to be able to schedule them well.
13150 Using ldr instructions also increases code size.
13152 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13153 for counts of 3 or 4 regs. */
13154 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13159 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13160 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13161 an array ORDER which describes the sequence to use when accessing the
13162 offsets that produces an ascending order. In this sequence, each
13163 offset must be larger by exactly 4 than the previous one. ORDER[0]
13164 must have been filled in with the lowest offset by the caller.
13165 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13166 we use to verify that ORDER produces an ascending order of registers.
13167 Return true if it was possible to construct such an order, false if
13171 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13172 int *unsorted_regs
)
13175 for (i
= 1; i
< nops
; i
++)
13179 order
[i
] = order
[i
- 1];
13180 for (j
= 0; j
< nops
; j
++)
13181 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13183 /* We must find exactly one offset that is higher than the
13184 previous one by 4. */
13185 if (order
[i
] != order
[i
- 1])
13189 if (order
[i
] == order
[i
- 1])
13191 /* The register numbers must be ascending. */
13192 if (unsorted_regs
!= NULL
13193 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13199 /* Used to determine in a peephole whether a sequence of load
13200 instructions can be changed into a load-multiple instruction.
13201 NOPS is the number of separate load instructions we are examining. The
13202 first NOPS entries in OPERANDS are the destination registers, the
13203 next NOPS entries are memory operands. If this function is
13204 successful, *BASE is set to the common base register of the memory
13205 accesses; *LOAD_OFFSET is set to the first memory location's offset
13206 from that base register.
13207 REGS is an array filled in with the destination register numbers.
13208 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13209 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13210 the sequence of registers in REGS matches the loads from ascending memory
13211 locations, and the function verifies that the register numbers are
13212 themselves ascending. If CHECK_REGS is false, the register numbers
13213 are stored in the order they are found in the operands. */
13215 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13216 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13218 int unsorted_regs
[MAX_LDM_STM_OPS
];
13219 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13220 int order
[MAX_LDM_STM_OPS
];
13221 rtx base_reg_rtx
= NULL
;
13225 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13226 easily extended if required. */
13227 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13229 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13231 /* Loop over the operands and check that the memory references are
13232 suitable (i.e. immediate offsets from the same base register). At
13233 the same time, extract the target register, and the memory
13235 for (i
= 0; i
< nops
; i
++)
13240 /* Convert a subreg of a mem into the mem itself. */
13241 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13242 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13244 gcc_assert (MEM_P (operands
[nops
+ i
]));
13246 /* Don't reorder volatile memory references; it doesn't seem worth
13247 looking for the case where the order is ok anyway. */
13248 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13251 offset
= const0_rtx
;
13253 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13254 || (GET_CODE (reg
) == SUBREG
13255 && REG_P (reg
= SUBREG_REG (reg
))))
13256 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13257 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13258 || (GET_CODE (reg
) == SUBREG
13259 && REG_P (reg
= SUBREG_REG (reg
))))
13260 && (CONST_INT_P (offset
13261 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13265 base_reg
= REGNO (reg
);
13266 base_reg_rtx
= reg
;
13267 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13270 else if (base_reg
!= (int) REGNO (reg
))
13271 /* Not addressed from the same base register. */
13274 unsorted_regs
[i
] = (REG_P (operands
[i
])
13275 ? REGNO (operands
[i
])
13276 : REGNO (SUBREG_REG (operands
[i
])));
13278 /* If it isn't an integer register, or if it overwrites the
13279 base register but isn't the last insn in the list, then
13280 we can't do this. */
13281 if (unsorted_regs
[i
] < 0
13282 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13283 || unsorted_regs
[i
] > 14
13284 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13287 /* Don't allow SP to be loaded unless it is also the base
13288 register. It guarantees that SP is reset correctly when
13289 an LDM instruction is interrupted. Otherwise, we might
13290 end up with a corrupt stack. */
13291 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13294 unsorted_offsets
[i
] = INTVAL (offset
);
13295 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13299 /* Not a suitable memory address. */
13303 /* All the useful information has now been extracted from the
13304 operands into unsorted_regs and unsorted_offsets; additionally,
13305 order[0] has been set to the lowest offset in the list. Sort
13306 the offsets into order, verifying that they are adjacent, and
13307 check that the register numbers are ascending. */
13308 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13309 check_regs
? unsorted_regs
: NULL
))
13313 memcpy (saved_order
, order
, sizeof order
);
13319 for (i
= 0; i
< nops
; i
++)
13320 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13322 *load_offset
= unsorted_offsets
[order
[0]];
13326 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13329 if (unsorted_offsets
[order
[0]] == 0)
13330 ldm_case
= 1; /* ldmia */
13331 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13332 ldm_case
= 2; /* ldmib */
13333 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13334 ldm_case
= 3; /* ldmda */
13335 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13336 ldm_case
= 4; /* ldmdb */
13337 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13338 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13343 if (!multiple_operation_profitable_p (false, nops
,
13345 ? unsorted_offsets
[order
[0]] : 0))
13351 /* Used to determine in a peephole whether a sequence of store instructions can
13352 be changed into a store-multiple instruction.
13353 NOPS is the number of separate store instructions we are examining.
13354 NOPS_TOTAL is the total number of instructions recognized by the peephole
13356 The first NOPS entries in OPERANDS are the source registers, the next
13357 NOPS entries are memory operands. If this function is successful, *BASE is
13358 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13359 to the first memory location's offset from that base register. REGS is an
13360 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13361 likewise filled with the corresponding rtx's.
13362 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13363 numbers to an ascending order of stores.
13364 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13365 from ascending memory locations, and the function verifies that the register
13366 numbers are themselves ascending. If CHECK_REGS is false, the register
13367 numbers are stored in the order they are found in the operands. */
13369 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13370 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13371 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13373 int unsorted_regs
[MAX_LDM_STM_OPS
];
13374 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13375 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13376 int order
[MAX_LDM_STM_OPS
];
13378 rtx base_reg_rtx
= NULL
;
13381 /* Write back of base register is currently only supported for Thumb 1. */
13382 int base_writeback
= TARGET_THUMB1
;
13384 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13385 easily extended if required. */
13386 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13388 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13390 /* Loop over the operands and check that the memory references are
13391 suitable (i.e. immediate offsets from the same base register). At
13392 the same time, extract the target register, and the memory
13394 for (i
= 0; i
< nops
; i
++)
13399 /* Convert a subreg of a mem into the mem itself. */
13400 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13401 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13403 gcc_assert (MEM_P (operands
[nops
+ i
]));
13405 /* Don't reorder volatile memory references; it doesn't seem worth
13406 looking for the case where the order is ok anyway. */
13407 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13410 offset
= const0_rtx
;
13412 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13413 || (GET_CODE (reg
) == SUBREG
13414 && REG_P (reg
= SUBREG_REG (reg
))))
13415 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13416 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13417 || (GET_CODE (reg
) == SUBREG
13418 && REG_P (reg
= SUBREG_REG (reg
))))
13419 && (CONST_INT_P (offset
13420 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13422 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13423 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13424 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13428 base_reg
= REGNO (reg
);
13429 base_reg_rtx
= reg
;
13430 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13433 else if (base_reg
!= (int) REGNO (reg
))
13434 /* Not addressed from the same base register. */
13437 /* If it isn't an integer register, then we can't do this. */
13438 if (unsorted_regs
[i
] < 0
13439 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13440 /* The effects are unpredictable if the base register is
13441 both updated and stored. */
13442 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13443 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13444 || unsorted_regs
[i
] > 14)
13447 unsorted_offsets
[i
] = INTVAL (offset
);
13448 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13452 /* Not a suitable memory address. */
13456 /* All the useful information has now been extracted from the
13457 operands into unsorted_regs and unsorted_offsets; additionally,
13458 order[0] has been set to the lowest offset in the list. Sort
13459 the offsets into order, verifying that they are adjacent, and
13460 check that the register numbers are ascending. */
13461 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13462 check_regs
? unsorted_regs
: NULL
))
13466 memcpy (saved_order
, order
, sizeof order
);
13472 for (i
= 0; i
< nops
; i
++)
13474 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13476 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13479 *load_offset
= unsorted_offsets
[order
[0]];
13483 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13486 if (unsorted_offsets
[order
[0]] == 0)
13487 stm_case
= 1; /* stmia */
13488 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13489 stm_case
= 2; /* stmib */
13490 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13491 stm_case
= 3; /* stmda */
13492 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13493 stm_case
= 4; /* stmdb */
13497 if (!multiple_operation_profitable_p (false, nops
, 0))
13503 /* Routines for use in generating RTL. */
13505 /* Generate a load-multiple instruction. COUNT is the number of loads in
13506 the instruction; REGS and MEMS are arrays containing the operands.
13507 BASEREG is the base register to be used in addressing the memory operands.
13508 WBACK_OFFSET is nonzero if the instruction should update the base
13512 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13513 HOST_WIDE_INT wback_offset
)
13518 if (!multiple_operation_profitable_p (false, count
, 0))
13524 for (i
= 0; i
< count
; i
++)
13525 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13527 if (wback_offset
!= 0)
13528 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13530 seq
= get_insns ();
13536 result
= gen_rtx_PARALLEL (VOIDmode
,
13537 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13538 if (wback_offset
!= 0)
13540 XVECEXP (result
, 0, 0)
13541 = gen_rtx_SET (VOIDmode
, basereg
,
13542 plus_constant (Pmode
, basereg
, wback_offset
));
13547 for (j
= 0; i
< count
; i
++, j
++)
13548 XVECEXP (result
, 0, i
)
13549 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13554 /* Generate a store-multiple instruction. COUNT is the number of stores in
13555 the instruction; REGS and MEMS are arrays containing the operands.
13556 BASEREG is the base register to be used in addressing the memory operands.
13557 WBACK_OFFSET is nonzero if the instruction should update the base
13561 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13562 HOST_WIDE_INT wback_offset
)
13567 if (GET_CODE (basereg
) == PLUS
)
13568 basereg
= XEXP (basereg
, 0);
13570 if (!multiple_operation_profitable_p (false, count
, 0))
13576 for (i
= 0; i
< count
; i
++)
13577 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13579 if (wback_offset
!= 0)
13580 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13582 seq
= get_insns ();
13588 result
= gen_rtx_PARALLEL (VOIDmode
,
13589 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13590 if (wback_offset
!= 0)
13592 XVECEXP (result
, 0, 0)
13593 = gen_rtx_SET (VOIDmode
, basereg
,
13594 plus_constant (Pmode
, basereg
, wback_offset
));
13599 for (j
= 0; i
< count
; i
++, j
++)
13600 XVECEXP (result
, 0, i
)
13601 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13606 /* Generate either a load-multiple or a store-multiple instruction. This
13607 function can be used in situations where we can start with a single MEM
13608 rtx and adjust its address upwards.
13609 COUNT is the number of operations in the instruction, not counting a
13610 possible update of the base register. REGS is an array containing the
13612 BASEREG is the base register to be used in addressing the memory operands,
13613 which are constructed from BASEMEM.
13614 WRITE_BACK specifies whether the generated instruction should include an
13615 update of the base register.
13616 OFFSETP is used to pass an offset to and from this function; this offset
13617 is not used when constructing the address (instead BASEMEM should have an
13618 appropriate offset in its address), it is used only for setting
13619 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13622 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13623 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13625 rtx mems
[MAX_LDM_STM_OPS
];
13626 HOST_WIDE_INT offset
= *offsetp
;
13629 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13631 if (GET_CODE (basereg
) == PLUS
)
13632 basereg
= XEXP (basereg
, 0);
13634 for (i
= 0; i
< count
; i
++)
13636 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13637 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13645 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13646 write_back
? 4 * count
: 0);
13648 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13649 write_back
? 4 * count
: 0);
13653 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13654 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13656 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13661 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13662 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13664 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13668 /* Called from a peephole2 expander to turn a sequence of loads into an
13669 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13670 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13671 is true if we can reorder the registers because they are used commutatively
13673 Returns true iff we could generate a new instruction. */
13676 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13678 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13679 rtx mems
[MAX_LDM_STM_OPS
];
13680 int i
, j
, base_reg
;
13682 HOST_WIDE_INT offset
;
13683 int write_back
= FALSE
;
13687 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13688 &base_reg
, &offset
, !sort_regs
);
13694 for (i
= 0; i
< nops
- 1; i
++)
13695 for (j
= i
+ 1; j
< nops
; j
++)
13696 if (regs
[i
] > regs
[j
])
13702 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13706 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13707 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13713 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13714 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13716 if (!TARGET_THUMB1
)
13718 base_reg
= regs
[0];
13719 base_reg_rtx
= newbase
;
13723 for (i
= 0; i
< nops
; i
++)
13725 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13726 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13729 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13730 write_back
? offset
+ i
* 4 : 0));
13734 /* Called from a peephole2 expander to turn a sequence of stores into an
13735 STM instruction. OPERANDS are the operands found by the peephole matcher;
13736 NOPS indicates how many separate stores we are trying to combine.
13737 Returns true iff we could generate a new instruction. */
13740 gen_stm_seq (rtx
*operands
, int nops
)
13743 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13744 rtx mems
[MAX_LDM_STM_OPS
];
13747 HOST_WIDE_INT offset
;
13748 int write_back
= FALSE
;
13751 bool base_reg_dies
;
13753 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13754 mem_order
, &base_reg
, &offset
, true);
13759 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13761 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13764 gcc_assert (base_reg_dies
);
13770 gcc_assert (base_reg_dies
);
13771 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13775 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13777 for (i
= 0; i
< nops
; i
++)
13779 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13780 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13783 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13784 write_back
? offset
+ i
* 4 : 0));
13788 /* Called from a peephole2 expander to turn a sequence of stores that are
13789 preceded by constant loads into an STM instruction. OPERANDS are the
13790 operands found by the peephole matcher; NOPS indicates how many
13791 separate stores we are trying to combine; there are 2 * NOPS
13792 instructions in the peephole.
13793 Returns true iff we could generate a new instruction. */
13796 gen_const_stm_seq (rtx
*operands
, int nops
)
13798 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13799 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13800 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13801 rtx mems
[MAX_LDM_STM_OPS
];
13804 HOST_WIDE_INT offset
;
13805 int write_back
= FALSE
;
13808 bool base_reg_dies
;
13810 HARD_REG_SET allocated
;
13812 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13813 mem_order
, &base_reg
, &offset
, false);
13818 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13820 /* If the same register is used more than once, try to find a free
13822 CLEAR_HARD_REG_SET (allocated
);
13823 for (i
= 0; i
< nops
; i
++)
13825 for (j
= i
+ 1; j
< nops
; j
++)
13826 if (regs
[i
] == regs
[j
])
13828 rtx t
= peep2_find_free_register (0, nops
* 2,
13829 TARGET_THUMB1
? "l" : "r",
13830 SImode
, &allocated
);
13834 regs
[i
] = REGNO (t
);
13838 /* Compute an ordering that maps the register numbers to an ascending
13841 for (i
= 0; i
< nops
; i
++)
13842 if (regs
[i
] < regs
[reg_order
[0]])
13845 for (i
= 1; i
< nops
; i
++)
13847 int this_order
= reg_order
[i
- 1];
13848 for (j
= 0; j
< nops
; j
++)
13849 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13850 && (this_order
== reg_order
[i
- 1]
13851 || regs
[j
] < regs
[this_order
]))
13853 reg_order
[i
] = this_order
;
13856 /* Ensure that registers that must be live after the instruction end
13857 up with the correct value. */
13858 for (i
= 0; i
< nops
; i
++)
13860 int this_order
= reg_order
[i
];
13861 if ((this_order
!= mem_order
[i
]
13862 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13863 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13867 /* Load the constants. */
13868 for (i
= 0; i
< nops
; i
++)
13870 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13871 sorted_regs
[i
] = regs
[reg_order
[i
]];
13872 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13875 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13877 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13880 gcc_assert (base_reg_dies
);
13886 gcc_assert (base_reg_dies
);
13887 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13891 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13893 for (i
= 0; i
< nops
; i
++)
13895 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13896 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13899 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13900 write_back
? offset
+ i
* 4 : 0));
13904 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13905 unaligned copies on processors which support unaligned semantics for those
13906 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13907 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13908 An interleave factor of 1 (the minimum) will perform no interleaving.
13909 Load/store multiple are used for aligned addresses where possible. */
13912 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13913 HOST_WIDE_INT length
,
13914 unsigned int interleave_factor
)
13916 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13917 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13918 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13919 HOST_WIDE_INT i
, j
;
13920 HOST_WIDE_INT remaining
= length
, words
;
13921 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13923 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13924 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13925 HOST_WIDE_INT srcoffset
, dstoffset
;
13926 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13929 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13931 /* Use hard registers if we have aligned source or destination so we can use
13932 load/store multiple with contiguous registers. */
13933 if (dst_aligned
|| src_aligned
)
13934 for (i
= 0; i
< interleave_factor
; i
++)
13935 regs
[i
] = gen_rtx_REG (SImode
, i
);
13937 for (i
= 0; i
< interleave_factor
; i
++)
13938 regs
[i
] = gen_reg_rtx (SImode
);
13940 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13941 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13943 srcoffset
= dstoffset
= 0;
13945 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13946 For copying the last bytes we want to subtract this offset again. */
13947 src_autoinc
= dst_autoinc
= 0;
13949 for (i
= 0; i
< interleave_factor
; i
++)
13952 /* Copy BLOCK_SIZE_BYTES chunks. */
13954 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13957 if (src_aligned
&& interleave_factor
> 1)
13959 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13960 TRUE
, srcbase
, &srcoffset
));
13961 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13965 for (j
= 0; j
< interleave_factor
; j
++)
13967 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13969 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13970 srcoffset
+ j
* UNITS_PER_WORD
);
13971 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13973 srcoffset
+= block_size_bytes
;
13977 if (dst_aligned
&& interleave_factor
> 1)
13979 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13980 TRUE
, dstbase
, &dstoffset
));
13981 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13985 for (j
= 0; j
< interleave_factor
; j
++)
13987 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13989 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13990 dstoffset
+ j
* UNITS_PER_WORD
);
13991 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13993 dstoffset
+= block_size_bytes
;
13996 remaining
-= block_size_bytes
;
13999 /* Copy any whole words left (note these aren't interleaved with any
14000 subsequent halfword/byte load/stores in the interests of simplicity). */
14002 words
= remaining
/ UNITS_PER_WORD
;
14004 gcc_assert (words
< interleave_factor
);
14006 if (src_aligned
&& words
> 1)
14008 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14010 src_autoinc
+= UNITS_PER_WORD
* words
;
14014 for (j
= 0; j
< words
; j
++)
14016 addr
= plus_constant (Pmode
, src
,
14017 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14018 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14019 srcoffset
+ j
* UNITS_PER_WORD
);
14020 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14022 srcoffset
+= words
* UNITS_PER_WORD
;
14025 if (dst_aligned
&& words
> 1)
14027 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14029 dst_autoinc
+= words
* UNITS_PER_WORD
;
14033 for (j
= 0; j
< words
; j
++)
14035 addr
= plus_constant (Pmode
, dst
,
14036 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14037 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14038 dstoffset
+ j
* UNITS_PER_WORD
);
14039 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14041 dstoffset
+= words
* UNITS_PER_WORD
;
14044 remaining
-= words
* UNITS_PER_WORD
;
14046 gcc_assert (remaining
< 4);
14048 /* Copy a halfword if necessary. */
14050 if (remaining
>= 2)
14052 halfword_tmp
= gen_reg_rtx (SImode
);
14054 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14055 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14056 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14058 /* Either write out immediately, or delay until we've loaded the last
14059 byte, depending on interleave factor. */
14060 if (interleave_factor
== 1)
14062 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14063 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14064 emit_insn (gen_unaligned_storehi (mem
,
14065 gen_lowpart (HImode
, halfword_tmp
)));
14066 halfword_tmp
= NULL
;
14074 gcc_assert (remaining
< 2);
14076 /* Copy last byte. */
14078 if ((remaining
& 1) != 0)
14080 byte_tmp
= gen_reg_rtx (SImode
);
14082 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14083 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14084 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14086 if (interleave_factor
== 1)
14088 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14089 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14090 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14099 /* Store last halfword if we haven't done so already. */
14103 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14104 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14105 emit_insn (gen_unaligned_storehi (mem
,
14106 gen_lowpart (HImode
, halfword_tmp
)));
14110 /* Likewise for last byte. */
14114 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14115 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14116 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14120 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
/* NOTE(review): this region is a garbled extraction of gcc/config/arm/arm.c;
   logical lines are shattered, the original file's line numbers are fused
   into the text, and brace/declaration lines were dropped.  Only comments
   are added below -- every code token is untouched.  */
14123 /* From mips_adjust_block_mem:
14125 Helper function for doing a loop-based block operation on memory
14126 reference MEM. Each iteration of the loop will operate on LENGTH
14129 Create a new base register for use within the loop and point it to
14130 the start of MEM. Create a new memory reference that uses this
14131 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
/* Signature fragment: the return type and the final *LOOP_MEM parameter
   line were dropped by the extraction -- TODO confirm against arm.c.  */
14134 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
/* Copy MEM's address into a fresh pseudo register for use as the loop's
   base register.  */
14137 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14139 /* Although the new mem does not refer to a known location,
14140 it does keep up to LENGTH bytes of alignment. */
/* Build the loop-body memory reference: same MEM, BLKmode, addressed
   through the new base register.  */
14141 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
/* Alignment of the new mem is the original alignment, capped at LENGTH
   bytes (in bits), since each iteration only touches LENGTH bytes.  */
14142 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
/* NOTE(review): garbled extraction -- code tokens untouched, comments only.
   Emits a copy loop moving BYTES_PER_ITER bytes per iteration, then copies
   the remainder straight-line.  */
14145 /* From mips_block_move_loop:
14147 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14148 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14149 the memory regions do not overlap. */
14152 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14153 unsigned int interleave_factor
,
14154 HOST_WIDE_INT bytes_per_iter
)
14156 rtx label
, src_reg
, dest_reg
, final_src
, test
;
14157 HOST_WIDE_INT leftover
;
/* Split LENGTH into a loop-covered part (a multiple of BYTES_PER_ITER)
   and a LEFTOVER tail handled after the loop.  */
14159 leftover
= length
% bytes_per_iter
;
14160 length
-= leftover
;
14162 /* Create registers and memory references for use within the loop. */
14163 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14164 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14166 /* Calculate the value that SRC_REG should have after the last iteration of
/* FINAL_SRC = SRC_REG + LENGTH; used as the loop's termination value.  */
14168 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14169 0, 0, OPTAB_WIDEN
);
14171 /* Emit the start of the loop. */
14172 label
= gen_label_rtx ();
14173 emit_label (label
);
14175 /* Emit the loop body. */
/* Each iteration copies BYTES_PER_ITER bytes straight-line, with the
   requested load/store interleaving.  */
14176 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14177 interleave_factor
);
14179 /* Move on to the next block. */
14180 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14181 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14183 /* Emit the loop condition. */
/* Loop while SRC_REG != FINAL_SRC (conditional branch back to LABEL).  */
14184 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14185 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14187 /* Mop up any left-over bytes. */
/* NOTE(review): the guard around this call (presumably `if (leftover)`,
   orig. line 14188) was dropped by the extraction -- confirm in arm.c.  */
14189 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
/* NOTE(review): garbled extraction -- code tokens untouched, comments only.
   Several guarding conditionals (orig. lines 14200-14202, 14211-14212,
   14215, 14218-14226) were dropped, so the calls below appear
   unconditionally sequential; in the real source they are alternatives
   selected by optimize_size / length thresholds -- confirm in arm.c.  */
14192 /* Emit a block move when either the source or destination is unaligned (not
14193 aligned to a four-byte boundary). This may need further tuning depending on
14194 core type, optimize_size setting, etc. */
14197 arm_movmemqi_unaligned (rtx
*operands
)
/* operands[2] is the (constant) byte count of the move.  */
14199 HOST_WIDE_INT length
= INTVAL (operands
[2]);
/* operands[1] is the source MEM, operands[0] the destination MEM;
   "aligned" here means word-aligned or better.  */
14203 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14204 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14205 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14206 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14207 or dst_aligned though: allow more interleaving in those cases since the
14208 resulting code can be smaller. */
14209 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14210 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
/* Size-optimized path: loop-based copy with the small parameters above.  */
14213 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14214 interleave_factor
, bytes_per_iter
);
/* Short-copy path: fully straight-line copy.  */
14216 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14217 interleave_factor
);
14221 /* Note that the loop created by arm_block_move_unaligned_loop may be
14222 subject to loop unrolling, which makes tuning this condition a little
/* Speed-optimized path: loop with deeper interleaving (factor 4,
   16 bytes per iteration).  */
14225 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
/* Speed-optimized short copy.  */
14227 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14234 arm_gen_movmemqi (rtx
*operands
)
14236 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14237 HOST_WIDE_INT srcoffset
, dstoffset
;
14239 rtx src
, dst
, srcbase
, dstbase
;
14240 rtx part_bytes_reg
= NULL
;
14243 if (!CONST_INT_P (operands
[2])
14244 || !CONST_INT_P (operands
[3])
14245 || INTVAL (operands
[2]) > 64)
14248 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14249 return arm_movmemqi_unaligned (operands
);
14251 if (INTVAL (operands
[3]) & 3)
14254 dstbase
= operands
[0];
14255 srcbase
= operands
[1];
14257 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14258 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14260 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14261 out_words_to_go
= INTVAL (operands
[2]) / 4;
14262 last_bytes
= INTVAL (operands
[2]) & 3;
14263 dstoffset
= srcoffset
= 0;
14265 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14266 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14268 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14270 if (in_words_to_go
> 4)
14271 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14272 TRUE
, srcbase
, &srcoffset
));
14274 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14275 src
, FALSE
, srcbase
,
14278 if (out_words_to_go
)
14280 if (out_words_to_go
> 4)
14281 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14282 TRUE
, dstbase
, &dstoffset
));
14283 else if (out_words_to_go
!= 1)
14284 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14285 out_words_to_go
, dst
,
14288 dstbase
, &dstoffset
));
14291 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14292 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
14293 if (last_bytes
!= 0)
14295 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14301 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14302 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14305 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14306 if (out_words_to_go
)
14310 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14311 sreg
= copy_to_reg (mem
);
14313 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14314 emit_move_insn (mem
, sreg
);
14317 gcc_assert (!in_words_to_go
); /* Sanity check */
14320 if (in_words_to_go
)
14322 gcc_assert (in_words_to_go
> 0);
14324 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14325 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14328 gcc_assert (!last_bytes
|| part_bytes_reg
);
14330 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14332 rtx tmp
= gen_reg_rtx (SImode
);
14334 /* The bytes we want are in the top end of the word. */
14335 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14336 GEN_INT (8 * (4 - last_bytes
))));
14337 part_bytes_reg
= tmp
;
14341 mem
= adjust_automodify_address (dstbase
, QImode
,
14342 plus_constant (Pmode
, dst
,
14344 dstoffset
+ last_bytes
- 1);
14345 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14349 tmp
= gen_reg_rtx (SImode
);
14350 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14351 part_bytes_reg
= tmp
;
14358 if (last_bytes
> 1)
14360 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14361 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14365 rtx tmp
= gen_reg_rtx (SImode
);
14366 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14367 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14368 part_bytes_reg
= tmp
;
14375 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14376 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
/* NOTE(review): garbled extraction -- code tokens untouched, comments only.
   Returns a MEM referring to the location immediately after MEM: the
   address is advanced by the byte size of MEM's machine mode, and the
   recorded MEM_OFFSET is adjusted by the same amount.  Return type line
   was dropped by the extraction (static rtx in arm.c) -- TODO confirm.  */
14383 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14386 next_consecutive_mem (rtx mem
)
/* Step size = size in bytes of MEM's mode (e.g. 8 for DImode).  */
14388 enum machine_mode mode
= GET_MODE (mem
);
14389 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
/* New address = old address + step.  */
14390 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
/* Keep mode; update both the address and the alias/offset bookkeeping.  */
14392 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14395 /* Copy using LDRD/STRD instructions whenever possible.
14396 Returns true upon success. */
14398 gen_movmem_ldrd_strd (rtx
*operands
)
14400 unsigned HOST_WIDE_INT len
;
14401 HOST_WIDE_INT align
;
14402 rtx src
, dst
, base
;
14404 bool src_aligned
, dst_aligned
;
14405 bool src_volatile
, dst_volatile
;
14407 gcc_assert (CONST_INT_P (operands
[2]));
14408 gcc_assert (CONST_INT_P (operands
[3]));
14410 len
= UINTVAL (operands
[2]);
14414 /* Maximum alignment we can assume for both src and dst buffers. */
14415 align
= INTVAL (operands
[3]);
14417 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14420 /* Place src and dst addresses in registers
14421 and update the corresponding mem rtx. */
14423 dst_volatile
= MEM_VOLATILE_P (dst
);
14424 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14425 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14426 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14429 src_volatile
= MEM_VOLATILE_P (src
);
14430 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14431 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14432 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14434 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14437 if (src_volatile
|| dst_volatile
)
14440 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14441 if (!(dst_aligned
|| src_aligned
))
14442 return arm_gen_movmemqi (operands
);
14444 src
= adjust_address (src
, DImode
, 0);
14445 dst
= adjust_address (dst
, DImode
, 0);
14449 reg0
= gen_reg_rtx (DImode
);
14451 emit_move_insn (reg0
, src
);
14453 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14456 emit_move_insn (dst
, reg0
);
14458 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14460 src
= next_consecutive_mem (src
);
14461 dst
= next_consecutive_mem (dst
);
14464 gcc_assert (len
< 8);
14467 /* More than a word but less than a double-word to copy. Copy a word. */
14468 reg0
= gen_reg_rtx (SImode
);
14469 src
= adjust_address (src
, SImode
, 0);
14470 dst
= adjust_address (dst
, SImode
, 0);
14472 emit_move_insn (reg0
, src
);
14474 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14477 emit_move_insn (dst
, reg0
);
14479 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14481 src
= next_consecutive_mem (src
);
14482 dst
= next_consecutive_mem (dst
);
14489 /* Copy the remaining bytes. */
14492 dst
= adjust_address (dst
, HImode
, 0);
14493 src
= adjust_address (src
, HImode
, 0);
14494 reg0
= gen_reg_rtx (SImode
);
14496 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14498 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14501 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14503 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14505 src
= next_consecutive_mem (src
);
14506 dst
= next_consecutive_mem (dst
);
14511 dst
= adjust_address (dst
, QImode
, 0);
14512 src
= adjust_address (src
, QImode
, 0);
14513 reg0
= gen_reg_rtx (QImode
);
14514 emit_move_insn (reg0
, src
);
14515 emit_move_insn (dst
, reg0
);
14519 /* Select a dominance comparison mode if possible for a test of the general
14520 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14521 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14522 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14523 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14524 In all cases OP will be either EQ or NE, but we don't need to know which
14525 here. If we are unable to support a dominance comparison we return
14526 CC mode. This will then fail to match for the RTL expressions that
14527 generate this call. */
14529 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14531 enum rtx_code cond1
, cond2
;
14534 /* Currently we will probably get the wrong result if the individual
14535 comparisons are not simple. This also ensures that it is safe to
14536 reverse a comparison if necessary. */
14537 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14539 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14543 /* The if_then_else variant of this tests the second condition if the
14544 first passes, but is true if the first fails. Reverse the first
14545 condition to get a true "inclusive-or" expression. */
14546 if (cond_or
== DOM_CC_NX_OR_Y
)
14547 cond1
= reverse_condition (cond1
);
14549 /* If the comparisons are not equal, and one doesn't dominate the other,
14550 then we can't do this. */
14552 && !comparison_dominates_p (cond1
, cond2
)
14553 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14558 enum rtx_code temp
= cond1
;
14566 if (cond_or
== DOM_CC_X_AND_Y
)
14571 case EQ
: return CC_DEQmode
;
14572 case LE
: return CC_DLEmode
;
14573 case LEU
: return CC_DLEUmode
;
14574 case GE
: return CC_DGEmode
;
14575 case GEU
: return CC_DGEUmode
;
14576 default: gcc_unreachable ();
14580 if (cond_or
== DOM_CC_X_AND_Y
)
14592 gcc_unreachable ();
14596 if (cond_or
== DOM_CC_X_AND_Y
)
14608 gcc_unreachable ();
14612 if (cond_or
== DOM_CC_X_AND_Y
)
14613 return CC_DLTUmode
;
14618 return CC_DLTUmode
;
14620 return CC_DLEUmode
;
14624 gcc_unreachable ();
14628 if (cond_or
== DOM_CC_X_AND_Y
)
14629 return CC_DGTUmode
;
14634 return CC_DGTUmode
;
14636 return CC_DGEUmode
;
14640 gcc_unreachable ();
14643 /* The remaining cases only occur when both comparisons are the
14646 gcc_assert (cond1
== cond2
);
14650 gcc_assert (cond1
== cond2
);
14654 gcc_assert (cond1
== cond2
);
14658 gcc_assert (cond1
== cond2
);
14659 return CC_DLEUmode
;
14662 gcc_assert (cond1
== cond2
);
14663 return CC_DGEUmode
;
14666 gcc_unreachable ();
14671 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14673 /* All floating point compares return CCFP if it is an equality
14674 comparison, and CCFPE otherwise. */
14675 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14698 gcc_unreachable ();
14702 /* A compare with a shifted operand. Because of canonicalization, the
14703 comparison will have to be swapped when we emit the assembler. */
14704 if (GET_MODE (y
) == SImode
14705 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14706 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14707 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14708 || GET_CODE (x
) == ROTATERT
))
14711 /* This operation is performed swapped, but since we only rely on the Z
14712 flag we don't need an additional mode. */
14713 if (GET_MODE (y
) == SImode
14714 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14715 && GET_CODE (x
) == NEG
14716 && (op
== EQ
|| op
== NE
))
14719 /* This is a special case that is used by combine to allow a
14720 comparison of a shifted byte load to be split into a zero-extend
14721 followed by a comparison of the shifted integer (only valid for
14722 equalities and unsigned inequalities). */
14723 if (GET_MODE (x
) == SImode
14724 && GET_CODE (x
) == ASHIFT
14725 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14726 && GET_CODE (XEXP (x
, 0)) == SUBREG
14727 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14728 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14729 && (op
== EQ
|| op
== NE
14730 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14731 && CONST_INT_P (y
))
14734 /* A construct for a conditional compare, if the false arm contains
14735 0, then both conditions must be true, otherwise either condition
14736 must be true. Not all conditions are possible, so CCmode is
14737 returned if it can't be done. */
14738 if (GET_CODE (x
) == IF_THEN_ELSE
14739 && (XEXP (x
, 2) == const0_rtx
14740 || XEXP (x
, 2) == const1_rtx
)
14741 && COMPARISON_P (XEXP (x
, 0))
14742 && COMPARISON_P (XEXP (x
, 1)))
14743 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14744 INTVAL (XEXP (x
, 2)));
14746 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14747 if (GET_CODE (x
) == AND
14748 && (op
== EQ
|| op
== NE
)
14749 && COMPARISON_P (XEXP (x
, 0))
14750 && COMPARISON_P (XEXP (x
, 1)))
14751 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14754 if (GET_CODE (x
) == IOR
14755 && (op
== EQ
|| op
== NE
)
14756 && COMPARISON_P (XEXP (x
, 0))
14757 && COMPARISON_P (XEXP (x
, 1)))
14758 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14761 /* An operation (on Thumb) where we want to test for a single bit.
14762 This is done by shifting that bit up into the top bit of a
14763 scratch register; we can then branch on the sign bit. */
14765 && GET_MODE (x
) == SImode
14766 && (op
== EQ
|| op
== NE
)
14767 && GET_CODE (x
) == ZERO_EXTRACT
14768 && XEXP (x
, 1) == const1_rtx
)
14771 /* An operation that sets the condition codes as a side-effect, the
14772 V flag is not set correctly, so we can only use comparisons where
14773 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14775 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14776 if (GET_MODE (x
) == SImode
14778 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14779 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14780 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14781 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14782 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14783 || GET_CODE (x
) == LSHIFTRT
14784 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14785 || GET_CODE (x
) == ROTATERT
14786 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14787 return CC_NOOVmode
;
14789 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14792 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14793 && GET_CODE (x
) == PLUS
14794 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14797 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14803 /* A DImode comparison against zero can be implemented by
14804 or'ing the two halves together. */
14805 if (y
== const0_rtx
)
14808 /* We can do an equality test in three Thumb instructions. */
14818 /* DImode unsigned comparisons can be implemented by cmp +
14819 cmpeq without a scratch register. Not worth doing in
14830 /* DImode signed and unsigned comparisons can be implemented
14831 by cmp + sbcs with a scratch register, but that does not
14832 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14833 gcc_assert (op
!= EQ
&& op
!= NE
);
14837 gcc_unreachable ();
14841 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14842 return GET_MODE (x
);
14847 /* X and Y are two things to compare using CODE. Emit the compare insn and
14848 return the rtx for register 0 in the proper mode. FP means this is a
14849 floating point compare: I don't think that it is needed on the arm. */
14851 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14853 enum machine_mode mode
;
14855 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14857 /* We might have X as a constant, Y as a register because of the predicates
14858 used for cmpdi. If so, force X to a register here. */
14859 if (dimode_comparison
&& !REG_P (x
))
14860 x
= force_reg (DImode
, x
);
14862 mode
= SELECT_CC_MODE (code
, x
, y
);
14863 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14865 if (dimode_comparison
14866 && mode
!= CC_CZmode
)
14870 /* To compare two non-zero values for equality, XOR them and
14871 then compare against zero. Not used for ARM mode; there
14872 CC_CZmode is cheaper. */
14873 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14875 gcc_assert (!reload_completed
);
14876 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14880 /* A scratch register is required. */
14881 if (reload_completed
)
14882 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14884 scratch
= gen_rtx_SCRATCH (SImode
);
14886 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14887 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14888 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
14891 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14896 /* Generate a sequence of insns that will generate the correct return
14897 address mask depending on the physical architecture that the program
14900 arm_gen_return_addr_mask (void)
14902 rtx reg
= gen_reg_rtx (Pmode
);
14904 emit_insn (gen_return_addr_mask (reg
));
14909 arm_reload_in_hi (rtx
*operands
)
14911 rtx ref
= operands
[1];
14913 HOST_WIDE_INT offset
= 0;
14915 if (GET_CODE (ref
) == SUBREG
)
14917 offset
= SUBREG_BYTE (ref
);
14918 ref
= SUBREG_REG (ref
);
14923 /* We have a pseudo which has been spilt onto the stack; there
14924 are two cases here: the first where there is a simple
14925 stack-slot replacement and a second where the stack-slot is
14926 out of range, or is used as a subreg. */
14927 if (reg_equiv_mem (REGNO (ref
)))
14929 ref
= reg_equiv_mem (REGNO (ref
));
14930 base
= find_replacement (&XEXP (ref
, 0));
14933 /* The slot is out of range, or was dressed up in a SUBREG. */
14934 base
= reg_equiv_address (REGNO (ref
));
14937 base
= find_replacement (&XEXP (ref
, 0));
14939 /* Handle the case where the address is too complex to be offset by 1. */
14940 if (GET_CODE (base
) == MINUS
14941 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14943 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14945 emit_set_insn (base_plus
, base
);
14948 else if (GET_CODE (base
) == PLUS
)
14950 /* The addend must be CONST_INT, or we would have dealt with it above. */
14951 HOST_WIDE_INT hi
, lo
;
14953 offset
+= INTVAL (XEXP (base
, 1));
14954 base
= XEXP (base
, 0);
14956 /* Rework the address into a legal sequence of insns. */
14957 /* Valid range for lo is -4095 -> 4095 */
14960 : -((-offset
) & 0xfff));
14962 /* Corner case, if lo is the max offset then we would be out of range
14963 once we have added the additional 1 below, so bump the msb into the
14964 pre-loading insn(s). */
14968 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14969 ^ (HOST_WIDE_INT
) 0x80000000)
14970 - (HOST_WIDE_INT
) 0x80000000);
14972 gcc_assert (hi
+ lo
== offset
);
14976 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14978 /* Get the base address; addsi3 knows how to handle constants
14979 that require more than one insn. */
14980 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14986 /* Operands[2] may overlap operands[0] (though it won't overlap
14987 operands[1]), that's why we asked for a DImode reg -- so we can
14988 use the bit that does not overlap. */
14989 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14990 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14992 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14994 emit_insn (gen_zero_extendqisi2 (scratch
,
14995 gen_rtx_MEM (QImode
,
14996 plus_constant (Pmode
, base
,
14998 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14999 gen_rtx_MEM (QImode
,
15000 plus_constant (Pmode
, base
,
15002 if (!BYTES_BIG_ENDIAN
)
15003 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15004 gen_rtx_IOR (SImode
,
15007 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15011 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15012 gen_rtx_IOR (SImode
,
15013 gen_rtx_ASHIFT (SImode
, scratch
,
15015 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15018 /* Handle storing a half-word to memory during reload by synthesizing as two
15019 byte stores. Take care not to clobber the input values until after we
15020 have moved them somewhere safe. This code assumes that if the DImode
15021 scratch in operands[2] overlaps either the input value or output address
15022 in some way, then that value must die in this insn (we absolutely need
15023 two scratch registers for some corner cases). */
15025 arm_reload_out_hi (rtx
*operands
)
15027 rtx ref
= operands
[0];
15028 rtx outval
= operands
[1];
15030 HOST_WIDE_INT offset
= 0;
15032 if (GET_CODE (ref
) == SUBREG
)
15034 offset
= SUBREG_BYTE (ref
);
15035 ref
= SUBREG_REG (ref
);
15040 /* We have a pseudo which has been spilt onto the stack; there
15041 are two cases here: the first where there is a simple
15042 stack-slot replacement and a second where the stack-slot is
15043 out of range, or is used as a subreg. */
15044 if (reg_equiv_mem (REGNO (ref
)))
15046 ref
= reg_equiv_mem (REGNO (ref
));
15047 base
= find_replacement (&XEXP (ref
, 0));
15050 /* The slot is out of range, or was dressed up in a SUBREG. */
15051 base
= reg_equiv_address (REGNO (ref
));
15054 base
= find_replacement (&XEXP (ref
, 0));
15056 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15058 /* Handle the case where the address is too complex to be offset by 1. */
15059 if (GET_CODE (base
) == MINUS
15060 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15062 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15064 /* Be careful not to destroy OUTVAL. */
15065 if (reg_overlap_mentioned_p (base_plus
, outval
))
15067 /* Updating base_plus might destroy outval, see if we can
15068 swap the scratch and base_plus. */
15069 if (!reg_overlap_mentioned_p (scratch
, outval
))
15072 scratch
= base_plus
;
15077 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15079 /* Be conservative and copy OUTVAL into the scratch now,
15080 this should only be necessary if outval is a subreg
15081 of something larger than a word. */
15082 /* XXX Might this clobber base? I can't see how it can,
15083 since scratch is known to overlap with OUTVAL, and
15084 must be wider than a word. */
15085 emit_insn (gen_movhi (scratch_hi
, outval
));
15086 outval
= scratch_hi
;
15090 emit_set_insn (base_plus
, base
);
15093 else if (GET_CODE (base
) == PLUS
)
15095 /* The addend must be CONST_INT, or we would have dealt with it above. */
15096 HOST_WIDE_INT hi
, lo
;
15098 offset
+= INTVAL (XEXP (base
, 1));
15099 base
= XEXP (base
, 0);
15101 /* Rework the address into a legal sequence of insns. */
15102 /* Valid range for lo is -4095 -> 4095 */
15105 : -((-offset
) & 0xfff));
15107 /* Corner case, if lo is the max offset then we would be out of range
15108 once we have added the additional 1 below, so bump the msb into the
15109 pre-loading insn(s). */
15113 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15114 ^ (HOST_WIDE_INT
) 0x80000000)
15115 - (HOST_WIDE_INT
) 0x80000000);
15117 gcc_assert (hi
+ lo
== offset
);
15121 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15123 /* Be careful not to destroy OUTVAL. */
15124 if (reg_overlap_mentioned_p (base_plus
, outval
))
15126 /* Updating base_plus might destroy outval, see if we
15127 can swap the scratch and base_plus. */
15128 if (!reg_overlap_mentioned_p (scratch
, outval
))
15131 scratch
= base_plus
;
15136 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15138 /* Be conservative and copy outval into scratch now,
15139 this should only be necessary if outval is a
15140 subreg of something larger than a word. */
15141 /* XXX Might this clobber base? I can't see how it
15142 can, since scratch is known to overlap with
15144 emit_insn (gen_movhi (scratch_hi
, outval
));
15145 outval
= scratch_hi
;
15149 /* Get the base address; addsi3 knows how to handle constants
15150 that require more than one insn. */
15151 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15157 if (BYTES_BIG_ENDIAN
)
15159 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15160 plus_constant (Pmode
, base
,
15162 gen_lowpart (QImode
, outval
)));
15163 emit_insn (gen_lshrsi3 (scratch
,
15164 gen_rtx_SUBREG (SImode
, outval
, 0),
15166 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15168 gen_lowpart (QImode
, scratch
)));
15172 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15174 gen_lowpart (QImode
, outval
)));
15175 emit_insn (gen_lshrsi3 (scratch
,
15176 gen_rtx_SUBREG (SImode
, outval
, 0),
15178 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15179 plus_constant (Pmode
, base
,
15181 gen_lowpart (QImode
, scratch
)));
15185 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15186 (padded to the size of a word) should be passed in a register. */
15189 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
15191 if (TARGET_AAPCS_BASED
)
15192 return must_pass_in_stack_var_size (mode
, type
);
15194 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15198 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15199 Return true if an argument passed on the stack should be padded upwards,
15200 i.e. if the least-significant byte has useful data.
15201 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15202 aggregate types are placed in the lowest memory address. */
15205 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15207 if (!TARGET_AAPCS_BASED
)
15208 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15210 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15217 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15218 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15219 register has useful data, and return the opposite if the most
15220 significant byte does. */
15223 arm_pad_reg_upward (enum machine_mode mode
,
15224 tree type
, int first ATTRIBUTE_UNUSED
)
15226 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15228 /* For AAPCS, small aggregates, small fixed-point types,
15229 and small complex types are always padded upwards. */
15232 if ((AGGREGATE_TYPE_P (type
)
15233 || TREE_CODE (type
) == COMPLEX_TYPE
15234 || FIXED_POINT_TYPE_P (type
))
15235 && int_size_in_bytes (type
) <= 4)
15240 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15241 && GET_MODE_SIZE (mode
) <= 4)
15246 /* Otherwise, use default padding. */
15247 return !BYTES_BIG_ENDIAN
;
15250 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15251 assuming that the address in the base register is word aligned. */
15253 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15255 HOST_WIDE_INT max_offset
;
15257 /* Offset must be a multiple of 4 in Thumb mode. */
15258 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15263 else if (TARGET_ARM
)
15268 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15271 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15272 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15273 Assumes that the address in the base register RN is word aligned. Pattern
15274 guarantees that both memory accesses use the same base register,
15275 the offsets are constants within the range, and the gap between the offsets is 4.
15276 If preload complete then check that registers are legal. WBACK indicates whether
15277 address is updated. LOAD indicates whether memory access is load or store. */
15279 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15280 bool wback
, bool load
)
15282 unsigned int t
, t2
, n
;
15284 if (!reload_completed
)
15287 if (!offset_ok_for_ldrd_strd (offset
))
15294 if ((TARGET_THUMB2
)
15295 && ((wback
&& (n
== t
|| n
== t2
))
15296 || (t
== SP_REGNUM
)
15297 || (t
== PC_REGNUM
)
15298 || (t2
== SP_REGNUM
)
15299 || (t2
== PC_REGNUM
)
15300 || (!load
&& (n
== PC_REGNUM
))
15301 || (load
&& (t
== t2
))
15302 /* Triggers Cortex-M3 LDRD errata. */
15303 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15307 && ((wback
&& (n
== t
|| n
== t2
))
15308 || (t2
== PC_REGNUM
)
15309 || (t
% 2 != 0) /* First destination register is not even. */
15311 /* PC can be used as base register (for offset addressing only),
15312 but it is depricated. */
15313 || (n
== PC_REGNUM
)))
15319 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15320 operand MEM's address contains an immediate offset from the base
15321 register and has no side effects, in which case it sets BASE and
15322 OFFSET accordingly. */
15324 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15328 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15330 /* TODO: Handle more general memory operand patterns, such as
15331 PRE_DEC and PRE_INC. */
15333 if (side_effects_p (mem
))
15336 /* Can't deal with subregs. */
15337 if (GET_CODE (mem
) == SUBREG
)
15340 gcc_assert (MEM_P (mem
));
15342 *offset
= const0_rtx
;
15344 addr
= XEXP (mem
, 0);
15346 /* If addr isn't valid for DImode, then we can't handle it. */
15347 if (!arm_legitimate_address_p (DImode
, addr
,
15348 reload_in_progress
|| reload_completed
))
15356 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15358 *base
= XEXP (addr
, 0);
15359 *offset
= XEXP (addr
, 1);
15360 return (REG_P (*base
) && CONST_INT_P (*offset
));
15366 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15368 /* Called from a peephole2 to replace two word-size accesses with a
15369 single LDRD/STRD instruction. Returns true iff we can generate a
15370 new instruction sequence. That is, both accesses use the same base
15371 register and the gap between constant offsets is 4. This function
15372 may reorder its operands to match ldrd/strd RTL templates.
15373 OPERANDS are the operands found by the peephole matcher;
15374 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15375 corresponding memory operands. LOAD indicaates whether the access
15376 is load or store. CONST_STORE indicates a store of constant
15377 integer values held in OPERANDS[4,5] and assumes that the pattern
15378 is of length 4 insn, for the purpose of checking dead registers.
15379 COMMUTE indicates that register operands may be reordered. */
15381 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15382 bool const_store
, bool commute
)
15385 HOST_WIDE_INT offsets
[2], offset
;
15386 rtx base
= NULL_RTX
;
15387 rtx cur_base
, cur_offset
, tmp
;
15389 HARD_REG_SET regset
;
15391 gcc_assert (!const_store
|| !load
);
15392 /* Check that the memory references are immediate offsets from the
15393 same base register. Extract the base register, the destination
15394 registers, and the corresponding memory offsets. */
15395 for (i
= 0; i
< nops
; i
++)
15397 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15402 else if (REGNO (base
) != REGNO (cur_base
))
15405 offsets
[i
] = INTVAL (cur_offset
);
15406 if (GET_CODE (operands
[i
]) == SUBREG
)
15408 tmp
= SUBREG_REG (operands
[i
]);
15409 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15414 /* Make sure there is no dependency between the individual loads. */
15415 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15416 return false; /* RAW */
15418 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15419 return false; /* WAW */
15421 /* If the same input register is used in both stores
15422 when storing different constants, try to find a free register.
15423 For example, the code
15428 can be transformed into
15431 in Thumb mode assuming that r1 is free. */
15433 && REGNO (operands
[0]) == REGNO (operands
[1])
15434 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15438 CLEAR_HARD_REG_SET (regset
);
15439 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15440 if (tmp
== NULL_RTX
)
15443 /* Use the new register in the first load to ensure that
15444 if the original input register is not dead after peephole,
15445 then it will have the correct constant value. */
15448 else if (TARGET_ARM
)
15451 int regno
= REGNO (operands
[0]);
15452 if (!peep2_reg_dead_p (4, operands
[0]))
15454 /* When the input register is even and is not dead after the
15455 pattern, it has to hold the second constant but we cannot
15456 form a legal STRD in ARM mode with this register as the second
15458 if (regno
% 2 == 0)
15461 /* Is regno-1 free? */
15462 SET_HARD_REG_SET (regset
);
15463 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15464 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15465 if (tmp
== NULL_RTX
)
15472 /* Find a DImode register. */
15473 CLEAR_HARD_REG_SET (regset
);
15474 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15475 if (tmp
!= NULL_RTX
)
15477 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15478 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15482 /* Can we use the input register to form a DI register? */
15483 SET_HARD_REG_SET (regset
);
15484 CLEAR_HARD_REG_BIT(regset
,
15485 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15486 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15487 if (tmp
== NULL_RTX
)
15489 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15493 gcc_assert (operands
[0] != NULL_RTX
);
15494 gcc_assert (operands
[1] != NULL_RTX
);
15495 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15496 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15500 /* Make sure the instructions are ordered with lower memory access first. */
15501 if (offsets
[0] > offsets
[1])
15503 gap
= offsets
[0] - offsets
[1];
15504 offset
= offsets
[1];
15506 /* Swap the instructions such that lower memory is accessed first. */
15507 SWAP_RTX (operands
[0], operands
[1]);
15508 SWAP_RTX (operands
[2], operands
[3]);
15510 SWAP_RTX (operands
[4], operands
[5]);
15514 gap
= offsets
[1] - offsets
[0];
15515 offset
= offsets
[0];
15518 /* Make sure accesses are to consecutive memory locations. */
15522 /* Make sure we generate legal instructions. */
15523 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15527 /* In Thumb state, where registers are almost unconstrained, there
15528 is little hope to fix it. */
15532 if (load
&& commute
)
15534 /* Try reordering registers. */
15535 SWAP_RTX (operands
[0], operands
[1]);
15536 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15543 /* If input registers are dead after this pattern, they can be
15544 reordered or replaced by other registers that are free in the
15545 current pattern. */
15546 if (!peep2_reg_dead_p (4, operands
[0])
15547 || !peep2_reg_dead_p (4, operands
[1]))
15550 /* Try to reorder the input registers. */
15551 /* For example, the code
15556 can be transformed into
15561 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15564 SWAP_RTX (operands
[0], operands
[1]);
15568 /* Try to find a free DI register. */
15569 CLEAR_HARD_REG_SET (regset
);
15570 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15571 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15574 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15575 if (tmp
== NULL_RTX
)
15578 /* DREG must be an even-numbered register in DImode.
15579 Split it into SI registers. */
15580 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15581 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15582 gcc_assert (operands
[0] != NULL_RTX
);
15583 gcc_assert (operands
[1] != NULL_RTX
);
15584 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15585 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15587 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15600 /* Print a symbolic form of X to the debug file, F. */
15602 arm_print_value (FILE *f
, rtx x
)
15604 switch (GET_CODE (x
))
15607 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15611 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15619 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15621 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15622 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15630 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15634 fprintf (f
, "`%s'", XSTR (x
, 0));
15638 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15642 arm_print_value (f
, XEXP (x
, 0));
15646 arm_print_value (f
, XEXP (x
, 0));
15648 arm_print_value (f
, XEXP (x
, 1));
15656 fprintf (f
, "????");
15661 /* Routines for manipulation of the constant pool. */
15663 /* Arm instructions cannot load a large constant directly into a
15664 register; they have to come from a pc relative load. The constant
15665 must therefore be placed in the addressable range of the pc
15666 relative load. Depending on the precise pc relative load
15667 instruction the range is somewhere between 256 bytes and 4k. This
15668 means that we often have to dump a constant inside a function, and
15669 generate code to branch around it.
15671 It is important to minimize this, since the branches will slow
15672 things down and make the code larger.
15674 Normally we can hide the table after an existing unconditional
15675 branch so that there is no interruption of the flow, but in the
15676 worst case the code looks like this:
15694 We fix this by performing a scan after scheduling, which notices
15695 which instructions need to have their operands fetched from the
15696 constant table and builds the table.
15698 The algorithm starts by building a table of all the constants that
15699 need fixing up and all the natural barriers in the function (places
15700 where a constant table can be dropped without breaking the flow).
15701 For each fixup we note how far the pc-relative replacement will be
15702 able to reach and the offset of the instruction into the function.
15704 Having built the table we then group the fixes together to form
15705 tables that are as large as possible (subject to addressing
15706 constraints) and emit each table of constants after the last
15707 barrier that is within range of all the instructions in the group.
15708 If a group does not contain a barrier, then we forcibly create one
15709 by inserting a jump instruction into the flow. Once the table has
15710 been inserted, the insns are then modified to reference the
15711 relevant entry in the pool.
15713 Possible enhancements to the algorithm (not implemented) are:
15715 1) For some processors and object formats, there may be benefit in
15716 aligning the pools to the start of cache lines; this alignment
15717 would need to be taken into account when calculating addressability
15720 /* These typedefs are located at the start of this file, so that
15721 they can be used in the prototypes there. This comment is to
15722 remind readers of that fact so that the following structures
15723 can be understood more easily.
15725 typedef struct minipool_node Mnode;
15726 typedef struct minipool_fixup Mfix; */
15728 struct minipool_node
15730 /* Doubly linked chain of entries. */
15733 /* The maximum offset into the code that this entry can be placed. While
15734 pushing fixes for forward references, all entries are sorted in order
15735 of increasing max_address. */
15736 HOST_WIDE_INT max_address
;
15737 /* Similarly for an entry inserted for a backwards ref. */
15738 HOST_WIDE_INT min_address
;
15739 /* The number of fixes referencing this entry. This can become zero
15740 if we "unpush" an entry. In this case we ignore the entry when we
15741 come to emit the code. */
15743 /* The offset from the start of the minipool. */
15744 HOST_WIDE_INT offset
;
15745 /* The value in table. */
15747 /* The mode of value. */
15748 enum machine_mode mode
;
15749 /* The size of the value. With iWMMXt enabled
15750 sizes > 4 also imply an alignment of 8-bytes. */
15754 struct minipool_fixup
15758 HOST_WIDE_INT address
;
15760 enum machine_mode mode
;
15764 HOST_WIDE_INT forwards
;
15765 HOST_WIDE_INT backwards
;
15768 /* Fixes less than a word need padding out to a word boundary. */
15769 #define MINIPOOL_FIX_SIZE(mode) \
15770 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15772 static Mnode
* minipool_vector_head
;
15773 static Mnode
* minipool_vector_tail
;
15774 static rtx minipool_vector_label
;
15775 static int minipool_pad
;
15777 /* The linked list of all minipool fixes required for this function. */
15778 Mfix
* minipool_fix_head
;
15779 Mfix
* minipool_fix_tail
;
15780 /* The fix entry for the current minipool, once it has been placed. */
15781 Mfix
* minipool_barrier
;
15783 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15784 #define JUMP_TABLES_IN_TEXT_SECTION 0
15787 static HOST_WIDE_INT
15788 get_jump_table_size (rtx insn
)
15790 /* ADDR_VECs only take room if read-only data does into the text
15792 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15794 rtx body
= PATTERN (insn
);
15795 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15796 HOST_WIDE_INT size
;
15797 HOST_WIDE_INT modesize
;
15799 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15800 size
= modesize
* XVECLEN (body
, elt
);
15804 /* Round up size of TBB table to a halfword boundary. */
15805 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
15808 /* No padding necessary for TBH. */
15811 /* Add two bytes for alignment on Thumb. */
15816 gcc_unreachable ();
15824 /* Return the maximum amount of padding that will be inserted before
15827 static HOST_WIDE_INT
15828 get_label_padding (rtx label
)
15830 HOST_WIDE_INT align
, min_insn_size
;
15832 align
= 1 << label_to_alignment (label
);
15833 min_insn_size
= TARGET_THUMB
? 2 : 4;
15834 return align
> min_insn_size
? align
- min_insn_size
: 0;
15837 /* Move a minipool fix MP from its current location to before MAX_MP.
15838 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15839 constraints may need updating. */
15841 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15842 HOST_WIDE_INT max_address
)
15844 /* The code below assumes these are different. */
15845 gcc_assert (mp
!= max_mp
);
15847 if (max_mp
== NULL
)
15849 if (max_address
< mp
->max_address
)
15850 mp
->max_address
= max_address
;
15854 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15855 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15857 mp
->max_address
= max_address
;
15859 /* Unlink MP from its current position. Since max_mp is non-null,
15860 mp->prev must be non-null. */
15861 mp
->prev
->next
= mp
->next
;
15862 if (mp
->next
!= NULL
)
15863 mp
->next
->prev
= mp
->prev
;
15865 minipool_vector_tail
= mp
->prev
;
15867 /* Re-insert it before MAX_MP. */
15869 mp
->prev
= max_mp
->prev
;
15872 if (mp
->prev
!= NULL
)
15873 mp
->prev
->next
= mp
;
15875 minipool_vector_head
= mp
;
15878 /* Save the new entry. */
15881 /* Scan over the preceding entries and adjust their addresses as
15883 while (mp
->prev
!= NULL
15884 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15886 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15893 /* Add a constant to the minipool for a forward reference. Returns the
15894 node added or NULL if the constant will not fit in this pool. */
15896 add_minipool_forward_ref (Mfix
*fix
)
15898 /* If set, max_mp is the first pool_entry that has a lower
15899 constraint than the one we are trying to add. */
15900 Mnode
* max_mp
= NULL
;
15901 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15904 /* If the minipool starts before the end of FIX->INSN then this FIX
15905 can not be placed into the current pool. Furthermore, adding the
15906 new constant pool entry may cause the pool to start FIX_SIZE bytes
15908 if (minipool_vector_head
&&
15909 (fix
->address
+ get_attr_length (fix
->insn
)
15910 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15913 /* Scan the pool to see if a constant with the same value has
15914 already been added. While we are doing this, also note the
15915 location where we must insert the constant if it doesn't already
15917 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15919 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15920 && fix
->mode
== mp
->mode
15921 && (!LABEL_P (fix
->value
)
15922 || (CODE_LABEL_NUMBER (fix
->value
)
15923 == CODE_LABEL_NUMBER (mp
->value
)))
15924 && rtx_equal_p (fix
->value
, mp
->value
))
15926 /* More than one fix references this entry. */
15928 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15931 /* Note the insertion point if necessary. */
15933 && mp
->max_address
> max_address
)
15936 /* If we are inserting an 8-bytes aligned quantity and
15937 we have not already found an insertion point, then
15938 make sure that all such 8-byte aligned quantities are
15939 placed at the start of the pool. */
15940 if (ARM_DOUBLEWORD_ALIGN
15942 && fix
->fix_size
>= 8
15943 && mp
->fix_size
< 8)
15946 max_address
= mp
->max_address
;
15950 /* The value is not currently in the minipool, so we need to create
15951 a new entry for it. If MAX_MP is NULL, the entry will be put on
15952 the end of the list since the placement is less constrained than
15953 any existing entry. Otherwise, we insert the new fix before
15954 MAX_MP and, if necessary, adjust the constraints on the other
15957 mp
->fix_size
= fix
->fix_size
;
15958 mp
->mode
= fix
->mode
;
15959 mp
->value
= fix
->value
;
15961 /* Not yet required for a backwards ref. */
15962 mp
->min_address
= -65536;
15964 if (max_mp
== NULL
)
15966 mp
->max_address
= max_address
;
15968 mp
->prev
= minipool_vector_tail
;
15970 if (mp
->prev
== NULL
)
15972 minipool_vector_head
= mp
;
15973 minipool_vector_label
= gen_label_rtx ();
15976 mp
->prev
->next
= mp
;
15978 minipool_vector_tail
= mp
;
15982 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15983 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15985 mp
->max_address
= max_address
;
15988 mp
->prev
= max_mp
->prev
;
15990 if (mp
->prev
!= NULL
)
15991 mp
->prev
->next
= mp
;
15993 minipool_vector_head
= mp
;
15996 /* Save the new entry. */
15999 /* Scan over the preceding entries and adjust their addresses as
16001 while (mp
->prev
!= NULL
16002 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16004 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16012 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16013 HOST_WIDE_INT min_address
)
16015 HOST_WIDE_INT offset
;
16017 /* The code below assumes these are different. */
16018 gcc_assert (mp
!= min_mp
);
16020 if (min_mp
== NULL
)
16022 if (min_address
> mp
->min_address
)
16023 mp
->min_address
= min_address
;
16027 /* We will adjust this below if it is too loose. */
16028 mp
->min_address
= min_address
;
16030 /* Unlink MP from its current position. Since min_mp is non-null,
16031 mp->next must be non-null. */
16032 mp
->next
->prev
= mp
->prev
;
16033 if (mp
->prev
!= NULL
)
16034 mp
->prev
->next
= mp
->next
;
16036 minipool_vector_head
= mp
->next
;
16038 /* Reinsert it after MIN_MP. */
16040 mp
->next
= min_mp
->next
;
16042 if (mp
->next
!= NULL
)
16043 mp
->next
->prev
= mp
;
16045 minipool_vector_tail
= mp
;
16051 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16053 mp
->offset
= offset
;
16054 if (mp
->refcount
> 0)
16055 offset
+= mp
->fix_size
;
16057 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16058 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16064 /* Add a constant to the minipool for a backward reference. Returns the
16065 node added or NULL if the constant will not fit in this pool.
16067 Note that the code for insertion for a backwards reference can be
16068 somewhat confusing because the calculated offsets for each fix do
16069 not take into account the size of the pool (which is still under
16072 add_minipool_backward_ref (Mfix
*fix
)
16074 /* If set, min_mp is the last pool_entry that has a lower constraint
16075 than the one we are trying to add. */
16076 Mnode
*min_mp
= NULL
;
16077 /* This can be negative, since it is only a constraint. */
16078 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16081 /* If we can't reach the current pool from this insn, or if we can't
16082 insert this entry at the end of the pool without pushing other
16083 fixes out of range, then we don't try. This ensures that we
16084 can't fail later on. */
16085 if (min_address
>= minipool_barrier
->address
16086 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16087 >= minipool_barrier
->address
))
16090 /* Scan the pool to see if a constant with the same value has
16091 already been added. While we are doing this, also note the
16092 location where we must insert the constant if it doesn't already
16094 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16096 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16097 && fix
->mode
== mp
->mode
16098 && (!LABEL_P (fix
->value
)
16099 || (CODE_LABEL_NUMBER (fix
->value
)
16100 == CODE_LABEL_NUMBER (mp
->value
)))
16101 && rtx_equal_p (fix
->value
, mp
->value
)
16102 /* Check that there is enough slack to move this entry to the
16103 end of the table (this is conservative). */
16104 && (mp
->max_address
16105 > (minipool_barrier
->address
16106 + minipool_vector_tail
->offset
16107 + minipool_vector_tail
->fix_size
)))
16110 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16113 if (min_mp
!= NULL
)
16114 mp
->min_address
+= fix
->fix_size
;
16117 /* Note the insertion point if necessary. */
16118 if (mp
->min_address
< min_address
)
16120 /* For now, we do not allow the insertion of 8-byte alignment
16121 requiring nodes anywhere but at the start of the pool. */
16122 if (ARM_DOUBLEWORD_ALIGN
16123 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16128 else if (mp
->max_address
16129 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16131 /* Inserting before this entry would push the fix beyond
16132 its maximum address (which can happen if we have
16133 re-located a forwards fix); force the new fix to come
16135 if (ARM_DOUBLEWORD_ALIGN
16136 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16141 min_address
= mp
->min_address
+ fix
->fix_size
;
16144 /* Do not insert a non-8-byte aligned quantity before 8-byte
16145 aligned quantities. */
16146 else if (ARM_DOUBLEWORD_ALIGN
16147 && fix
->fix_size
< 8
16148 && mp
->fix_size
>= 8)
16151 min_address
= mp
->min_address
+ fix
->fix_size
;
16156 /* We need to create a new entry. */
16158 mp
->fix_size
= fix
->fix_size
;
16159 mp
->mode
= fix
->mode
;
16160 mp
->value
= fix
->value
;
16162 mp
->max_address
= minipool_barrier
->address
+ 65536;
16164 mp
->min_address
= min_address
;
16166 if (min_mp
== NULL
)
16169 mp
->next
= minipool_vector_head
;
16171 if (mp
->next
== NULL
)
16173 minipool_vector_tail
= mp
;
16174 minipool_vector_label
= gen_label_rtx ();
16177 mp
->next
->prev
= mp
;
16179 minipool_vector_head
= mp
;
16183 mp
->next
= min_mp
->next
;
16187 if (mp
->next
!= NULL
)
16188 mp
->next
->prev
= mp
;
16190 minipool_vector_tail
= mp
;
16193 /* Save the new entry. */
16201 /* Scan over the following entries and adjust their offsets. */
16202 while (mp
->next
!= NULL
)
16204 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16205 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16208 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16210 mp
->next
->offset
= mp
->offset
;
16219 assign_minipool_offsets (Mfix
*barrier
)
16221 HOST_WIDE_INT offset
= 0;
16224 minipool_barrier
= barrier
;
16226 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16228 mp
->offset
= offset
;
16230 if (mp
->refcount
> 0)
16231 offset
+= mp
->fix_size
;
16235 /* Output the literal table */
16237 dump_minipool (rtx scan
)
16243 if (ARM_DOUBLEWORD_ALIGN
)
16244 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16245 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16252 fprintf (dump_file
,
16253 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16254 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16256 scan
= emit_label_after (gen_label_rtx (), scan
);
16257 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16258 scan
= emit_label_after (minipool_vector_label
, scan
);
16260 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16262 if (mp
->refcount
> 0)
16266 fprintf (dump_file
,
16267 ";; Offset %u, min %ld, max %ld ",
16268 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16269 (unsigned long) mp
->max_address
);
16270 arm_print_value (dump_file
, mp
->value
);
16271 fputc ('\n', dump_file
);
16274 switch (mp
->fix_size
)
16276 #ifdef HAVE_consttable_1
16278 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16282 #ifdef HAVE_consttable_2
16284 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16288 #ifdef HAVE_consttable_4
16290 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16294 #ifdef HAVE_consttable_8
16296 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16300 #ifdef HAVE_consttable_16
16302 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16307 gcc_unreachable ();
16315 minipool_vector_head
= minipool_vector_tail
= NULL
;
16316 scan
= emit_insn_after (gen_consttable_end (), scan
);
16317 scan
= emit_barrier_after (scan
);
16320 /* Return the cost of forcibly inserting a barrier after INSN. */
16322 arm_barrier_cost (rtx insn
)
16324 /* Basing the location of the pool on the loop depth is preferable,
16325 but at the moment, the basic block information seems to be
16326 corrupt by this stage of the compilation. */
16327 int base_cost
= 50;
16328 rtx next
= next_nonnote_insn (insn
);
16330 if (next
!= NULL
&& LABEL_P (next
))
16333 switch (GET_CODE (insn
))
16336 /* It will always be better to place the table before the label, rather
16345 return base_cost
- 10;
16348 return base_cost
+ 10;
16352 /* Find the best place in the insn stream in the range
16353 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16354 Create the barrier by inserting a jump and add a new fix entry for
16357 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16359 HOST_WIDE_INT count
= 0;
16361 rtx from
= fix
->insn
;
16362 /* The instruction after which we will insert the jump. */
16363 rtx selected
= NULL
;
16365 /* The address at which the jump instruction will be placed. */
16366 HOST_WIDE_INT selected_address
;
16368 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16369 rtx label
= gen_label_rtx ();
16371 selected_cost
= arm_barrier_cost (from
);
16372 selected_address
= fix
->address
;
16374 while (from
&& count
< max_count
)
16379 /* This code shouldn't have been called if there was a natural barrier
16381 gcc_assert (!BARRIER_P (from
));
16383 /* Count the length of this insn. This must stay in sync with the
16384 code that pushes minipool fixes. */
16385 if (LABEL_P (from
))
16386 count
+= get_label_padding (from
);
16388 count
+= get_attr_length (from
);
16390 /* If there is a jump table, add its length. */
16391 if (tablejump_p (from
, NULL
, &tmp
))
16393 count
+= get_jump_table_size (tmp
);
16395 /* Jump tables aren't in a basic block, so base the cost on
16396 the dispatch insn. If we select this location, we will
16397 still put the pool after the table. */
16398 new_cost
= arm_barrier_cost (from
);
16400 if (count
< max_count
16401 && (!selected
|| new_cost
<= selected_cost
))
16404 selected_cost
= new_cost
;
16405 selected_address
= fix
->address
+ count
;
16408 /* Continue after the dispatch table. */
16409 from
= NEXT_INSN (tmp
);
16413 new_cost
= arm_barrier_cost (from
);
16415 if (count
< max_count
16416 && (!selected
|| new_cost
<= selected_cost
))
16419 selected_cost
= new_cost
;
16420 selected_address
= fix
->address
+ count
;
16423 from
= NEXT_INSN (from
);
16426 /* Make sure that we found a place to insert the jump. */
16427 gcc_assert (selected
);
16429 /* Make sure we do not split a call and its corresponding
16430 CALL_ARG_LOCATION note. */
16431 if (CALL_P (selected
))
16433 rtx next
= NEXT_INSN (selected
);
16434 if (next
&& NOTE_P (next
)
16435 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16439 /* Create a new JUMP_INSN that branches around a barrier. */
16440 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16441 JUMP_LABEL (from
) = label
;
16442 barrier
= emit_barrier_after (from
);
16443 emit_label_after (label
, barrier
);
16445 /* Create a minipool barrier entry for the new barrier. */
16446 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16447 new_fix
->insn
= barrier
;
16448 new_fix
->address
= selected_address
;
16449 new_fix
->next
= fix
->next
;
16450 fix
->next
= new_fix
;
16455 /* Record that there is a natural barrier in the insn stream at
16458 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
16460 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16463 fix
->address
= address
;
16466 if (minipool_fix_head
!= NULL
)
16467 minipool_fix_tail
->next
= fix
;
16469 minipool_fix_head
= fix
;
16471 minipool_fix_tail
= fix
;
16474 /* Record INSN, which will need fixing up to load a value from the
16475 minipool. ADDRESS is the offset of the insn since the start of the
16476 function; LOC is a pointer to the part of the insn which requires
16477 fixing; VALUE is the constant that must be loaded, which is of type
16480 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
16481 enum machine_mode mode
, rtx value
)
16483 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16486 fix
->address
= address
;
16489 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16490 fix
->value
= value
;
16491 fix
->forwards
= get_attr_pool_range (insn
);
16492 fix
->backwards
= get_attr_neg_pool_range (insn
);
16493 fix
->minipool
= NULL
;
16495 /* If an insn doesn't have a range defined for it, then it isn't
16496 expecting to be reworked by this code. Better to stop now than
16497 to generate duff assembly code. */
16498 gcc_assert (fix
->forwards
|| fix
->backwards
);
16500 /* If an entry requires 8-byte alignment then assume all constant pools
16501 require 4 bytes of padding. Trying to do this later on a per-pool
16502 basis is awkward because existing pool entries have to be modified. */
16503 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16508 fprintf (dump_file
,
16509 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16510 GET_MODE_NAME (mode
),
16511 INSN_UID (insn
), (unsigned long) address
,
16512 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16513 arm_print_value (dump_file
, fix
->value
);
16514 fprintf (dump_file
, "\n");
16517 /* Add it to the chain of fixes. */
16520 if (minipool_fix_head
!= NULL
)
16521 minipool_fix_tail
->next
= fix
;
16523 minipool_fix_head
= fix
;
16525 minipool_fix_tail
= fix
;
16528 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16529 Returns the number of insns needed, or 99 if we always want to synthesize
16532 arm_max_const_double_inline_cost ()
16534 /* Let the value get synthesized to avoid the use of literal pools. */
16535 if (arm_disable_literal_pool
)
16538 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16541 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16542 Returns the number of insns needed, or 99 if we don't know how to
16545 arm_const_double_inline_cost (rtx val
)
16547 rtx lowpart
, highpart
;
16548 enum machine_mode mode
;
16550 mode
= GET_MODE (val
);
16552 if (mode
== VOIDmode
)
16555 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16557 lowpart
= gen_lowpart (SImode
, val
);
16558 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16560 gcc_assert (CONST_INT_P (lowpart
));
16561 gcc_assert (CONST_INT_P (highpart
));
16563 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16564 NULL_RTX
, NULL_RTX
, 0, 0)
16565 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16566 NULL_RTX
, NULL_RTX
, 0, 0));
16569 /* Return true if it is worthwhile to split a 64-bit constant into two
16570 32-bit operations. This is the case if optimizing for size, or
16571 if we have load delay slots, or if one 32-bit part can be done with
16572 a single data operation. */
16574 arm_const_double_by_parts (rtx val
)
16576 enum machine_mode mode
= GET_MODE (val
);
16579 if (optimize_size
|| arm_ld_sched
)
16582 if (mode
== VOIDmode
)
16585 part
= gen_highpart_mode (SImode
, mode
, val
);
16587 gcc_assert (CONST_INT_P (part
));
16589 if (const_ok_for_arm (INTVAL (part
))
16590 || const_ok_for_arm (~INTVAL (part
)))
16593 part
= gen_lowpart (SImode
, val
);
16595 gcc_assert (CONST_INT_P (part
));
16597 if (const_ok_for_arm (INTVAL (part
))
16598 || const_ok_for_arm (~INTVAL (part
)))
16604 /* Return true if it is possible to inline both the high and low parts
16605 of a 64-bit constant into 32-bit data processing instructions. */
16607 arm_const_double_by_immediates (rtx val
)
16609 enum machine_mode mode
= GET_MODE (val
);
16612 if (mode
== VOIDmode
)
16615 part
= gen_highpart_mode (SImode
, mode
, val
);
16617 gcc_assert (CONST_INT_P (part
));
16619 if (!const_ok_for_arm (INTVAL (part
)))
16622 part
= gen_lowpart (SImode
, val
);
16624 gcc_assert (CONST_INT_P (part
));
16626 if (!const_ok_for_arm (INTVAL (part
)))
16632 /* Scan INSN and note any of its operands that need fixing.
16633 If DO_PUSHES is false we do not actually push any of the fixups
16636 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
16640 extract_insn (insn
);
16642 if (!constrain_operands (1))
16643 fatal_insn_not_found (insn
);
16645 if (recog_data
.n_alternatives
== 0)
16648 /* Fill in recog_op_alt with information about the constraints of
16650 preprocess_constraints ();
16652 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16654 /* Things we need to fix can only occur in inputs. */
16655 if (recog_data
.operand_type
[opno
] != OP_IN
)
16658 /* If this alternative is a memory reference, then any mention
16659 of constants in this alternative is really to fool reload
16660 into allowing us to accept one there. We need to fix them up
16661 now so that we output the right code. */
16662 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
16664 rtx op
= recog_data
.operand
[opno
];
16666 if (CONSTANT_P (op
))
16669 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16670 recog_data
.operand_mode
[opno
], op
);
16672 else if (MEM_P (op
)
16673 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16674 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16678 rtx cop
= avoid_constant_pool_reference (op
);
16680 /* Casting the address of something to a mode narrower
16681 than a word can cause avoid_constant_pool_reference()
16682 to return the pool reference itself. That's no good to
16683 us here. Lets just hope that we can use the
16684 constant pool value directly. */
16686 cop
= get_pool_constant (XEXP (op
, 0));
16688 push_minipool_fix (insn
, address
,
16689 recog_data
.operand_loc
[opno
],
16690 recog_data
.operand_mode
[opno
], cop
);
16700 /* Rewrite move insn into subtract of 0 if the condition codes will
16701 be useful in next conditional jump insn. */
16704 thumb1_reorg (void)
16708 FOR_EACH_BB_FN (bb
, cfun
)
16711 rtx pat
, op0
, set
= NULL
;
16712 rtx prev
, insn
= BB_END (bb
);
16713 bool insn_clobbered
= false;
16715 while (insn
!= BB_HEAD (bb
) && DEBUG_INSN_P (insn
))
16716 insn
= PREV_INSN (insn
);
16718 /* Find the last cbranchsi4_insn in basic block BB. */
16719 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
16722 /* Get the register with which we are comparing. */
16723 pat
= PATTERN (insn
);
16724 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
16726 /* Find the first flag setting insn before INSN in basic block BB. */
16727 gcc_assert (insn
!= BB_HEAD (bb
));
16728 for (prev
= PREV_INSN (insn
);
16730 && prev
!= BB_HEAD (bb
)
16732 || DEBUG_INSN_P (prev
)
16733 || ((set
= single_set (prev
)) != NULL
16734 && get_attr_conds (prev
) == CONDS_NOCOND
)));
16735 prev
= PREV_INSN (prev
))
16737 if (reg_set_p (op0
, prev
))
16738 insn_clobbered
= true;
16741 /* Skip if op0 is clobbered by insn other than prev. */
16742 if (insn_clobbered
)
16748 dest
= SET_DEST (set
);
16749 src
= SET_SRC (set
);
16750 if (!low_register_operand (dest
, SImode
)
16751 || !low_register_operand (src
, SImode
))
16754 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16755 in INSN. Both src and dest of the move insn are checked. */
16756 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
16758 dest
= copy_rtx (dest
);
16759 src
= copy_rtx (src
);
16760 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
16761 PATTERN (prev
) = gen_rtx_SET (VOIDmode
, dest
, src
);
16762 INSN_CODE (prev
) = -1;
16763 /* Set test register in INSN to dest. */
16764 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
16765 INSN_CODE (insn
) = -1;
16770 /* Convert instructions to their cc-clobbering variant if possible, since
16771 that allows us to use smaller encodings. */
16774 thumb2_reorg (void)
16779 INIT_REG_SET (&live
);
16781 /* We are freeing block_for_insn in the toplev to keep compatibility
16782 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16783 compute_bb_for_insn ();
16786 FOR_EACH_BB_FN (bb
, cfun
)
16790 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
16791 df_simulate_initialize_backwards (bb
, &live
);
16792 FOR_BB_INSNS_REVERSE (bb
, insn
)
16794 if (NONJUMP_INSN_P (insn
)
16795 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
16796 && GET_CODE (PATTERN (insn
)) == SET
)
16798 enum {SKIP
, CONV
, SWAP_CONV
} action
= SKIP
;
16799 rtx pat
= PATTERN (insn
);
16800 rtx dst
= XEXP (pat
, 0);
16801 rtx src
= XEXP (pat
, 1);
16802 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
16804 if (!OBJECT_P (src
))
16805 op0
= XEXP (src
, 0);
16807 if (BINARY_P (src
))
16808 op1
= XEXP (src
, 1);
16810 if (low_register_operand (dst
, SImode
))
16812 switch (GET_CODE (src
))
16815 /* Adding two registers and storing the result
16816 in the first source is already a 16-bit
16818 if (rtx_equal_p (dst
, op0
)
16819 && register_operand (op1
, SImode
))
16822 if (low_register_operand (op0
, SImode
))
16824 /* ADDS <Rd>,<Rn>,<Rm> */
16825 if (low_register_operand (op1
, SImode
))
16827 /* ADDS <Rdn>,#<imm8> */
16828 /* SUBS <Rdn>,#<imm8> */
16829 else if (rtx_equal_p (dst
, op0
)
16830 && CONST_INT_P (op1
)
16831 && IN_RANGE (INTVAL (op1
), -255, 255))
16833 /* ADDS <Rd>,<Rn>,#<imm3> */
16834 /* SUBS <Rd>,<Rn>,#<imm3> */
16835 else if (CONST_INT_P (op1
)
16836 && IN_RANGE (INTVAL (op1
), -7, 7))
16839 /* ADCS <Rd>, <Rn> */
16840 else if (GET_CODE (XEXP (src
, 0)) == PLUS
16841 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
16842 && low_register_operand (XEXP (XEXP (src
, 0), 1),
16844 && COMPARISON_P (op1
)
16845 && cc_register (XEXP (op1
, 0), VOIDmode
)
16846 && maybe_get_arm_condition_code (op1
) == ARM_CS
16847 && XEXP (op1
, 1) == const0_rtx
)
16852 /* RSBS <Rd>,<Rn>,#0
16853 Not handled here: see NEG below. */
16854 /* SUBS <Rd>,<Rn>,#<imm3>
16856 Not handled here: see PLUS above. */
16857 /* SUBS <Rd>,<Rn>,<Rm> */
16858 if (low_register_operand (op0
, SImode
)
16859 && low_register_operand (op1
, SImode
))
16864 /* MULS <Rdm>,<Rn>,<Rdm>
16865 As an exception to the rule, this is only used
16866 when optimizing for size since MULS is slow on all
16867 known implementations. We do not even want to use
16868 MULS in cold code, if optimizing for speed, so we
16869 test the global flag here. */
16870 if (!optimize_size
)
16872 /* else fall through. */
16876 /* ANDS <Rdn>,<Rm> */
16877 if (rtx_equal_p (dst
, op0
)
16878 && low_register_operand (op1
, SImode
))
16880 else if (rtx_equal_p (dst
, op1
)
16881 && low_register_operand (op0
, SImode
))
16882 action
= SWAP_CONV
;
16888 /* ASRS <Rdn>,<Rm> */
16889 /* LSRS <Rdn>,<Rm> */
16890 /* LSLS <Rdn>,<Rm> */
16891 if (rtx_equal_p (dst
, op0
)
16892 && low_register_operand (op1
, SImode
))
16894 /* ASRS <Rd>,<Rm>,#<imm5> */
16895 /* LSRS <Rd>,<Rm>,#<imm5> */
16896 /* LSLS <Rd>,<Rm>,#<imm5> */
16897 else if (low_register_operand (op0
, SImode
)
16898 && CONST_INT_P (op1
)
16899 && IN_RANGE (INTVAL (op1
), 0, 31))
16904 /* RORS <Rdn>,<Rm> */
16905 if (rtx_equal_p (dst
, op0
)
16906 && low_register_operand (op1
, SImode
))
16912 /* MVNS <Rd>,<Rm> */
16913 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16914 if (low_register_operand (op0
, SImode
))
16919 /* MOVS <Rd>,#<imm8> */
16920 if (CONST_INT_P (src
)
16921 && IN_RANGE (INTVAL (src
), 0, 255))
16926 /* MOVS and MOV<c> with registers have different
16927 encodings, so are not relevant here. */
16935 if (action
!= SKIP
)
16937 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
16938 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
16941 if (action
== SWAP_CONV
)
16943 src
= copy_rtx (src
);
16944 XEXP (src
, 0) = op1
;
16945 XEXP (src
, 1) = op0
;
16946 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
16947 vec
= gen_rtvec (2, pat
, clobber
);
16949 else /* action == CONV */
16950 vec
= gen_rtvec (2, pat
, clobber
);
16952 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
16953 INSN_CODE (insn
) = -1;
16957 if (NONDEBUG_INSN_P (insn
))
16958 df_simulate_one_insn_backwards (bb
, insn
, &live
);
16962 CLEAR_REG_SET (&live
);
16965 /* Gcc puts the pool in the wrong place for ARM, since we can only
16966 load addresses a limited distance around the pc. We do some
16967 special munging to move the constant pool values to the correct
16968 point in the code. */
16973 HOST_WIDE_INT address
= 0;
16978 else if (TARGET_THUMB2
)
16981 /* Ensure all insns that must be split have been split at this point.
16982 Otherwise, the pool placement code below may compute incorrect
16983 insn lengths. Note that when optimizing, all insns have already
16984 been split at this point. */
16986 split_all_insns_noflow ();
16988 minipool_fix_head
= minipool_fix_tail
= NULL
;
16990 /* The first insn must always be a note, or the code below won't
16991 scan it properly. */
16992 insn
= get_insns ();
16993 gcc_assert (NOTE_P (insn
));
16996 /* Scan all the insns and record the operands that will need fixing. */
16997 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
16999 if (BARRIER_P (insn
))
17000 push_minipool_barrier (insn
, address
);
17001 else if (INSN_P (insn
))
17005 note_invalid_constants (insn
, address
, true);
17006 address
+= get_attr_length (insn
);
17008 /* If the insn is a vector jump, add the size of the table
17009 and skip the table. */
17010 if (tablejump_p (insn
, NULL
, &table
))
17012 address
+= get_jump_table_size (table
);
17016 else if (LABEL_P (insn
))
17017 /* Add the worst-case padding due to alignment. We don't add
17018 the _current_ padding because the minipool insertions
17019 themselves might change it. */
17020 address
+= get_label_padding (insn
);
17023 fix
= minipool_fix_head
;
17025 /* Now scan the fixups and perform the required changes. */
17030 Mfix
* last_added_fix
;
17031 Mfix
* last_barrier
= NULL
;
17034 /* Skip any further barriers before the next fix. */
17035 while (fix
&& BARRIER_P (fix
->insn
))
17038 /* No more fixes. */
17042 last_added_fix
= NULL
;
17044 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17046 if (BARRIER_P (ftmp
->insn
))
17048 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17051 last_barrier
= ftmp
;
17053 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17056 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17059 /* If we found a barrier, drop back to that; any fixes that we
17060 could have reached but come after the barrier will now go in
17061 the next mini-pool. */
17062 if (last_barrier
!= NULL
)
17064 /* Reduce the refcount for those fixes that won't go into this
17066 for (fdel
= last_barrier
->next
;
17067 fdel
&& fdel
!= ftmp
;
17070 fdel
->minipool
->refcount
--;
17071 fdel
->minipool
= NULL
;
17074 ftmp
= last_barrier
;
17078 /* ftmp is first fix that we can't fit into this pool and
17079 there no natural barriers that we could use. Insert a
17080 new barrier in the code somewhere between the previous
17081 fix and this one, and arrange to jump around it. */
17082 HOST_WIDE_INT max_address
;
17084 /* The last item on the list of fixes must be a barrier, so
17085 we can never run off the end of the list of fixes without
17086 last_barrier being set. */
17089 max_address
= minipool_vector_head
->max_address
;
17090 /* Check that there isn't another fix that is in range that
17091 we couldn't fit into this pool because the pool was
17092 already too large: we need to put the pool before such an
17093 instruction. The pool itself may come just after the
17094 fix because create_fix_barrier also allows space for a
17095 jump instruction. */
17096 if (ftmp
->address
< max_address
)
17097 max_address
= ftmp
->address
+ 1;
17099 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17102 assign_minipool_offsets (last_barrier
);
17106 if (!BARRIER_P (ftmp
->insn
)
17107 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17114 /* Scan over the fixes we have identified for this pool, fixing them
17115 up and adding the constants to the pool itself. */
17116 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17117 this_fix
= this_fix
->next
)
17118 if (!BARRIER_P (this_fix
->insn
))
17121 = plus_constant (Pmode
,
17122 gen_rtx_LABEL_REF (VOIDmode
,
17123 minipool_vector_label
),
17124 this_fix
->minipool
->offset
);
17125 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17128 dump_minipool (last_barrier
->insn
);
17132 /* From now on we must synthesize any constants that we can't handle
17133 directly. This can happen if the RTL gets split during final
17134 instruction generation. */
17135 after_arm_reorg
= 1;
17137 /* Free the minipool memory. */
17138 obstack_free (&minipool_obstack
, minipool_startobj
);
17141 /* Routines to output assembly language. */
17143 /* If the rtx is the correct value then return the string of the number.
17144 In this way we can ensure that valid double constants are generated even
17145 when cross compiling. */
17147 fp_immediate_constant (rtx x
)
17151 if (!fp_consts_inited
)
17154 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
17156 gcc_assert (REAL_VALUES_EQUAL (r
, value_fp0
));
17160 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17161 static const char *
17162 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17164 if (!fp_consts_inited
)
17167 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
17171 /* OPERANDS[0] is the entire list of insns that constitute pop,
17172 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17173 is in the list, UPDATE is true iff the list contains explicit
17174 update of base register. */
17176 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17182 const char *conditional
;
17183 int num_saves
= XVECLEN (operands
[0], 0);
17184 unsigned int regno
;
17185 unsigned int regno_base
= REGNO (operands
[1]);
17188 offset
+= update
? 1 : 0;
17189 offset
+= return_pc
? 1 : 0;
17191 /* Is the base register in the list? */
17192 for (i
= offset
; i
< num_saves
; i
++)
17194 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17195 /* If SP is in the list, then the base register must be SP. */
17196 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17197 /* If base register is in the list, there must be no explicit update. */
17198 if (regno
== regno_base
)
17199 gcc_assert (!update
);
17202 conditional
= reverse
? "%?%D0" : "%?%d0";
17203 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
17205 /* Output pop (not stmfd) because it has a shorter encoding. */
17206 gcc_assert (update
);
17207 sprintf (pattern
, "pop%s\t{", conditional
);
17211 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17212 It's just a convention, their semantics are identical. */
17213 if (regno_base
== SP_REGNUM
)
17214 sprintf (pattern
, "ldm%sfd\t", conditional
);
17215 else if (TARGET_UNIFIED_ASM
)
17216 sprintf (pattern
, "ldmia%s\t", conditional
);
17218 sprintf (pattern
, "ldm%sia\t", conditional
);
17220 strcat (pattern
, reg_names
[regno_base
]);
17222 strcat (pattern
, "!, {");
17224 strcat (pattern
, ", {");
17227 /* Output the first destination register. */
17229 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17231 /* Output the rest of the destination registers. */
17232 for (i
= offset
+ 1; i
< num_saves
; i
++)
17234 strcat (pattern
, ", ");
17236 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17239 strcat (pattern
, "}");
17241 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
17242 strcat (pattern
, "^");
17244 output_asm_insn (pattern
, &cond
);
17248 /* Output the assembly for a store multiple. */
17251 vfp_output_fstmd (rtx
* operands
)
17258 strcpy (pattern
, "fstmfdd%?\t%m0!, {%P1");
17259 p
= strlen (pattern
);
17261 gcc_assert (REG_P (operands
[1]));
17263 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17264 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17266 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17268 strcpy (&pattern
[p
], "}");
17270 output_asm_insn (pattern
, operands
);
17275 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17276 number of bytes pushed. */
17279 vfp_emit_fstmd (int base_reg
, int count
)
17286 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17287 register pairs are stored by a store multiple insn. We avoid this
17288 by pushing an extra pair. */
17289 if (count
== 2 && !arm_arch6
)
17291 if (base_reg
== LAST_VFP_REGNUM
- 3)
17296 /* FSTMD may not store more than 16 doubleword registers at once. Split
17297 larger stores into multiple parts (up to a maximum of two, in
17302 /* NOTE: base_reg is an internal register number, so each D register
17304 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17305 saved
+= vfp_emit_fstmd (base_reg
, 16);
17309 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17310 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17312 reg
= gen_rtx_REG (DFmode
, base_reg
);
17315 XVECEXP (par
, 0, 0)
17316 = gen_rtx_SET (VOIDmode
,
17319 gen_rtx_PRE_MODIFY (Pmode
,
17322 (Pmode
, stack_pointer_rtx
,
17325 gen_rtx_UNSPEC (BLKmode
,
17326 gen_rtvec (1, reg
),
17327 UNSPEC_PUSH_MULT
));
17329 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17330 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17331 RTX_FRAME_RELATED_P (tmp
) = 1;
17332 XVECEXP (dwarf
, 0, 0) = tmp
;
17334 tmp
= gen_rtx_SET (VOIDmode
,
17335 gen_frame_mem (DFmode
, stack_pointer_rtx
),
17337 RTX_FRAME_RELATED_P (tmp
) = 1;
17338 XVECEXP (dwarf
, 0, 1) = tmp
;
17340 for (i
= 1; i
< count
; i
++)
17342 reg
= gen_rtx_REG (DFmode
, base_reg
);
17344 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17346 tmp
= gen_rtx_SET (VOIDmode
,
17347 gen_frame_mem (DFmode
,
17348 plus_constant (Pmode
,
17352 RTX_FRAME_RELATED_P (tmp
) = 1;
17353 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17356 par
= emit_insn (par
);
17357 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17358 RTX_FRAME_RELATED_P (par
) = 1;
17363 /* Emit a call instruction with pattern PAT. ADDR is the address of
17364 the call target. */
17367 arm_emit_call_insn (rtx pat
, rtx addr
)
17371 insn
= emit_call_insn (pat
);
17373 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17374 If the call might use such an entry, add a use of the PIC register
17375 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17376 if (TARGET_VXWORKS_RTP
17378 && GET_CODE (addr
) == SYMBOL_REF
17379 && (SYMBOL_REF_DECL (addr
)
17380 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17381 : !SYMBOL_REF_LOCAL_P (addr
)))
17383 require_pic_register ();
17384 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17388 /* Output a 'call' insn. */
17390 output_call (rtx
*operands
)
17392 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17394 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17395 if (REGNO (operands
[0]) == LR_REGNUM
)
17397 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17398 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17401 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17403 if (TARGET_INTERWORK
|| arm_arch4t
)
17404 output_asm_insn ("bx%?\t%0", operands
);
17406 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17411 /* Output a 'call' insn that is a reference in memory. This is
17412 disabled for ARMv5 and we prefer a blx instead because otherwise
17413 there's a significant performance overhead. */
17415 output_call_mem (rtx
*operands
)
17417 gcc_assert (!arm_arch5
);
17418 if (TARGET_INTERWORK
)
17420 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17421 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17422 output_asm_insn ("bx%?\t%|ip", operands
);
17424 else if (regno_use_in (LR_REGNUM
, operands
[0]))
17426 /* LR is used in the memory address. We load the address in the
17427 first instruction. It's safe to use IP as the target of the
17428 load since the call will kill it anyway. */
17429 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17430 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17432 output_asm_insn ("bx%?\t%|ip", operands
);
17434 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
17438 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17439 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
17446 /* Output a move from arm registers to arm registers of a long double
17447 OPERANDS[0] is the destination.
17448 OPERANDS[1] is the source. */
17450 output_mov_long_double_arm_from_arm (rtx
*operands
)
17452 /* We have to be careful here because the two might overlap. */
17453 int dest_start
= REGNO (operands
[0]);
17454 int src_start
= REGNO (operands
[1]);
17458 if (dest_start
< src_start
)
17460 for (i
= 0; i
< 3; i
++)
17462 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17463 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17464 output_asm_insn ("mov%?\t%0, %1", ops
);
17469 for (i
= 2; i
>= 0; i
--)
17471 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17472 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17473 output_asm_insn ("mov%?\t%0, %1", ops
);
17481 arm_emit_movpair (rtx dest
, rtx src
)
17483 /* If the src is an immediate, simplify it. */
17484 if (CONST_INT_P (src
))
17486 HOST_WIDE_INT val
= INTVAL (src
);
17487 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17488 if ((val
>> 16) & 0x0000ffff)
17489 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17491 GEN_INT ((val
>> 16) & 0x0000ffff));
17494 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17495 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17498 /* Output a move between double words. It must be REG<-MEM
17501 output_move_double (rtx
*operands
, bool emit
, int *count
)
17503 enum rtx_code code0
= GET_CODE (operands
[0]);
17504 enum rtx_code code1
= GET_CODE (operands
[1]);
17509 /* The only case when this might happen is when
17510 you are looking at the length of a DImode instruction
17511 that has an invalid constant in it. */
17512 if (code0
== REG
&& code1
!= MEM
)
17514 gcc_assert (!emit
);
17521 unsigned int reg0
= REGNO (operands
[0]);
17523 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17525 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17527 switch (GET_CODE (XEXP (operands
[1], 0)))
17534 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17535 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17537 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17542 gcc_assert (TARGET_LDRD
);
17544 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
17551 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
17553 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
17561 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
17563 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
17568 gcc_assert (TARGET_LDRD
);
17570 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
17575 /* Autoicrement addressing modes should never have overlapping
17576 base and destination registers, and overlapping index registers
17577 are already prohibited, so this doesn't need to worry about
17579 otherops
[0] = operands
[0];
17580 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17581 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17583 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17585 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17587 /* Registers overlap so split out the increment. */
17590 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17591 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
17598 /* Use a single insn if we can.
17599 FIXME: IWMMXT allows offsets larger than ldrd can
17600 handle, fix these up with a pair of ldr. */
17602 || !CONST_INT_P (otherops
[2])
17603 || (INTVAL (otherops
[2]) > -256
17604 && INTVAL (otherops
[2]) < 256))
17607 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
17613 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17614 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17624 /* Use a single insn if we can.
17625 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17626 fix these up with a pair of ldr. */
17628 || !CONST_INT_P (otherops
[2])
17629 || (INTVAL (otherops
[2]) > -256
17630 && INTVAL (otherops
[2]) < 256))
17633 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
17639 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17640 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
17650 /* We might be able to use ldrd %0, %1 here. However the range is
17651 different to ldr/adr, and it is broken on some ARMv7-M
17652 implementations. */
17653 /* Use the second register of the pair to avoid problematic
17655 otherops
[1] = operands
[1];
17657 output_asm_insn ("adr%?\t%0, %1", otherops
);
17658 operands
[1] = otherops
[0];
17662 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17664 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
17671 /* ??? This needs checking for thumb2. */
17673 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
17674 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
17676 otherops
[0] = operands
[0];
17677 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
17678 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
17680 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
17682 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17684 switch ((int) INTVAL (otherops
[2]))
17688 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
17694 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
17700 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
17704 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
17705 operands
[1] = otherops
[0];
17707 && (REG_P (otherops
[2])
17709 || (CONST_INT_P (otherops
[2])
17710 && INTVAL (otherops
[2]) > -256
17711 && INTVAL (otherops
[2]) < 256)))
17713 if (reg_overlap_mentioned_p (operands
[0],
17717 /* Swap base and index registers over to
17718 avoid a conflict. */
17720 otherops
[1] = otherops
[2];
17723 /* If both registers conflict, it will usually
17724 have been fixed by a splitter. */
17725 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
17726 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
17730 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17731 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17738 otherops
[0] = operands
[0];
17740 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
17745 if (CONST_INT_P (otherops
[2]))
17749 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
17750 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
17752 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17758 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17764 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
17771 return "ldr%(d%)\t%0, [%1]";
17773 return "ldm%(ia%)\t%1, %M0";
17777 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
17778 /* Take care of overlapping base/data reg. */
17779 if (reg_mentioned_p (operands
[0], operands
[1]))
17783 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17784 output_asm_insn ("ldr%?\t%0, %1", operands
);
17794 output_asm_insn ("ldr%?\t%0, %1", operands
);
17795 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17805 /* Constraints should ensure this. */
17806 gcc_assert (code0
== MEM
&& code1
== REG
);
17807 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
17808 || (TARGET_ARM
&& TARGET_LDRD
));
17810 switch (GET_CODE (XEXP (operands
[0], 0)))
17816 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
17818 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
17823 gcc_assert (TARGET_LDRD
);
17825 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
17832 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
17834 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
17842 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
17844 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
17849 gcc_assert (TARGET_LDRD
);
17851 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
17856 otherops
[0] = operands
[1];
17857 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
17858 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
17860 /* IWMMXT allows offsets larger than ldrd can handle,
17861 fix these up with a pair of ldr. */
17863 && CONST_INT_P (otherops
[2])
17864 && (INTVAL(otherops
[2]) <= -256
17865 || INTVAL(otherops
[2]) >= 256))
17867 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17871 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
17872 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17881 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17882 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
17888 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17891 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
17896 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
17901 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
17902 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17904 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
17908 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
17915 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
17922 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
17927 && (REG_P (otherops
[2])
17929 || (CONST_INT_P (otherops
[2])
17930 && INTVAL (otherops
[2]) > -256
17931 && INTVAL (otherops
[2]) < 256)))
17933 otherops
[0] = operands
[1];
17934 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
17936 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
17942 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
17943 otherops
[1] = operands
[1];
17946 output_asm_insn ("str%?\t%1, %0", operands
);
17947 output_asm_insn ("str%?\t%H1, %0", otherops
);
17957 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17958 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17961 output_move_quad (rtx
*operands
)
17963 if (REG_P (operands
[0]))
17965 /* Load, or reg->reg move. */
17967 if (MEM_P (operands
[1]))
17969 switch (GET_CODE (XEXP (operands
[1], 0)))
17972 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17977 output_asm_insn ("adr%?\t%0, %1", operands
);
17978 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
17982 gcc_unreachable ();
17990 gcc_assert (REG_P (operands
[1]));
17992 dest
= REGNO (operands
[0]);
17993 src
= REGNO (operands
[1]);
17995 /* This seems pretty dumb, but hopefully GCC won't try to do it
17998 for (i
= 0; i
< 4; i
++)
18000 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18001 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18002 output_asm_insn ("mov%?\t%0, %1", ops
);
18005 for (i
= 3; i
>= 0; i
--)
18007 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18008 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18009 output_asm_insn ("mov%?\t%0, %1", ops
);
18015 gcc_assert (MEM_P (operands
[0]));
18016 gcc_assert (REG_P (operands
[1]));
18017 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18019 switch (GET_CODE (XEXP (operands
[0], 0)))
18022 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18026 gcc_unreachable ();
18033 /* Output a VFP load or store instruction. */
18036 output_move_vfp (rtx
*operands
)
18038 rtx reg
, mem
, addr
, ops
[2];
18039 int load
= REG_P (operands
[0]);
18040 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18041 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18044 enum machine_mode mode
;
18046 reg
= operands
[!load
];
18047 mem
= operands
[load
];
18049 mode
= GET_MODE (reg
);
18051 gcc_assert (REG_P (reg
));
18052 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18053 gcc_assert (mode
== SFmode
18057 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18058 gcc_assert (MEM_P (mem
));
18060 addr
= XEXP (mem
, 0);
18062 switch (GET_CODE (addr
))
18065 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18066 ops
[0] = XEXP (addr
, 0);
18071 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
18072 ops
[0] = XEXP (addr
, 0);
18077 templ
= "f%s%c%%?\t%%%s0, %%1%s";
18083 sprintf (buff
, templ
,
18084 load
? "ld" : "st",
18087 integer_p
? "\t%@ int" : "");
18088 output_asm_insn (buff
, ops
);
18093 /* Output a Neon double-word or quad-word load or store, or a load
18094 or store for larger structure modes.
18096 WARNING: The ordering of elements is weird in big-endian mode,
18097 because the EABI requires that vectors stored in memory appear
18098 as though they were stored by a VSTM, as required by the EABI.
18099 GCC RTL defines element ordering based on in-memory order.
18100 This can be different from the architectural ordering of elements
18101 within a NEON register. The intrinsics defined in arm_neon.h use the
18102 NEON register element ordering, not the GCC RTL element ordering.
18104 For example, the in-memory ordering of a big-endian a quadword
18105 vector with 16-bit elements when stored from register pair {d0,d1}
18106 will be (lowest address first, d0[N] is NEON register element N):
18108 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18110 When necessary, quadword registers (dN, dN+1) are moved to ARM
18111 registers from rN in the order:
18113 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18115 So that STM/LDM can be used on vectors in ARM registers, and the
18116 same memory layout will result as if VSTM/VLDM were used.
18118 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18119 possible, which allows use of appropriate alignment tags.
18120 Note that the choice of "64" is independent of the actual vector
18121 element size; this size simply ensures that the behavior is
18122 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18124 Due to limitations of those instructions, use of VST1.64/VLD1.64
18125 is not possible if:
18126 - the address contains PRE_DEC, or
18127 - the mode refers to more than 4 double-word registers
18129 In those cases, it would be possible to replace VSTM/VLDM by a
18130 sequence of instructions; this is not currently implemented since
18131 this is not certain to actually improve performance. */
18134 output_move_neon (rtx
*operands
)
18136 rtx reg
, mem
, addr
, ops
[2];
18137 int regno
, nregs
, load
= REG_P (operands
[0]);
18140 enum machine_mode mode
;
18142 reg
= operands
[!load
];
18143 mem
= operands
[load
];
18145 mode
= GET_MODE (reg
);
18147 gcc_assert (REG_P (reg
));
18148 regno
= REGNO (reg
);
18149 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18150 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18151 || NEON_REGNO_OK_FOR_QUAD (regno
));
18152 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18153 || VALID_NEON_QREG_MODE (mode
)
18154 || VALID_NEON_STRUCT_MODE (mode
));
18155 gcc_assert (MEM_P (mem
));
18157 addr
= XEXP (mem
, 0);
18159 /* Strip off const from addresses like (const (plus (...))). */
18160 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18161 addr
= XEXP (addr
, 0);
18163 switch (GET_CODE (addr
))
18166 /* We have to use vldm / vstm for too-large modes. */
18169 templ
= "v%smia%%?\t%%0!, %%h1";
18170 ops
[0] = XEXP (addr
, 0);
18174 templ
= "v%s1.64\t%%h1, %%A0";
18181 /* We have to use vldm / vstm in this case, since there is no
18182 pre-decrement form of the vld1 / vst1 instructions. */
18183 templ
= "v%smdb%%?\t%%0!, %%h1";
18184 ops
[0] = XEXP (addr
, 0);
18189 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18190 gcc_unreachable ();
18197 for (i
= 0; i
< nregs
; i
++)
18199 /* We're only using DImode here because it's a convenient size. */
18200 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18201 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18202 if (reg_overlap_mentioned_p (ops
[0], mem
))
18204 gcc_assert (overlap
== -1);
18209 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18210 output_asm_insn (buff
, ops
);
18215 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18216 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18217 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18218 output_asm_insn (buff
, ops
);
18225 /* We have to use vldm / vstm for too-large modes. */
18227 templ
= "v%smia%%?\t%%m0, %%h1";
18229 templ
= "v%s1.64\t%%h1, %%A0";
18235 sprintf (buff
, templ
, load
? "ld" : "st");
18236 output_asm_insn (buff
, ops
);
18241 /* Compute and return the length of neon_mov<mode>, where <mode> is
18242 one of VSTRUCT modes: EI, OI, CI or XI. */
18244 arm_attr_length_move_neon (rtx insn
)
18246 rtx reg
, mem
, addr
;
18248 enum machine_mode mode
;
18250 extract_insn_cached (insn
);
18252 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18254 mode
= GET_MODE (recog_data
.operand
[0]);
18265 gcc_unreachable ();
18269 load
= REG_P (recog_data
.operand
[0]);
18270 reg
= recog_data
.operand
[!load
];
18271 mem
= recog_data
.operand
[load
];
18273 gcc_assert (MEM_P (mem
));
18275 mode
= GET_MODE (reg
);
18276 addr
= XEXP (mem
, 0);
18278 /* Strip off const from addresses like (const (plus (...))). */
18279 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18280 addr
= XEXP (addr
, 0);
18282 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18284 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18291 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18295 arm_address_offset_is_imm (rtx insn
)
18299 extract_insn_cached (insn
);
18301 if (REG_P (recog_data
.operand
[0]))
18304 mem
= recog_data
.operand
[0];
18306 gcc_assert (MEM_P (mem
));
18308 addr
= XEXP (mem
, 0);
18311 || (GET_CODE (addr
) == PLUS
18312 && REG_P (XEXP (addr
, 0))
18313 && CONST_INT_P (XEXP (addr
, 1))))
18319 /* Output an ADD r, s, #n where n may be too big for one instruction.
18320 If adding zero to one register, output nothing. */
18322 output_add_immediate (rtx
*operands
)
18324 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18326 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18329 output_multi_immediate (operands
,
18330 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18333 output_multi_immediate (operands
,
18334 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18341 /* Output a multiple immediate operation.
18342 OPERANDS is the vector of operands referred to in the output patterns.
18343 INSTR1 is the output pattern to use for the first constant.
18344 INSTR2 is the output pattern to use for subsequent constants.
18345 IMMED_OP is the index of the constant slot in OPERANDS.
18346 N is the constant value. */
18347 static const char *
18348 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18349 int immed_op
, HOST_WIDE_INT n
)
18351 #if HOST_BITS_PER_WIDE_INT > 32
18357 /* Quick and easy output. */
18358 operands
[immed_op
] = const0_rtx
;
18359 output_asm_insn (instr1
, operands
);
18364 const char * instr
= instr1
;
18366 /* Note that n is never zero here (which would give no output). */
18367 for (i
= 0; i
< 32; i
+= 2)
18371 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18372 output_asm_insn (instr
, operands
);
18382 /* Return the name of a shifter operation. */
18383 static const char *
18384 arm_shift_nmem(enum rtx_code code
)
18389 return ARM_LSL_NAME
;
18405 /* Return the appropriate ARM instruction for the operation code.
18406 The returned result should not be overwritten. OP is the rtx of the
18407 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18410 arithmetic_instr (rtx op
, int shift_first_arg
)
18412 switch (GET_CODE (op
))
18418 return shift_first_arg
? "rsb" : "sub";
18433 return arm_shift_nmem(GET_CODE(op
));
18436 gcc_unreachable ();
18440 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18441 for the operation code. The returned result should not be overwritten.
18442 OP is the rtx code of the shift.
18443 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18445 static const char *
18446 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18449 enum rtx_code code
= GET_CODE (op
);
18454 if (!CONST_INT_P (XEXP (op
, 1)))
18456 output_operand_lossage ("invalid shift operand");
18461 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18469 mnem
= arm_shift_nmem(code
);
18470 if (CONST_INT_P (XEXP (op
, 1)))
18472 *amountp
= INTVAL (XEXP (op
, 1));
18474 else if (REG_P (XEXP (op
, 1)))
18481 output_operand_lossage ("invalid shift operand");
18487 /* We never have to worry about the amount being other than a
18488 power of 2, since this case can never be reloaded from a reg. */
18489 if (!CONST_INT_P (XEXP (op
, 1)))
18491 output_operand_lossage ("invalid shift operand");
18495 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18497 /* Amount must be a power of two. */
18498 if (*amountp
& (*amountp
- 1))
18500 output_operand_lossage ("invalid shift operand");
18504 *amountp
= int_log2 (*amountp
);
18505 return ARM_LSL_NAME
;
18508 output_operand_lossage ("invalid shift operand");
18512 /* This is not 100% correct, but follows from the desire to merge
18513 multiplication by a power of 2 with the recognizer for a
18514 shift. >=32 is not a valid shift for "lsl", so we must try and
18515 output a shift that produces the correct arithmetical result.
18516 Using lsr #32 is identical except for the fact that the carry bit
18517 is not set correctly if we set the flags; but we never use the
18518 carry bit from such an operation, so we can ignore that. */
18519 if (code
== ROTATERT
)
18520 /* Rotate is just modulo 32. */
18522 else if (*amountp
!= (*amountp
& 31))
18524 if (code
== ASHIFT
)
18529 /* Shifts of 0 are no-ops. */
18536 /* Obtain the shift from the POWER of two. */
18538 static HOST_WIDE_INT
18539 int_log2 (HOST_WIDE_INT power
)
18541 HOST_WIDE_INT shift
= 0;
18543 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
18545 gcc_assert (shift
<= 31);
18552 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18553 because /bin/as is horribly restrictive. The judgement about
18554 whether or not each character is 'printable' (and can be output as
18555 is) or not (and must be printed with an octal escape) must be made
18556 with reference to the *host* character set -- the situation is
18557 similar to that discussed in the comments above pp_c_char in
18558 c-pretty-print.c. */
18560 #define MAX_ASCII_LEN 51
18563 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18566 int len_so_far
= 0;
18568 fputs ("\t.ascii\t\"", stream
);
18570 for (i
= 0; i
< len
; i
++)
18574 if (len_so_far
>= MAX_ASCII_LEN
)
18576 fputs ("\"\n\t.ascii\t\"", stream
);
18582 if (c
== '\\' || c
== '\"')
18584 putc ('\\', stream
);
18592 fprintf (stream
, "\\%03o", c
);
18597 fputs ("\"\n", stream
);
18600 /* Compute the register save mask for registers 0 through 12
18601 inclusive. This code is used by arm_compute_save_reg_mask. */
18603 static unsigned long
18604 arm_compute_save_reg0_reg12_mask (void)
18606 unsigned long func_type
= arm_current_func_type ();
18607 unsigned long save_reg_mask
= 0;
18610 if (IS_INTERRUPT (func_type
))
18612 unsigned int max_reg
;
18613 /* Interrupt functions must not corrupt any registers,
18614 even call clobbered ones. If this is a leaf function
18615 we can just examine the registers used by the RTL, but
18616 otherwise we have to assume that whatever function is
18617 called might clobber anything, and so we have to save
18618 all the call-clobbered registers as well. */
18619 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18620 /* FIQ handlers have registers r8 - r12 banked, so
18621 we only need to check r0 - r7, Normal ISRs only
18622 bank r14 and r15, so we must check up to r12.
18623 r13 is the stack pointer which is always preserved,
18624 so we do not need to consider it here. */
18629 for (reg
= 0; reg
<= max_reg
; reg
++)
18630 if (df_regs_ever_live_p (reg
)
18631 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18632 save_reg_mask
|= (1 << reg
);
18634 /* Also save the pic base register if necessary. */
18636 && !TARGET_SINGLE_PIC_BASE
18637 && arm_pic_register
!= INVALID_REGNUM
18638 && crtl
->uses_pic_offset_table
)
18639 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18641 else if (IS_VOLATILE(func_type
))
18643 /* For noreturn functions we historically omitted register saves
18644 altogether. However this really messes up debugging. As a
18645 compromise save just the frame pointers. Combined with the link
18646 register saved elsewhere this should be sufficient to get
18648 if (frame_pointer_needed
)
18649 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18650 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
18651 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18652 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
18653 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
18657 /* In the normal case we only need to save those registers
18658 which are call saved and which are used by this function. */
18659 for (reg
= 0; reg
<= 11; reg
++)
18660 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
18661 save_reg_mask
|= (1 << reg
);
18663 /* Handle the frame pointer as a special case. */
18664 if (frame_pointer_needed
)
18665 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18667 /* If we aren't loading the PIC register,
18668 don't stack it even though it may be live. */
18670 && !TARGET_SINGLE_PIC_BASE
18671 && arm_pic_register
!= INVALID_REGNUM
18672 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
18673 || crtl
->uses_pic_offset_table
))
18674 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18676 /* The prologue will copy SP into R0, so save it. */
18677 if (IS_STACKALIGN (func_type
))
18678 save_reg_mask
|= 1;
18681 /* Save registers so the exception handler can modify them. */
18682 if (crtl
->calls_eh_return
)
18688 reg
= EH_RETURN_DATA_REGNO (i
);
18689 if (reg
== INVALID_REGNUM
)
18691 save_reg_mask
|= 1 << reg
;
18695 return save_reg_mask
;
18698 /* Return true if r3 is live at the start of the function. */
18701 arm_r3_live_at_start_p (void)
18703 /* Just look at cfg info, which is still close enough to correct at this
18704 point. This gives false positives for broken functions that might use
18705 uninitialized data that happens to be allocated in r3, but who cares? */
18706 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
18709 /* Compute the number of bytes used to store the static chain register on the
18710 stack, above the stack frame. We need to know this accurately to get the
18711 alignment of the rest of the stack frame correct. */
18714 arm_compute_static_chain_stack_bytes (void)
18716 /* See the defining assertion in arm_expand_prologue. */
18717 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
18718 && IS_NESTED (arm_current_func_type ())
18719 && arm_r3_live_at_start_p ()
18720 && crtl
->args
.pretend_args_size
== 0)
18726 /* Compute a bit mask of which registers need to be
18727 saved on the stack for the current function.
18728 This is used by arm_get_frame_offsets, which may add extra registers. */
18730 static unsigned long
18731 arm_compute_save_reg_mask (void)
18733 unsigned int save_reg_mask
= 0;
18734 unsigned long func_type
= arm_current_func_type ();
18737 if (IS_NAKED (func_type
))
18738 /* This should never really happen. */
18741 /* If we are creating a stack frame, then we must save the frame pointer,
18742 IP (which will hold the old stack pointer), LR and the PC. */
18743 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
18745 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
18748 | (1 << PC_REGNUM
);
18750 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
18752 /* Decide if we need to save the link register.
18753 Interrupt routines have their own banked link register,
18754 so they never need to save it.
18755 Otherwise if we do not use the link register we do not need to save
18756 it. If we are pushing other registers onto the stack however, we
18757 can save an instruction in the epilogue by pushing the link register
18758 now and then popping it back into the PC. This incurs extra memory
18759 accesses though, so we only do it when optimizing for size, and only
18760 if we know that we will not need a fancy return sequence. */
18761 if (df_regs_ever_live_p (LR_REGNUM
)
18764 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
18765 && !crtl
->calls_eh_return
))
18766 save_reg_mask
|= 1 << LR_REGNUM
;
18768 if (cfun
->machine
->lr_save_eliminated
)
18769 save_reg_mask
&= ~ (1 << LR_REGNUM
);
18771 if (TARGET_REALLY_IWMMXT
18772 && ((bit_count (save_reg_mask
)
18773 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
18774 arm_compute_static_chain_stack_bytes())
18777 /* The total number of registers that are going to be pushed
18778 onto the stack is odd. We need to ensure that the stack
18779 is 64-bit aligned before we start to save iWMMXt registers,
18780 and also before we start to create locals. (A local variable
18781 might be a double or long long which we will load/store using
18782 an iWMMXt instruction). Therefore we need to push another
18783 ARM register, so that the stack will be 64-bit aligned. We
18784 try to avoid using the arg registers (r0 -r3) as they might be
18785 used to pass values in a tail call. */
18786 for (reg
= 4; reg
<= 12; reg
++)
18787 if ((save_reg_mask
& (1 << reg
)) == 0)
18791 save_reg_mask
|= (1 << reg
);
18794 cfun
->machine
->sibcall_blocked
= 1;
18795 save_reg_mask
|= (1 << 3);
18799 /* We may need to push an additional register for use initializing the
18800 PIC base register. */
18801 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
18802 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
18804 reg
= thumb_find_work_register (1 << 4);
18805 if (!call_used_regs
[reg
])
18806 save_reg_mask
|= (1 << reg
);
18809 return save_reg_mask
;
18813 /* Compute a bit mask of which registers need to be
18814 saved on the stack for the current function. */
18815 static unsigned long
18816 thumb1_compute_save_reg_mask (void)
18818 unsigned long mask
;
18822 for (reg
= 0; reg
< 12; reg
++)
18823 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
18827 && !TARGET_SINGLE_PIC_BASE
18828 && arm_pic_register
!= INVALID_REGNUM
18829 && crtl
->uses_pic_offset_table
)
18830 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18832 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18833 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
18834 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18836 /* LR will also be pushed if any lo regs are pushed. */
18837 if (mask
& 0xff || thumb_force_lr_save ())
18838 mask
|= (1 << LR_REGNUM
);
18840 /* Make sure we have a low work register if we need one.
18841 We will need one if we are going to push a high register,
18842 but we are not currently intending to push a low register. */
18843 if ((mask
& 0xff) == 0
18844 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
18846 /* Use thumb_find_work_register to choose which register
18847 we will use. If the register is live then we will
18848 have to push it. Use LAST_LO_REGNUM as our fallback
18849 choice for the register to select. */
18850 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
18851 /* Make sure the register returned by thumb_find_work_register is
18852 not part of the return value. */
18853 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
18854 reg
= LAST_LO_REGNUM
;
18856 if (! call_used_regs
[reg
])
18860 /* The 504 below is 8 bytes less than 512 because there are two possible
18861 alignment words. We can't tell here if they will be present or not so we
18862 have to play it safe and assume that they are. */
18863 if ((CALLER_INTERWORKING_SLOT_SIZE
+
18864 ROUND_UP_WORD (get_frame_size ()) +
18865 crtl
->outgoing_args_size
) >= 504)
18867 /* This is the same as the code in thumb1_expand_prologue() which
18868 determines which register to use for stack decrement. */
18869 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
18870 if (mask
& (1 << reg
))
18873 if (reg
> LAST_LO_REGNUM
)
18875 /* Make sure we have a register available for stack decrement. */
18876 mask
|= 1 << LAST_LO_REGNUM
;
18884 /* Return the number of bytes required to save VFP registers. */
18886 arm_get_vfp_saved_size (void)
18888 unsigned int regno
;
18893 /* Space for saved VFP registers. */
18894 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
18897 for (regno
= FIRST_VFP_REGNUM
;
18898 regno
< LAST_VFP_REGNUM
;
18901 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
18902 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
18906 /* Workaround ARM10 VFPr1 bug. */
18907 if (count
== 2 && !arm_arch6
)
18909 saved
+= count
* 8;
18918 if (count
== 2 && !arm_arch6
)
18920 saved
+= count
* 8;
18927 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18928 everything bar the final return instruction. If simple_return is true,
18929 then do not output epilogue, because it has already been emitted in RTL. */
18931 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
18932 bool simple_return
)
18934 char conditional
[10];
18937 unsigned long live_regs_mask
;
18938 unsigned long func_type
;
18939 arm_stack_offsets
*offsets
;
18941 func_type
= arm_current_func_type ();
18943 if (IS_NAKED (func_type
))
18946 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
18948 /* If this function was declared non-returning, and we have
18949 found a tail call, then we have to trust that the called
18950 function won't return. */
18955 /* Otherwise, trap an attempted return by aborting. */
18957 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
18959 assemble_external_libcall (ops
[1]);
18960 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
18966 gcc_assert (!cfun
->calls_alloca
|| really_return
);
18968 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
18970 cfun
->machine
->return_used_this_function
= 1;
18972 offsets
= arm_get_frame_offsets ();
18973 live_regs_mask
= offsets
->saved_regs_mask
;
18975 if (!simple_return
&& live_regs_mask
)
18977 const char * return_reg
;
18979 /* If we do not have any special requirements for function exit
18980 (e.g. interworking) then we can load the return address
18981 directly into the PC. Otherwise we must load it into LR. */
18983 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
18984 return_reg
= reg_names
[PC_REGNUM
];
18986 return_reg
= reg_names
[LR_REGNUM
];
18988 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
18990 /* There are three possible reasons for the IP register
18991 being saved. 1) a stack frame was created, in which case
18992 IP contains the old stack pointer, or 2) an ISR routine
18993 corrupted it, or 3) it was saved to align the stack on
18994 iWMMXt. In case 1, restore IP into SP, otherwise just
18996 if (frame_pointer_needed
)
18998 live_regs_mask
&= ~ (1 << IP_REGNUM
);
18999 live_regs_mask
|= (1 << SP_REGNUM
);
19002 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19005 /* On some ARM architectures it is faster to use LDR rather than
19006 LDM to load a single register. On other architectures, the
19007 cost is the same. In 26 bit mode, or for exception handlers,
19008 we have to use LDM to load the PC so that the CPSR is also
19010 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19011 if (live_regs_mask
== (1U << reg
))
19014 if (reg
<= LAST_ARM_REGNUM
19015 && (reg
!= LR_REGNUM
19017 || ! IS_INTERRUPT (func_type
)))
19019 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19020 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19027 /* Generate the load multiple instruction to restore the
19028 registers. Note we can get here, even if
19029 frame_pointer_needed is true, but only if sp already
19030 points to the base of the saved core registers. */
19031 if (live_regs_mask
& (1 << SP_REGNUM
))
19033 unsigned HOST_WIDE_INT stack_adjust
;
19035 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19036 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19038 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19039 if (TARGET_UNIFIED_ASM
)
19040 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19042 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19045 /* If we can't use ldmib (SA110 bug),
19046 then try to pop r3 instead. */
19048 live_regs_mask
|= 1 << 3;
19050 if (TARGET_UNIFIED_ASM
)
19051 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19053 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19057 if (TARGET_UNIFIED_ASM
)
19058 sprintf (instr
, "pop%s\t{", conditional
);
19060 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
19062 p
= instr
+ strlen (instr
);
19064 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19065 if (live_regs_mask
& (1 << reg
))
19067 int l
= strlen (reg_names
[reg
]);
19073 memcpy (p
, ", ", 2);
19077 memcpy (p
, "%|", 2);
19078 memcpy (p
+ 2, reg_names
[reg
], l
);
19082 if (live_regs_mask
& (1 << LR_REGNUM
))
19084 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19085 /* If returning from an interrupt, restore the CPSR. */
19086 if (IS_INTERRUPT (func_type
))
19093 output_asm_insn (instr
, & operand
);
19095 /* See if we need to generate an extra instruction to
19096 perform the actual function return. */
19098 && func_type
!= ARM_FT_INTERWORKED
19099 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19101 /* The return has already been handled
19102 by loading the LR into the PC. */
19109 switch ((int) ARM_FUNC_TYPE (func_type
))
19113 /* ??? This is wrong for unified assembly syntax. */
19114 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19117 case ARM_FT_INTERWORKED
:
19118 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19121 case ARM_FT_EXCEPTION
:
19122 /* ??? This is wrong for unified assembly syntax. */
19123 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19127 /* Use bx if it's available. */
19128 if (arm_arch5
|| arm_arch4t
)
19129 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19131 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19135 output_asm_insn (instr
, & operand
);
19141 /* Write the function name into the code section, directly preceding
19142 the function prologue.
19144 Code will be output similar to this:
19146 .ascii "arm_poke_function_name", 0
19149 .word 0xff000000 + (t1 - t0)
19150 arm_poke_function_name
19152 stmfd sp!, {fp, ip, lr, pc}
19155 When performing a stack backtrace, code can inspect the value
19156 of 'pc' stored at 'fp' + 0. If the trace function then looks
19157 at location pc - 12 and the top 8 bits are set, then we know
19158 that there is a function name embedded immediately preceding this
19159 location and has length ((pc[-3]) & 0xff000000).
19161 We assume that pc is declared as a pointer to an unsigned long.
19163 It is of no benefit to output the function name if we are assembling
19164 a leaf function. These function types will not contain a stack
19165 backtrace structure, therefore it is not possible to determine the
19168 arm_poke_function_name (FILE *stream
, const char *name
)
19170 unsigned long alignlength
;
19171 unsigned long length
;
19174 length
= strlen (name
) + 1;
19175 alignlength
= ROUND_UP_WORD (length
);
19177 ASM_OUTPUT_ASCII (stream
, name
, length
);
19178 ASM_OUTPUT_ALIGN (stream
, 2);
19179 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19180 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19183 /* Place some comments into the assembler stream
19184 describing the current function. */
19186 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19188 unsigned long func_type
;
19190 /* ??? Do we want to print some of the below anyway? */
19194 /* Sanity check. */
19195 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19197 func_type
= arm_current_func_type ();
19199 switch ((int) ARM_FUNC_TYPE (func_type
))
19202 case ARM_FT_NORMAL
:
19204 case ARM_FT_INTERWORKED
:
19205 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19208 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19211 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19213 case ARM_FT_EXCEPTION
:
19214 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19218 if (IS_NAKED (func_type
))
19219 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19221 if (IS_VOLATILE (func_type
))
19222 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19224 if (IS_NESTED (func_type
))
19225 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19226 if (IS_STACKALIGN (func_type
))
19227 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19229 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19231 crtl
->args
.pretend_args_size
, frame_size
);
19233 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19234 frame_pointer_needed
,
19235 cfun
->machine
->uses_anonymous_args
);
19237 if (cfun
->machine
->lr_save_eliminated
)
19238 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19240 if (crtl
->calls_eh_return
)
19241 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19246 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19247 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19249 arm_stack_offsets
*offsets
;
19255 /* Emit any call-via-reg trampolines that are needed for v4t support
19256 of call_reg and call_value_reg type insns. */
19257 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19259 rtx label
= cfun
->machine
->call_via
[regno
];
19263 switch_to_section (function_section (current_function_decl
));
19264 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19265 CODE_LABEL_NUMBER (label
));
19266 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19270 /* ??? Probably not safe to set this here, since it assumes that a
19271 function will be emitted as assembly immediately after we generate
19272 RTL for it. This does not happen for inline functions. */
19273 cfun
->machine
->return_used_this_function
= 0;
19275 else /* TARGET_32BIT */
19277 /* We need to take into account any stack-frame rounding. */
19278 offsets
= arm_get_frame_offsets ();
19280 gcc_assert (!use_return_insn (FALSE
, NULL
)
19281 || (cfun
->machine
->return_used_this_function
!= 0)
19282 || offsets
->saved_regs
== offsets
->outgoing_args
19283 || frame_pointer_needed
);
19285 /* Reset the ARM-specific per-function variables. */
19286 after_arm_reorg
= 0;
19290 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19291 STR and STRD. If an even number of registers are being pushed, one
19292 or more STRD patterns are created for each register pair. If an
19293 odd number of registers are pushed, emit an initial STR followed by
19294 as many STRD instructions as are needed. This works best when the
19295 stack is initially 64-bit aligned (the normal case), since it
19296 ensures that each STRD is also 64-bit aligned. */
19298 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19303 rtx par
= NULL_RTX
;
19304 rtx dwarf
= NULL_RTX
;
19308 num_regs
= bit_count (saved_regs_mask
);
19310 /* Must be at least one register to save, and can't save SP or PC. */
19311 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19312 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19313 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19315 /* Create sequence for DWARF info. All the frame-related data for
19316 debugging is held in this wrapper. */
19317 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19319 /* Describe the stack adjustment. */
19320 tmp
= gen_rtx_SET (VOIDmode
,
19322 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19323 RTX_FRAME_RELATED_P (tmp
) = 1;
19324 XVECEXP (dwarf
, 0, 0) = tmp
;
19326 /* Find the first register. */
19327 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19332 /* If there's an odd number of registers to push. Start off by
19333 pushing a single register. This ensures that subsequent strd
19334 operations are dword aligned (assuming that SP was originally
19335 64-bit aligned). */
19336 if ((num_regs
& 1) != 0)
19338 rtx reg
, mem
, insn
;
19340 reg
= gen_rtx_REG (SImode
, regno
);
19342 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19343 stack_pointer_rtx
));
19345 mem
= gen_frame_mem (Pmode
,
19347 (Pmode
, stack_pointer_rtx
,
19348 plus_constant (Pmode
, stack_pointer_rtx
,
19351 tmp
= gen_rtx_SET (VOIDmode
, mem
, reg
);
19352 RTX_FRAME_RELATED_P (tmp
) = 1;
19353 insn
= emit_insn (tmp
);
19354 RTX_FRAME_RELATED_P (insn
) = 1;
19355 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19356 tmp
= gen_rtx_SET (VOIDmode
, gen_frame_mem (Pmode
, stack_pointer_rtx
),
19358 RTX_FRAME_RELATED_P (tmp
) = 1;
19361 XVECEXP (dwarf
, 0, i
) = tmp
;
19365 while (i
< num_regs
)
19366 if (saved_regs_mask
& (1 << regno
))
19368 rtx reg1
, reg2
, mem1
, mem2
;
19369 rtx tmp0
, tmp1
, tmp2
;
19372 /* Find the register to pair with this one. */
19373 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19377 reg1
= gen_rtx_REG (SImode
, regno
);
19378 reg2
= gen_rtx_REG (SImode
, regno2
);
19385 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19388 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19390 -4 * (num_regs
- 1)));
19391 tmp0
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19392 plus_constant (Pmode
, stack_pointer_rtx
,
19394 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19395 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19396 RTX_FRAME_RELATED_P (tmp0
) = 1;
19397 RTX_FRAME_RELATED_P (tmp1
) = 1;
19398 RTX_FRAME_RELATED_P (tmp2
) = 1;
19399 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19400 XVECEXP (par
, 0, 0) = tmp0
;
19401 XVECEXP (par
, 0, 1) = tmp1
;
19402 XVECEXP (par
, 0, 2) = tmp2
;
19403 insn
= emit_insn (par
);
19404 RTX_FRAME_RELATED_P (insn
) = 1;
19405 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19409 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19412 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19415 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19416 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19417 RTX_FRAME_RELATED_P (tmp1
) = 1;
19418 RTX_FRAME_RELATED_P (tmp2
) = 1;
19419 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19420 XVECEXP (par
, 0, 0) = tmp1
;
19421 XVECEXP (par
, 0, 1) = tmp2
;
19425 /* Create unwind information. This is an approximation. */
19426 tmp1
= gen_rtx_SET (VOIDmode
,
19427 gen_frame_mem (Pmode
,
19428 plus_constant (Pmode
,
19432 tmp2
= gen_rtx_SET (VOIDmode
,
19433 gen_frame_mem (Pmode
,
19434 plus_constant (Pmode
,
19439 RTX_FRAME_RELATED_P (tmp1
) = 1;
19440 RTX_FRAME_RELATED_P (tmp2
) = 1;
19441 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19442 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19444 regno
= regno2
+ 1;
19452 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19453 whenever possible, otherwise it emits single-word stores. The first store
19454 also allocates stack space for all saved registers, using writeback with
19455 post-addressing mode. All other stores use offset addressing. If no STRD
19456 can be emitted, this function emits a sequence of single-word stores,
19457 and not an STM as before, because single-word stores provide more freedom
19458 scheduling and can be turned into an STM by peephole optimizations. */
19460 arm_emit_strd_push (unsigned long saved_regs_mask
)
19463 int i
, j
, dwarf_index
= 0;
19465 rtx dwarf
= NULL_RTX
;
19466 rtx insn
= NULL_RTX
;
19469 /* TODO: A more efficient code can be emitted by changing the
19470 layout, e.g., first push all pairs that can use STRD to keep the
19471 stack aligned, and then push all other registers. */
19472 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19473 if (saved_regs_mask
& (1 << i
))
19476 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19477 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19478 gcc_assert (num_regs
> 0);
19480 /* Create sequence for DWARF info. */
19481 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19483 /* For dwarf info, we generate explicit stack update. */
19484 tmp
= gen_rtx_SET (VOIDmode
,
19486 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19487 RTX_FRAME_RELATED_P (tmp
) = 1;
19488 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19490 /* Save registers. */
19491 offset
= - 4 * num_regs
;
19493 while (j
<= LAST_ARM_REGNUM
)
19494 if (saved_regs_mask
& (1 << j
))
19497 && (saved_regs_mask
& (1 << (j
+ 1))))
19499 /* Current register and previous register form register pair for
19500 which STRD can be generated. */
19503 /* Allocate stack space for all saved registers. */
19504 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19505 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19506 mem
= gen_frame_mem (DImode
, tmp
);
19509 else if (offset
> 0)
19510 mem
= gen_frame_mem (DImode
,
19511 plus_constant (Pmode
,
19515 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19517 tmp
= gen_rtx_SET (DImode
, mem
, gen_rtx_REG (DImode
, j
));
19518 RTX_FRAME_RELATED_P (tmp
) = 1;
19519 tmp
= emit_insn (tmp
);
19521 /* Record the first store insn. */
19522 if (dwarf_index
== 1)
19525 /* Generate dwarf info. */
19526 mem
= gen_frame_mem (SImode
,
19527 plus_constant (Pmode
,
19530 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19531 RTX_FRAME_RELATED_P (tmp
) = 1;
19532 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19534 mem
= gen_frame_mem (SImode
,
19535 plus_constant (Pmode
,
19538 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
+ 1));
19539 RTX_FRAME_RELATED_P (tmp
) = 1;
19540 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19547 /* Emit a single word store. */
19550 /* Allocate stack space for all saved registers. */
19551 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19552 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19553 mem
= gen_frame_mem (SImode
, tmp
);
19556 else if (offset
> 0)
19557 mem
= gen_frame_mem (SImode
,
19558 plus_constant (Pmode
,
19562 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19564 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19565 RTX_FRAME_RELATED_P (tmp
) = 1;
19566 tmp
= emit_insn (tmp
);
19568 /* Record the first store insn. */
19569 if (dwarf_index
== 1)
19572 /* Generate dwarf info. */
19573 mem
= gen_frame_mem (SImode
,
19574 plus_constant(Pmode
,
19577 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19578 RTX_FRAME_RELATED_P (tmp
) = 1;
19579 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19588 /* Attach dwarf info to the first insn we generate. */
19589 gcc_assert (insn
!= NULL_RTX
);
19590 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19591 RTX_FRAME_RELATED_P (insn
) = 1;
19594 /* Generate and emit an insn that we will recognize as a push_multi.
19595 Unfortunately, since this insn does not reflect very well the actual
19596 semantics of the operation, we need to annotate the insn for the benefit
19597 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19598 MASK for registers that should be annotated for DWARF2 frame unwind
19601 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
19604 int num_dwarf_regs
= 0;
19608 int dwarf_par_index
;
19611 /* We don't record the PC in the dwarf frame information. */
19612 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
19614 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19616 if (mask
& (1 << i
))
19618 if (dwarf_regs_mask
& (1 << i
))
19622 gcc_assert (num_regs
&& num_regs
<= 16);
19623 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
19625 /* For the body of the insn we are going to generate an UNSPEC in
19626 parallel with several USEs. This allows the insn to be recognized
19627 by the push_multi pattern in the arm.md file.
19629 The body of the insn looks something like this:
19632 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19633 (const_int:SI <num>)))
19634 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19640 For the frame note however, we try to be more explicit and actually
19641 show each register being stored into the stack frame, plus a (single)
19642 decrement of the stack pointer. We do it this way in order to be
19643 friendly to the stack unwinding code, which only wants to see a single
19644 stack decrement per instruction. The RTL we generate for the note looks
19645 something like this:
19648 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19649 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19650 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19651 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19655 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19656 instead we'd have a parallel expression detailing all
19657 the stores to the various memory addresses so that debug
19658 information is more up-to-date. Remember however while writing
19659 this to take care of the constraints with the push instruction.
19661 Note also that this has to be taken care of for the VFP registers.
19663 For more see PR43399. */
19665 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
19666 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
19667 dwarf_par_index
= 1;
19669 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19671 if (mask
& (1 << i
))
19673 reg
= gen_rtx_REG (SImode
, i
);
19675 XVECEXP (par
, 0, 0)
19676 = gen_rtx_SET (VOIDmode
,
19679 gen_rtx_PRE_MODIFY (Pmode
,
19682 (Pmode
, stack_pointer_rtx
,
19685 gen_rtx_UNSPEC (BLKmode
,
19686 gen_rtvec (1, reg
),
19687 UNSPEC_PUSH_MULT
));
19689 if (dwarf_regs_mask
& (1 << i
))
19691 tmp
= gen_rtx_SET (VOIDmode
,
19692 gen_frame_mem (SImode
, stack_pointer_rtx
),
19694 RTX_FRAME_RELATED_P (tmp
) = 1;
19695 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19702 for (j
= 1, i
++; j
< num_regs
; i
++)
19704 if (mask
& (1 << i
))
19706 reg
= gen_rtx_REG (SImode
, i
);
19708 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
19710 if (dwarf_regs_mask
& (1 << i
))
19713 = gen_rtx_SET (VOIDmode
,
19716 plus_constant (Pmode
, stack_pointer_rtx
,
19719 RTX_FRAME_RELATED_P (tmp
) = 1;
19720 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19727 par
= emit_insn (par
);
19729 tmp
= gen_rtx_SET (VOIDmode
,
19731 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19732 RTX_FRAME_RELATED_P (tmp
) = 1;
19733 XVECEXP (dwarf
, 0, 0) = tmp
;
19735 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19740 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19741 SIZE is the offset to be adjusted.
19742 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19744 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
19748 RTX_FRAME_RELATED_P (insn
) = 1;
19749 dwarf
= gen_rtx_SET (VOIDmode
, dest
, plus_constant (Pmode
, src
, size
));
19750 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
19753 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19754 SAVED_REGS_MASK shows which registers need to be restored.
19756 Unfortunately, since this insn does not reflect very well the actual
19757 semantics of the operation, we need to annotate the insn for the benefit
19758 of DWARF2 frame unwind information. */
19760 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
19765 rtx dwarf
= NULL_RTX
;
19771 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
19772 offset_adj
= return_in_pc
? 1 : 0;
19773 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19774 if (saved_regs_mask
& (1 << i
))
19777 gcc_assert (num_regs
&& num_regs
<= 16);
19779 /* If SP is in reglist, then we don't emit SP update insn. */
19780 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
19782 /* The parallel needs to hold num_regs SETs
19783 and one SET for the stack update. */
19784 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
19789 XVECEXP (par
, 0, 0) = tmp
;
19794 /* Increment the stack pointer, based on there being
19795 num_regs 4-byte registers to restore. */
19796 tmp
= gen_rtx_SET (VOIDmode
,
19798 plus_constant (Pmode
,
19801 RTX_FRAME_RELATED_P (tmp
) = 1;
19802 XVECEXP (par
, 0, offset_adj
) = tmp
;
19805 /* Now restore every reg, which may include PC. */
19806 for (j
= 0, i
= 0; j
< num_regs
; i
++)
19807 if (saved_regs_mask
& (1 << i
))
19809 reg
= gen_rtx_REG (SImode
, i
);
19810 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
19812 /* Emit single load with writeback. */
19813 tmp
= gen_frame_mem (SImode
,
19814 gen_rtx_POST_INC (Pmode
,
19815 stack_pointer_rtx
));
19816 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, tmp
));
19817 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19821 tmp
= gen_rtx_SET (VOIDmode
,
19825 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
19826 RTX_FRAME_RELATED_P (tmp
) = 1;
19827 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
19829 /* We need to maintain a sequence for DWARF info too. As dwarf info
19830 should not have PC, skip PC. */
19831 if (i
!= PC_REGNUM
)
19832 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19838 par
= emit_jump_insn (par
);
19840 par
= emit_insn (par
);
19842 REG_NOTES (par
) = dwarf
;
19844 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
19845 stack_pointer_rtx
, stack_pointer_rtx
);
19848 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19849 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19851 Unfortunately, since this insn does not reflect very well the actual
19852 semantics of the operation, we need to annotate the insn for the benefit
19853 of DWARF2 frame unwind information. */
19855 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
19859 rtx dwarf
= NULL_RTX
;
19862 gcc_assert (num_regs
&& num_regs
<= 32);
19864 /* Workaround ARM10 VFPr1 bug. */
19865 if (num_regs
== 2 && !arm_arch6
)
19867 if (first_reg
== 15)
19873 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19874 there could be up to 32 D-registers to restore.
19875 If there are more than 16 D-registers, make two recursive calls,
19876 each of which emits one pop_multi instruction. */
19879 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
19880 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
19884 /* The parallel needs to hold num_regs SETs
19885 and one SET for the stack update. */
19886 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19888 /* Increment the stack pointer, based on there being
19889 num_regs 8-byte registers to restore. */
19890 tmp
= gen_rtx_SET (VOIDmode
,
19892 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
19893 RTX_FRAME_RELATED_P (tmp
) = 1;
19894 XVECEXP (par
, 0, 0) = tmp
;
19896 /* Now show every reg that will be restored, using a SET for each. */
19897 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
19899 reg
= gen_rtx_REG (DFmode
, i
);
19901 tmp
= gen_rtx_SET (VOIDmode
,
19905 plus_constant (Pmode
, base_reg
, 8 * j
)));
19906 RTX_FRAME_RELATED_P (tmp
) = 1;
19907 XVECEXP (par
, 0, j
+ 1) = tmp
;
19909 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19914 par
= emit_insn (par
);
19915 REG_NOTES (par
) = dwarf
;
19917 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
19918 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
19920 RTX_FRAME_RELATED_P (par
) = 1;
19921 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
19924 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
19925 base_reg
, base_reg
);
19928 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
19929 number of registers are being popped, multiple LDRD patterns are created for
19930 all register pairs. If odd number of registers are popped, last register is
19931 loaded by using LDR pattern. */
19933 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
19937 rtx par
= NULL_RTX
;
19938 rtx dwarf
= NULL_RTX
;
19939 rtx tmp
, reg
, tmp1
;
19942 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
19943 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19944 if (saved_regs_mask
& (1 << i
))
19947 gcc_assert (num_regs
&& num_regs
<= 16);
19949 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19950 to be popped. So, if num_regs is even, now it will become odd,
19951 and we can generate pop with PC. If num_regs is odd, it will be
19952 even now, and ldr with return can be generated for PC. */
19956 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19958 /* Var j iterates over all the registers to gather all the registers in
19959 saved_regs_mask. Var i gives index of saved registers in stack frame.
19960 A PARALLEL RTX of register-pair is created here, so that pattern for
19961 LDRD can be matched. As PC is always last register to be popped, and
19962 we have already decremented num_regs if PC, we don't have to worry
19963 about PC in this loop. */
19964 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
19965 if (saved_regs_mask
& (1 << j
))
19967 /* Create RTX for memory load. */
19968 reg
= gen_rtx_REG (SImode
, j
);
19969 tmp
= gen_rtx_SET (SImode
,
19971 gen_frame_mem (SImode
,
19972 plus_constant (Pmode
,
19973 stack_pointer_rtx
, 4 * i
)));
19974 RTX_FRAME_RELATED_P (tmp
) = 1;
19978 /* When saved-register index (i) is even, the RTX to be emitted is
19979 yet to be created. Hence create it first. The LDRD pattern we
19980 are generating is :
19981 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19982 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19983 where target registers need not be consecutive. */
19984 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19988 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19989 added as 0th element and if i is odd, reg_i is added as 1st element
19990 of LDRD pattern shown above. */
19991 XVECEXP (par
, 0, (i
% 2)) = tmp
;
19992 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19996 /* When saved-register index (i) is odd, RTXs for both the registers
19997 to be loaded are generated in above given LDRD pattern, and the
19998 pattern can be emitted now. */
19999 par
= emit_insn (par
);
20000 REG_NOTES (par
) = dwarf
;
20001 RTX_FRAME_RELATED_P (par
) = 1;
20007 /* If the number of registers pushed is odd AND return_in_pc is false OR
20008 number of registers are even AND return_in_pc is true, last register is
20009 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20010 then LDR with post increment. */
20012 /* Increment the stack pointer, based on there being
20013 num_regs 4-byte registers to restore. */
20014 tmp
= gen_rtx_SET (VOIDmode
,
20016 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20017 RTX_FRAME_RELATED_P (tmp
) = 1;
20018 tmp
= emit_insn (tmp
);
20021 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20022 stack_pointer_rtx
, stack_pointer_rtx
);
20027 if (((num_regs
% 2) == 1 && !return_in_pc
)
20028 || ((num_regs
% 2) == 0 && return_in_pc
))
20030 /* Scan for the single register to be popped. Skip until the saved
20031 register is found. */
20032 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20034 /* Gen LDR with post increment here. */
20035 tmp1
= gen_rtx_MEM (SImode
,
20036 gen_rtx_POST_INC (SImode
,
20037 stack_pointer_rtx
));
20038 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20040 reg
= gen_rtx_REG (SImode
, j
);
20041 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
20042 RTX_FRAME_RELATED_P (tmp
) = 1;
20043 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20047 /* If return_in_pc, j must be PC_REGNUM. */
20048 gcc_assert (j
== PC_REGNUM
);
20049 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20050 XVECEXP (par
, 0, 0) = ret_rtx
;
20051 XVECEXP (par
, 0, 1) = tmp
;
20052 par
= emit_jump_insn (par
);
20056 par
= emit_insn (tmp
);
20057 REG_NOTES (par
) = dwarf
;
20058 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20059 stack_pointer_rtx
, stack_pointer_rtx
);
20063 else if ((num_regs
% 2) == 1 && return_in_pc
)
20065 /* There are 2 registers to be popped. So, generate the pattern
20066 pop_multiple_with_stack_update_and_return to pop in PC. */
20067 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20073 /* LDRD in ARM mode needs consecutive registers as operands. This function
20074 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20075 offset addressing and then generates one separate stack udpate. This provides
20076 more scheduling freedom, compared to writeback on every load. However,
20077 if the function returns using load into PC directly
20078 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20079 before the last load. TODO: Add a peephole optimization to recognize
20080 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20081 peephole optimization to merge the load at stack-offset zero
20082 with the stack update instruction using load with writeback
20083 in post-index addressing mode. */
20085 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20089 rtx par
= NULL_RTX
;
20090 rtx dwarf
= NULL_RTX
;
20093 /* Restore saved registers. */
20094 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20096 while (j
<= LAST_ARM_REGNUM
)
20097 if (saved_regs_mask
& (1 << j
))
20100 && (saved_regs_mask
& (1 << (j
+ 1)))
20101 && (j
+ 1) != PC_REGNUM
)
20103 /* Current register and next register form register pair for which
20104 LDRD can be generated. PC is always the last register popped, and
20105 we handle it separately. */
20107 mem
= gen_frame_mem (DImode
,
20108 plus_constant (Pmode
,
20112 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20114 tmp
= gen_rtx_SET (DImode
, gen_rtx_REG (DImode
, j
), mem
);
20115 tmp
= emit_insn (tmp
);
20116 RTX_FRAME_RELATED_P (tmp
) = 1;
20118 /* Generate dwarf info. */
20120 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20121 gen_rtx_REG (SImode
, j
),
20123 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20124 gen_rtx_REG (SImode
, j
+ 1),
20127 REG_NOTES (tmp
) = dwarf
;
20132 else if (j
!= PC_REGNUM
)
20134 /* Emit a single word load. */
20136 mem
= gen_frame_mem (SImode
,
20137 plus_constant (Pmode
,
20141 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20143 tmp
= gen_rtx_SET (SImode
, gen_rtx_REG (SImode
, j
), mem
);
20144 tmp
= emit_insn (tmp
);
20145 RTX_FRAME_RELATED_P (tmp
) = 1;
20147 /* Generate dwarf info. */
20148 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20149 gen_rtx_REG (SImode
, j
),
20155 else /* j == PC_REGNUM */
20161 /* Update the stack. */
20164 tmp
= gen_rtx_SET (Pmode
,
20166 plus_constant (Pmode
,
20169 tmp
= emit_insn (tmp
);
20170 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20171 stack_pointer_rtx
, stack_pointer_rtx
);
20175 if (saved_regs_mask
& (1 << PC_REGNUM
))
20177 /* Only PC is to be popped. */
20178 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20179 XVECEXP (par
, 0, 0) = ret_rtx
;
20180 tmp
= gen_rtx_SET (SImode
,
20181 gen_rtx_REG (SImode
, PC_REGNUM
),
20182 gen_frame_mem (SImode
,
20183 gen_rtx_POST_INC (SImode
,
20184 stack_pointer_rtx
)));
20185 RTX_FRAME_RELATED_P (tmp
) = 1;
20186 XVECEXP (par
, 0, 1) = tmp
;
20187 par
= emit_jump_insn (par
);
20189 /* Generate dwarf info. */
20190 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20191 gen_rtx_REG (SImode
, PC_REGNUM
),
20193 REG_NOTES (par
) = dwarf
;
20194 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20195 stack_pointer_rtx
, stack_pointer_rtx
);
20199 /* Calculate the size of the return value that is passed in registers. */
20201 arm_size_return_regs (void)
20203 enum machine_mode mode
;
20205 if (crtl
->return_rtx
!= 0)
20206 mode
= GET_MODE (crtl
->return_rtx
);
20208 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20210 return GET_MODE_SIZE (mode
);
20213 /* Return true if the current function needs to save/restore LR. */
20215 thumb_force_lr_save (void)
20217 return !cfun
->machine
->lr_save_eliminated
20218 && (!leaf_function_p ()
20219 || thumb_far_jump_used_p ()
20220 || df_regs_ever_live_p (LR_REGNUM
));
20223 /* We do not know if r3 will be available because
20224 we do have an indirect tailcall happening in this
20225 particular case. */
20227 is_indirect_tailcall_p (rtx call
)
20229 rtx pat
= PATTERN (call
);
20231 /* Indirect tail call. */
20232 pat
= XVECEXP (pat
, 0, 0);
20233 if (GET_CODE (pat
) == SET
)
20234 pat
= SET_SRC (pat
);
20236 pat
= XEXP (XEXP (pat
, 0), 0);
20237 return REG_P (pat
);
20240 /* Return true if r3 is used by any of the tail call insns in the
20241 current function. */
20243 any_sibcall_could_use_r3 (void)
20248 if (!crtl
->tail_call_emit
)
20250 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20251 if (e
->flags
& EDGE_SIBCALL
)
20253 rtx call
= BB_END (e
->src
);
20254 if (!CALL_P (call
))
20255 call
= prev_nonnote_nondebug_insn (call
);
20256 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20257 if (find_regno_fusage (call
, USE
, 3)
20258 || is_indirect_tailcall_p (call
))
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
20314 /* Calculate stack offsets. These are used to calculate register elimination
20315 offsets and in prologue/epilogue code. Also calculates which registers
20316 should be saved. */
20318 static arm_stack_offsets
*
20319 arm_get_frame_offsets (void)
20321 struct arm_stack_offsets
*offsets
;
20322 unsigned long func_type
;
20326 HOST_WIDE_INT frame_size
;
20329 offsets
= &cfun
->machine
->stack_offsets
;
20331 /* We need to know if we are a leaf function. Unfortunately, it
20332 is possible to be called after start_sequence has been called,
20333 which causes get_insns to return the insns for the sequence,
20334 not the function, which will cause leaf_function_p to return
20335 the incorrect result.
20337 to know about leaf functions once reload has completed, and the
20338 frame size cannot be changed after that time, so we can safely
20339 use the cached value. */
20341 if (reload_completed
)
20344 /* Initially this is the size of the local variables. It will translated
20345 into an offset once we have determined the size of preceding data. */
20346 frame_size
= ROUND_UP_WORD (get_frame_size ());
20348 leaf
= leaf_function_p ();
20350 /* Space for variadic functions. */
20351 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20353 /* In Thumb mode this is incorrect, but never used. */
20355 = (offsets
->saved_args
20356 + arm_compute_static_chain_stack_bytes ()
20357 + (frame_pointer_needed
? 4 : 0));
20361 unsigned int regno
;
20363 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20364 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20365 saved
= core_saved
;
20367 /* We know that SP will be doubleword aligned on entry, and we must
20368 preserve that condition at any subroutine call. We also require the
20369 soft frame pointer to be doubleword aligned. */
20371 if (TARGET_REALLY_IWMMXT
)
20373 /* Check for the call-saved iWMMXt registers. */
20374 for (regno
= FIRST_IWMMXT_REGNUM
;
20375 regno
<= LAST_IWMMXT_REGNUM
;
20377 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20381 func_type
= arm_current_func_type ();
20382 /* Space for saved VFP registers. */
20383 if (! IS_VOLATILE (func_type
)
20384 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20385 saved
+= arm_get_vfp_saved_size ();
20387 else /* TARGET_THUMB1 */
20389 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20390 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20391 saved
= core_saved
;
20392 if (TARGET_BACKTRACE
)
20396 /* Saved registers include the stack frame. */
20397 offsets
->saved_regs
20398 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20399 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20401 /* A leaf function does not need any stack alignment if it has nothing
20403 if (leaf
&& frame_size
== 0
20404 /* However if it calls alloca(), we have a dynamically allocated
20405 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20406 && ! cfun
->calls_alloca
)
20408 offsets
->outgoing_args
= offsets
->soft_frame
;
20409 offsets
->locals_base
= offsets
->soft_frame
;
20413 /* Ensure SFP has the correct alignment. */
20414 if (ARM_DOUBLEWORD_ALIGN
20415 && (offsets
->soft_frame
& 7))
20417 offsets
->soft_frame
+= 4;
20418 /* Try to align stack by pushing an extra reg. Don't bother doing this
20419 when there is a stack frame as the alignment will be rolled into
20420 the normal stack adjustment. */
20421 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20425 /* If it is safe to use r3, then do so. This sometimes
20426 generates better code on Thumb-2 by avoiding the need to
20427 use 32-bit push/pop instructions. */
20428 if (! any_sibcall_could_use_r3 ()
20429 && arm_size_return_regs () <= 12
20430 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20432 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20437 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20439 /* Avoid fixed registers; they may be changed at
20440 arbitrary times so it's unsafe to restore them
20441 during the epilogue. */
20443 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20452 offsets
->saved_regs
+= 4;
20453 offsets
->saved_regs_mask
|= (1 << reg
);
20458 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20459 offsets
->outgoing_args
= (offsets
->locals_base
20460 + crtl
->outgoing_args_size
);
20462 if (ARM_DOUBLEWORD_ALIGN
)
20464 /* Ensure SP remains doubleword aligned. */
20465 if (offsets
->outgoing_args
& 7)
20466 offsets
->outgoing_args
+= 4;
20467 gcc_assert (!(offsets
->outgoing_args
& 7));
20474 /* Calculate the relative offsets for the different stack pointers. Positive
20475 offsets are in the direction of stack growth. */
20478 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20480 arm_stack_offsets
*offsets
;
20482 offsets
= arm_get_frame_offsets ();
20484 /* OK, now we have enough information to compute the distances.
20485 There must be an entry in these switch tables for each pair
20486 of registers in ELIMINABLE_REGS, even if some of the entries
20487 seem to be redundant or useless. */
20490 case ARG_POINTER_REGNUM
:
20493 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20496 case FRAME_POINTER_REGNUM
:
20497 /* This is the reverse of the soft frame pointer
20498 to hard frame pointer elimination below. */
20499 return offsets
->soft_frame
- offsets
->saved_args
;
20501 case ARM_HARD_FRAME_POINTER_REGNUM
:
20502 /* This is only non-zero in the case where the static chain register
20503 is stored above the frame. */
20504 return offsets
->frame
- offsets
->saved_args
- 4;
20506 case STACK_POINTER_REGNUM
:
20507 /* If nothing has been pushed on the stack at all
20508 then this will return -4. This *is* correct! */
20509 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20512 gcc_unreachable ();
20514 gcc_unreachable ();
20516 case FRAME_POINTER_REGNUM
:
20519 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20522 case ARM_HARD_FRAME_POINTER_REGNUM
:
20523 /* The hard frame pointer points to the top entry in the
20524 stack frame. The soft frame pointer to the bottom entry
20525 in the stack frame. If there is no stack frame at all,
20526 then they are identical. */
20528 return offsets
->frame
- offsets
->soft_frame
;
20530 case STACK_POINTER_REGNUM
:
20531 return offsets
->outgoing_args
- offsets
->soft_frame
;
20534 gcc_unreachable ();
20536 gcc_unreachable ();
20539 /* You cannot eliminate from the stack pointer.
20540 In theory you could eliminate from the hard frame
20541 pointer to the stack pointer, but this will never
20542 happen, since if a stack frame is not needed the
20543 hard frame pointer will never be used. */
20544 gcc_unreachable ();
20548 /* Given FROM and TO register numbers, say whether this elimination is
20549 allowed. Frame pointer elimination is automatically handled.
20551 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20552 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20553 pointer, we must eliminate FRAME_POINTER_REGNUM into
20554 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20555 ARG_POINTER_REGNUM. */
20558 arm_can_eliminate (const int from
, const int to
)
20560 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20561 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20562 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20563 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20567 /* Emit RTL to save coprocessor registers on function entry. Returns the
20568 number of bytes pushed. */
20571 arm_save_coproc_regs(void)
20573 int saved_size
= 0;
20575 unsigned start_reg
;
20578 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20579 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20581 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20582 insn
= gen_rtx_MEM (V2SImode
, insn
);
20583 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20584 RTX_FRAME_RELATED_P (insn
) = 1;
20588 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
20590 start_reg
= FIRST_VFP_REGNUM
;
20592 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20594 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20595 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20597 if (start_reg
!= reg
)
20598 saved_size
+= vfp_emit_fstmd (start_reg
,
20599 (reg
- start_reg
) / 2);
20600 start_reg
= reg
+ 2;
20603 if (start_reg
!= reg
)
20604 saved_size
+= vfp_emit_fstmd (start_reg
,
20605 (reg
- start_reg
) / 2);
20611 /* Set the Thumb frame pointer from the stack pointer. */
20614 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
20616 HOST_WIDE_INT amount
;
20619 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
20621 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20622 stack_pointer_rtx
, GEN_INT (amount
)));
20625 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
20626 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20627 expects the first two operands to be the same. */
20630 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20632 hard_frame_pointer_rtx
));
20636 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20637 hard_frame_pointer_rtx
,
20638 stack_pointer_rtx
));
20640 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
20641 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
20642 RTX_FRAME_RELATED_P (dwarf
) = 1;
20643 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20646 RTX_FRAME_RELATED_P (insn
) = 1;
20649 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20652 arm_expand_prologue (void)
20657 unsigned long live_regs_mask
;
20658 unsigned long func_type
;
20660 int saved_pretend_args
= 0;
20661 int saved_regs
= 0;
20662 unsigned HOST_WIDE_INT args_to_push
;
20663 arm_stack_offsets
*offsets
;
20665 func_type
= arm_current_func_type ();
20667 /* Naked functions don't have prologues. */
20668 if (IS_NAKED (func_type
))
20671 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20672 args_to_push
= crtl
->args
.pretend_args_size
;
20674 /* Compute which register we will have to save onto the stack. */
20675 offsets
= arm_get_frame_offsets ();
20676 live_regs_mask
= offsets
->saved_regs_mask
;
20678 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
20680 if (IS_STACKALIGN (func_type
))
20684 /* Handle a word-aligned stack pointer. We generate the following:
20689 <save and restore r0 in normal prologue/epilogue>
20693 The unwinder doesn't need to know about the stack realignment.
20694 Just tell it we saved SP in r0. */
20695 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
20697 r0
= gen_rtx_REG (SImode
, 0);
20698 r1
= gen_rtx_REG (SImode
, 1);
20700 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
20701 RTX_FRAME_RELATED_P (insn
) = 1;
20702 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
20704 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
20706 /* ??? The CFA changes here, which may cause GDB to conclude that it
20707 has entered a different function. That said, the unwind info is
20708 correct, individually, before and after this instruction because
20709 we've described the save of SP, which will override the default
20710 handling of SP as restoring from the CFA. */
20711 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
20714 /* For APCS frames, if IP register is clobbered
20715 when creating frame, save that register in a special
20717 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20719 if (IS_INTERRUPT (func_type
))
20721 /* Interrupt functions must not corrupt any registers.
20722 Creating a frame pointer however, corrupts the IP
20723 register, so we must push it first. */
20724 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
20726 /* Do not set RTX_FRAME_RELATED_P on this insn.
20727 The dwarf stack unwinding code only wants to see one
20728 stack decrement per function, and this is not it. If
20729 this instruction is labeled as being part of the frame
20730 creation sequence then dwarf2out_frame_debug_expr will
20731 die when it encounters the assignment of IP to FP
20732 later on, since the use of SP here establishes SP as
20733 the CFA register and not IP.
20735 Anyway this instruction is not really part of the stack
20736 frame creation although it is part of the prologue. */
20738 else if (IS_NESTED (func_type
))
20740 /* The static chain register is the same as the IP register
20741 used as a scratch register during stack frame creation.
20742 To get around this need to find somewhere to store IP
20743 whilst the frame is being created. We try the following
20746 1. The last argument register r3 if it is available.
20747 2. A slot on the stack above the frame if there are no
20748 arguments to push onto the stack.
20749 3. Register r3 again, after pushing the argument registers
20750 onto the stack, if this is a varargs function.
20751 4. The last slot on the stack created for the arguments to
20752 push, if this isn't a varargs function.
20754 Note - we only need to tell the dwarf2 backend about the SP
20755 adjustment in the second variant; the static chain register
20756 doesn't need to be unwound, as it doesn't contain a value
20757 inherited from the caller. */
20759 if (!arm_r3_live_at_start_p ())
20760 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20761 else if (args_to_push
== 0)
20765 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20768 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20769 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
20772 /* Just tell the dwarf backend that we adjusted SP. */
20773 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20774 plus_constant (Pmode
, stack_pointer_rtx
,
20776 RTX_FRAME_RELATED_P (insn
) = 1;
20777 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20781 /* Store the args on the stack. */
20782 if (cfun
->machine
->uses_anonymous_args
)
20785 = emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
20786 (0xf0 >> (args_to_push
/ 4)) & 0xf);
20787 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20788 saved_pretend_args
= 1;
20794 if (args_to_push
== 4)
20795 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20798 = gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
20799 plus_constant (Pmode
,
20803 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
20805 /* Just tell the dwarf backend that we adjusted SP. */
20807 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20808 plus_constant (Pmode
, stack_pointer_rtx
,
20810 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20813 RTX_FRAME_RELATED_P (insn
) = 1;
20814 fp_offset
= args_to_push
;
20819 insn
= emit_set_insn (ip_rtx
,
20820 plus_constant (Pmode
, stack_pointer_rtx
,
20822 RTX_FRAME_RELATED_P (insn
) = 1;
20827 /* Push the argument registers, or reserve space for them. */
20828 if (cfun
->machine
->uses_anonymous_args
)
20829 insn
= emit_multi_reg_push
20830 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
20831 (0xf0 >> (args_to_push
/ 4)) & 0xf);
20834 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20835 GEN_INT (- args_to_push
)));
20836 RTX_FRAME_RELATED_P (insn
) = 1;
20839 /* If this is an interrupt service routine, and the link register
20840 is going to be pushed, and we're not generating extra
20841 push of IP (needed when frame is needed and frame layout if apcs),
20842 subtracting four from LR now will mean that the function return
20843 can be done with a single instruction. */
20844 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
20845 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
20846 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
20849 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
20851 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
20854 if (live_regs_mask
)
20856 unsigned long dwarf_regs_mask
= live_regs_mask
;
20858 saved_regs
+= bit_count (live_regs_mask
) * 4;
20859 if (optimize_size
&& !frame_pointer_needed
20860 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
20862 /* If no coprocessor registers are being pushed and we don't have
20863 to worry about a frame pointer then push extra registers to
20864 create the stack frame. This is done is a way that does not
20865 alter the frame layout, so is independent of the epilogue. */
20869 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
20871 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
20872 if (frame
&& n
* 4 >= frame
)
20875 live_regs_mask
|= (1 << n
) - 1;
20876 saved_regs
+= frame
;
20881 && current_tune
->prefer_ldrd_strd
20882 && !optimize_function_for_size_p (cfun
))
20884 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
20886 thumb2_emit_strd_push (live_regs_mask
);
20887 else if (TARGET_ARM
20888 && !TARGET_APCS_FRAME
20889 && !IS_INTERRUPT (func_type
))
20890 arm_emit_strd_push (live_regs_mask
);
20893 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
20894 RTX_FRAME_RELATED_P (insn
) = 1;
20899 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
20900 RTX_FRAME_RELATED_P (insn
) = 1;
20904 if (! IS_VOLATILE (func_type
))
20905 saved_regs
+= arm_save_coproc_regs ();
20907 if (frame_pointer_needed
&& TARGET_ARM
)
20909 /* Create the new frame pointer. */
20910 if (TARGET_APCS_FRAME
)
20912 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
20913 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
20914 RTX_FRAME_RELATED_P (insn
) = 1;
20916 if (IS_NESTED (func_type
))
20918 /* Recover the static chain register. */
20919 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
20920 insn
= gen_rtx_REG (SImode
, 3);
20923 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
20924 insn
= gen_frame_mem (SImode
, insn
);
20926 emit_set_insn (ip_rtx
, insn
);
20927 /* Add a USE to stop propagate_one_insn() from barfing. */
20928 emit_insn (gen_force_register_use (ip_rtx
));
20933 insn
= GEN_INT (saved_regs
- 4);
20934 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20935 stack_pointer_rtx
, insn
));
20936 RTX_FRAME_RELATED_P (insn
) = 1;
20940 if (flag_stack_usage_info
)
20941 current_function_static_stack_size
20942 = offsets
->outgoing_args
- offsets
->saved_args
;
20944 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
20946 /* This add can produce multiple insns for a large constant, so we
20947 need to get tricky. */
20948 rtx last
= get_last_insn ();
20950 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
20951 - offsets
->outgoing_args
);
20953 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20957 last
= last
? NEXT_INSN (last
) : get_insns ();
20958 RTX_FRAME_RELATED_P (last
) = 1;
20960 while (last
!= insn
);
20962 /* If the frame pointer is needed, emit a special barrier that
20963 will prevent the scheduler from moving stores to the frame
20964 before the stack adjustment. */
20965 if (frame_pointer_needed
)
20966 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
20967 hard_frame_pointer_rtx
));
20971 if (frame_pointer_needed
&& TARGET_THUMB2
)
20972 thumb_set_frame_pointer (offsets
);
20974 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20976 unsigned long mask
;
20978 mask
= live_regs_mask
;
20979 mask
&= THUMB2_WORK_REGS
;
20980 if (!IS_NESTED (func_type
))
20981 mask
|= (1 << IP_REGNUM
);
20982 arm_load_pic_register (mask
);
20985 /* If we are profiling, make sure no instructions are scheduled before
20986 the call to mcount. Similarly if the user has requested no
20987 scheduling in the prolog. Similarly if we want non-call exceptions
20988 using the EABI unwinder, to prevent faulting instructions from being
20989 swapped with a stack adjustment. */
20990 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
20991 || (arm_except_unwind_info (&global_options
) == UI_TARGET
20992 && cfun
->can_throw_non_call_exceptions
))
20993 emit_insn (gen_blockage ());
20995 /* If the link register is being kept alive, with the return address in it,
20996 then make sure that it does not get reused by the ce2 pass. */
20997 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
20998 cfun
->machine
->lr_save_eliminated
= 1;
21001 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21003 arm_print_condition (FILE *stream
)
21005 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21007 /* Branch conversion is not implemented for Thumb-2. */
21010 output_operand_lossage ("predicated Thumb instruction");
21013 if (current_insn_predicate
!= NULL
)
21015 output_operand_lossage
21016 ("predicated instruction in conditional sequence");
21020 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21022 else if (current_insn_predicate
)
21024 enum arm_cond_code code
;
21028 output_operand_lossage ("predicated Thumb instruction");
21032 code
= get_arm_condition_code (current_insn_predicate
);
21033 fputs (arm_condition_codes
[code
], stream
);
21038 /* If CODE is 'd', then the X is a condition operand and the instruction
21039 should only be executed if the condition is true.
21040 if CODE is 'D', then the X is a condition operand and the instruction
21041 should only be executed if the condition is false: however, if the mode
21042 of the comparison is CCFPEmode, then always execute the instruction -- we
21043 do this because in these circumstances !GE does not necessarily imply LT;
21044 in these cases the instruction pattern will take care to make sure that
21045 an instruction containing %d will follow, thereby undoing the effects of
21046 doing this instruction unconditionally.
21047 If CODE is 'N' then X is a floating point operand that must be negated
21049 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21050 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
/* NOTE(review): implementation of the TARGET_PRINT_OPERAND hook.  The text
   below is a lossy extraction: the dispatching switch's case labels and most
   braces are missing, so the fragments read as a flat list.  Each fragment
   corresponds to one format-code handler; comments added here are keyed to
   the visible code only.  Do not edit logic until the file is recovered. */
21052 arm_print_operand (FILE *stream
, rtx x
, int code
)
/* Simple punctuation codes: emit assembler comment start, label prefix,
   register prefix, or the current condition (via arm_print_condition). */
21057 fputs (ASM_COMMENT_START
, stream
);
21061 fputs (user_label_prefix
, stream
);
21065 fputs (REGISTER_PREFIX
, stream
);
21069 arm_print_condition (stream
);
21073 /* Nothing in unified syntax, otherwise the current condition code. */
21074 if (!TARGET_UNIFIED_ASM
)
21075 arm_print_condition (stream
);
21079 /* The current condition code in unified syntax, otherwise nothing. */
21080 if (TARGET_UNIFIED_ASM
)
21081 arm_print_condition (stream
);
21085 /* The current condition code for a condition code setting instruction.
21086 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21087 if (TARGET_UNIFIED_ASM
)
21089 fputc('s', stream
);
21090 arm_print_condition (stream
);
21094 arm_print_condition (stream
);
21095 fputc('s', stream
);
21100 /* If the instruction is conditionally executed then print
21101 the current condition code, otherwise print 's'. */
21102 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21103 if (current_insn_predicate
)
21104 arm_print_condition (stream
);
21106 fputc('s', stream
);
21109 /* %# is a "break" sequence. It doesn't output anything, but is used to
21110 separate e.g. operand numbers from following text, if that text consists
21111 of further digits which we don't want to be part of the operand
/* Negate a floating-point constant and print its mnemonic form. */
21119 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21120 r
= real_value_negate (&r
);
21121 fprintf (stream
, "%s", fp_const_from_val (&r
));
21125 /* An integer or symbol address without a preceding # sign. */
21127 switch (GET_CODE (x
))
21130 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21134 output_addr_const (stream
, x
);
21138 if (GET_CODE (XEXP (x
, 0)) == PLUS
21139 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21141 output_addr_const (stream
, x
);
21144 /* Fall through. */
21147 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21151 /* An integer that we want to print in HEX. */
21153 switch (GET_CODE (x
))
21156 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21160 output_operand_lossage ("Unsupported operand for code '%c'", code
);
/* 'B': bitwise-inverted constant (sign-extended), or ~ prefix for
   symbolic operands. */
21165 if (CONST_INT_P (x
))
21168 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21169 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21173 putc ('~', stream
);
21174 output_addr_const (stream
, x
);
21179 /* The low 16 bits of an immediate constant. */
21180 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
/* Arithmetic-instruction mnemonics: second arg selects shifted (1) or
   plain (0) form -- see arithmetic_instr for semantics. */
21184 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21188 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
/* Print a shift operation: mnemonic, then register or immediate amount. */
21196 shift
= shift_op (x
, &val
);
21200 fprintf (stream
, ", %s ", shift
);
21202 arm_print_operand (stream
, XEXP (x
, 1), 0);
21204 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21209 /* An explanation of the 'Q', 'R' and 'H' register operands:
21211 In a pair of registers containing a DI or DF value the 'Q'
21212 operand returns the register number of the register containing
21213 the least significant part of the value. The 'R' operand returns
21214 the register number of the register containing the most
21215 significant part of the value.
21217 The 'H' operand returns the higher of the two register numbers.
21218 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21219 same as the 'Q' operand, since the most significant part of the
21220 value is held in the lower number register. The reverse is true
21221 on systems where WORDS_BIG_ENDIAN is false.
21223 The purpose of these operands is to distinguish between cases
21224 where the endian-ness of the values is important (for example
21225 when they are added together), and cases where the endian-ness
21226 is irrelevant, but the order of register operations is important.
21227 For example when loading a value from memory into a register
21228 pair, the endian-ness does not matter. Provided that the value
21229 from the lower memory address is put into the lower numbered
21230 register, and the value from the higher address is put into the
21231 higher numbered register, the load will work regardless of whether
21232 the value being loaded is big-wordian or little-wordian. The
21233 order of the two register loads can matter however, if the address
21234 of the memory location is actually held in one of the registers
21235 being overwritten by the load.
21237 The 'Q' and 'R' constraints are also available for 64-bit
/* 'Q': low word of a constant or register pair. */
21240 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21242 rtx part
= gen_lowpart (SImode
, x
);
21243 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21247 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21249 output_operand_lossage ("invalid operand for code '%c'", code
);
21253 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
/* 'R': high word of a constant or register pair. */
21257 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21259 enum machine_mode mode
= GET_MODE (x
);
21262 if (mode
== VOIDmode
)
21264 part
= gen_highpart_mode (SImode
, mode
, x
);
21265 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21269 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21271 output_operand_lossage ("invalid operand for code '%c'", code
);
21275 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
/* 'H': the higher-numbered register of a pair. */
21279 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21281 output_operand_lossage ("invalid operand for code '%c'", code
);
21285 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
/* The next two fragments select the third/fourth register of a
   quad-word value depending on word endianness. */
21289 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21291 output_operand_lossage ("invalid operand for code '%c'", code
);
21295 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21299 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21301 output_operand_lossage ("invalid operand for code '%c'", code
);
21305 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
/* Print the base register of a memory operand (possibly nested). */
21309 asm_fprintf (stream
, "%r",
21310 REG_P (XEXP (x
, 0))
21311 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
/* 'M': ldm/stm style register range {rN-rM}. */
21315 asm_fprintf (stream
, "{%r-%r}",
21317 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21320 /* Like 'M', but writing doubleword vector registers, for use by Neon
21324 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21325 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21327 asm_fprintf (stream
, "{d%d}", regno
);
21329 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
/* 'd': condition suffix for "execute if true". */
21334 /* CONST_TRUE_RTX means always -- that's the default. */
21335 if (x
== const_true_rtx
)
21338 if (!COMPARISON_P (x
))
21340 output_operand_lossage ("invalid operand for code '%c'", code
);
21344 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
/* 'D': inverse condition suffix -- see the header comment above for the
   CCFPEmode caveat. */
21349 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21350 want to do that. */
21351 if (x
== const_true_rtx
)
21353 output_operand_lossage ("instruction never executed");
21356 if (!COMPARISON_P (x
))
21358 output_operand_lossage ("invalid operand for code '%c'", code
);
21362 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21363 (get_arm_condition_code (x
))],
21373 /* Former Maverick support, removed after GCC-4.7. */
21374 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
/* iWMMXt general-purpose register (wCGRn): validate range then print
   the zero-based index. */
21379 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21380 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21381 /* Bad value for wCG register number. */
21383 output_operand_lossage ("invalid operand for code '%c'", code
);
21388 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21391 /* Print an iWMMXt control register name. */
21393 if (!CONST_INT_P (x
)
21395 || INTVAL (x
) >= 16)
21396 /* Bad value for wC register number. */
21398 output_operand_lossage ("invalid operand for code '%c'", code
);
21404 static const char * wc_reg_names
[16] =
21406 "wCID", "wCon", "wCSSF", "wCASF",
21407 "wC4", "wC5", "wC6", "wC7",
21408 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21409 "wC12", "wC13", "wC14", "wC15"
21412 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21416 /* Print the high single-precision register of a VFP double-precision
21420 int mode
= GET_MODE (x
);
21423 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21425 output_operand_lossage ("invalid operand for code '%c'", code
);
21430 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21432 output_operand_lossage ("invalid operand for code '%c'", code
);
21436 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21440 /* Print a VFP/Neon double precision or quad precision register name. */
21444 int mode
= GET_MODE (x
);
21445 int is_quad
= (code
== 'q');
21448 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21450 output_operand_lossage ("invalid operand for code '%c'", code
);
21455 || !IS_VFP_REGNUM (REGNO (x
)))
21457 output_operand_lossage ("invalid operand for code '%c'", code
);
21462 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21463 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21465 output_operand_lossage ("invalid operand for code '%c'", code
);
/* dN for doubles (regno offset >> 1), qN for quads (offset >> 2). */
21469 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21470 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21474 /* These two codes print the low/high doubleword register of a Neon quad
21475 register, respectively. For pair-structure types, can also print
21476 low/high quadword registers. */
21480 int mode
= GET_MODE (x
);
21483 if ((GET_MODE_SIZE (mode
) != 16
21484 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21486 output_operand_lossage ("invalid operand for code '%c'", code
);
21491 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21493 output_operand_lossage ("invalid operand for code '%c'", code
);
/* code 'f' selects the high half (+1); the other code the low half. */
21497 if (GET_MODE_SIZE (mode
) == 16)
21498 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21499 + (code
== 'f' ? 1 : 0));
21501 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21502 + (code
== 'f' ? 1 : 0));
21506 /* Print a VFPv3 floating-point constant, represented as an integer
21510 int index
= vfp3_const_double_index (x
);
21511 gcc_assert (index
!= -1);
21512 fprintf (stream
, "%d", index
);
21516 /* Print bits representing opcode features for Neon.
21518 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21519 and polynomials as unsigned.
21521 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21523 Bit 2 is 1 for rounding functions, 0 otherwise. */
21525 /* Identify the type as 's', 'u', 'p' or 'f'. */
21528 HOST_WIDE_INT bits
= INTVAL (x
);
21529 fputc ("uspf"[bits
& 3], stream
);
21533 /* Likewise, but signed and unsigned integers are both 'i'. */
21536 HOST_WIDE_INT bits
= INTVAL (x
);
21537 fputc ("iipf"[bits
& 3], stream
);
21541 /* As for 'T', but emit 'u' instead of 'p'. */
21544 HOST_WIDE_INT bits
= INTVAL (x
);
21545 fputc ("usuf"[bits
& 3], stream
);
21549 /* Bit 2: rounding (vs none). */
21552 HOST_WIDE_INT bits
= INTVAL (x
);
21553 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21557 /* Memory operand for vld1/vst1 instruction. */
21561 bool postinc
= FALSE
;
21562 unsigned align
, memsize
, align_bits
;
21564 gcc_assert (MEM_P (x
));
21565 addr
= XEXP (x
, 0);
21566 if (GET_CODE (addr
) == POST_INC
)
21569 addr
= XEXP (addr
, 0);
21571 asm_fprintf (stream
, "[%r", REGNO (addr
));
21573 /* We know the alignment of this access, so we can emit a hint in the
21574 instruction (for some alignments) as an aid to the memory subsystem
/* MEM_ALIGN is in bits; convert to bytes for the checks below. */
21576 align
= MEM_ALIGN (x
) >> 3;
21577 memsize
= MEM_SIZE (x
);
21579 /* Only certain alignment specifiers are supported by the hardware. */
21580 if (memsize
== 32 && (align
% 32) == 0)
21582 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
21584 else if (memsize
>= 8 && (align
% 8) == 0)
21589 if (align_bits
!= 0)
21590 asm_fprintf (stream
, ":%d", align_bits
);
21592 asm_fprintf (stream
, "]");
/* Writeback marker for the post-increment form. */
21595 fputs("!", stream
);
/* Plain [reg] memory operand: address must already be a bare register. */
21603 gcc_assert (MEM_P (x
));
21604 addr
= XEXP (x
, 0);
21605 gcc_assert (REG_P (addr
));
21606 asm_fprintf (stream
, "[%r]", REGNO (addr
));
21610 /* Translate an S register number into a D register number and element index. */
21613 int mode
= GET_MODE (x
);
21616 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
21618 output_operand_lossage ("invalid operand for code '%c'", code
);
21623 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21625 output_operand_lossage ("invalid operand for code '%c'", code
);
21629 regno
= regno
- FIRST_VFP_REGNUM
;
21630 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
/* VFPv3 constant encoded as fraction bits or raw bits. */
21635 gcc_assert (CONST_DOUBLE_P (x
));
21637 result
= vfp3_const_double_for_fract_bits (x
);
21639 result
= vfp3_const_double_for_bits (x
);
21640 fprintf (stream
, "#%d", result
);
21643 /* Register specifier for vld1.16/vst1.16. Translate the S register
21644 number into a D register number and element index. */
21647 int mode
= GET_MODE (x
);
21650 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
21652 output_operand_lossage ("invalid operand for code '%c'", code
);
21657 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21659 output_operand_lossage ("invalid operand for code '%c'", code
);
21663 regno
= regno
- FIRST_VFP_REGNUM
;
21664 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
/* Default (no format code): print the operand by its rtx class. */
21671 output_operand_lossage ("missing operand");
21675 switch (GET_CODE (x
))
21678 asm_fprintf (stream
, "%r", REGNO (x
));
/* Record the access mode for arm_print_operand_address, then print. */
21682 output_memory_reference_mode
= GET_MODE (x
);
21683 output_address (XEXP (x
, 0));
21690 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
21691 sizeof (fpstr
), 0, 1);
21692 fprintf (stream
, "#%s", fpstr
);
21695 fprintf (stream
, "#%s", fp_immediate_constant (x
));
21699 gcc_assert (GET_CODE (x
) != NEG
);
21700 fputc ('#', stream
);
21701 if (GET_CODE (x
) == HIGH
)
21703 fputs (":lower16:", stream
);
21707 output_addr_const (stream
, x
);
21713 /* Target hook for printing a memory address. */
/* NOTE(review): TARGET_PRINT_OPERAND_ADDRESS implementation.  The text is a
   lossy extraction (braces/case labels dropped); fragments below handle, in
   order: bare register, base+index/offset, pre/post inc/dec, pre/post
   modify, then a Thumb-side section.  Annotations cover visible code only. */
21715 arm_print_operand_address (FILE *stream
, rtx x
)
21719 int is_minus
= GET_CODE (x
) == MINUS
;
/* Bare register: [rN]. */
21722 asm_fprintf (stream
, "[%r]", REGNO (x
));
21723 else if (GET_CODE (x
) == PLUS
|| is_minus
)
21725 rtx base
= XEXP (x
, 0);
21726 rtx index
= XEXP (x
, 1);
21727 HOST_WIDE_INT offset
= 0;
21729 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
21731 /* Ensure that BASE is a register. */
21732 /* (one of them must be). */
21733 /* Also ensure the SP is not used as in index register. */
21738 switch (GET_CODE (index
))
/* Immediate offset: [rN, #imm]. */
21741 offset
= INTVAL (index
);
21744 asm_fprintf (stream
, "[%r, #%wd]",
21745 REGNO (base
), offset
);
/* Register offset: [rN, {-}rM]. */
21749 asm_fprintf (stream
, "[%r, %s%r]",
21750 REGNO (base
), is_minus
? "-" : "",
/* Shifted register offset: [rN, {-}rM, <shift>]; the shift itself is
   printed via arm_print_operand with code 'S'. */
21760 asm_fprintf (stream
, "[%r, %s%r",
21761 REGNO (base
), is_minus
? "-" : "",
21762 REGNO (XEXP (index
, 0)));
21763 arm_print_operand (stream
, index
, 'S');
21764 fputs ("]", stream
);
21769 gcc_unreachable ();
21772 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
21773 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
/* The step size comes from the mode of the enclosing MEM, stashed in
   this global by arm_print_operand before output_address was called. */
21775 extern enum machine_mode output_memory_reference_mode
;
21777 gcc_assert (REG_P (XEXP (x
, 0)));
21779 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
21780 asm_fprintf (stream
, "[%r, #%s%d]!",
21781 REGNO (XEXP (x
, 0)),
21782 GET_CODE (x
) == PRE_DEC
? "-" : "",
21783 GET_MODE_SIZE (output_memory_reference_mode
));
21785 asm_fprintf (stream
, "[%r], #%s%d",
21786 REGNO (XEXP (x
, 0)),
21787 GET_CODE (x
) == POST_DEC
? "-" : "",
21788 GET_MODE_SIZE (output_memory_reference_mode
));
21790 else if (GET_CODE (x
) == PRE_MODIFY
)
21792 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
21793 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21794 asm_fprintf (stream
, "#%wd]!",
21795 INTVAL (XEXP (XEXP (x
, 1), 1)));
21797 asm_fprintf (stream
, "%r]!",
21798 REGNO (XEXP (XEXP (x
, 1), 1)));
21800 else if (GET_CODE (x
) == POST_MODIFY
)
21802 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
21803 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21804 asm_fprintf (stream
, "#%wd",
21805 INTVAL (XEXP (XEXP (x
, 1), 1)));
21807 asm_fprintf (stream
, "%r",
21808 REGNO (XEXP (XEXP (x
, 1), 1)));
21810 else output_addr_const (stream
, x
);
/* Presumably the TARGET_THUMB branch of this hook starts here --
   the dropped structural lines make the split point uncertain; verify
   against the pristine source. */
21815 asm_fprintf (stream
, "[%r]", REGNO (x
));
21816 else if (GET_CODE (x
) == POST_INC
)
21817 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
21818 else if (GET_CODE (x
) == PLUS
)
21820 gcc_assert (REG_P (XEXP (x
, 0)));
21821 if (CONST_INT_P (XEXP (x
, 1)))
21822 asm_fprintf (stream
, "[%r, #%wd]",
21823 REGNO (XEXP (x
, 0)),
21824 INTVAL (XEXP (x
, 1)));
21826 asm_fprintf (stream
, "[%r, %r]",
21827 REGNO (XEXP (x
, 0)),
21828 REGNO (XEXP (x
, 1)));
21831 output_addr_const (stream
, x
);
21835 /* Target hook for indicating whether a punctuation character for
21836 TARGET_PRINT_OPERAND is valid. */
/* Always-valid punctuation: @ | . ( ) #.  '?' additionally on 32-bit
   targets, '!' on Thumb-2, '_' on Thumb. */
21838 arm_print_operand_punct_valid_p (unsigned char code
)
21840 return (code
== '@' || code
== '|' || code
== '.'
21841 || code
== '(' || code
== ')' || code
== '#'
21842 || (TARGET_32BIT
&& (code
== '?'))
21843 || (TARGET_THUMB2
&& (code
== '!'))
21844 || (TARGET_THUMB
&& (code
== '_')));
21847 /* Target hook for assembling integer objects. The ARM version needs to
21848 handle word-sized values specially. */
/* NOTE(review): lossy extraction -- braces and some lines dropped.
   Word-sized aligned values get .word plus (GOT)/(GOTOFF) annotations for
   PIC; supported vector constants are emitted element by element; anything
   else falls back to default_assemble_integer. */
21850 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
21852 enum machine_mode mode
;
21854 if (size
== UNITS_PER_WORD
&& aligned_p
)
21856 fputs ("\t.word\t", asm_out_file
);
21857 output_addr_const (asm_out_file
, x
);
21859 /* Mark symbols as position independent. We only do this in the
21860 .text segment, not in the .data segment. */
21861 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
21862 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
21864 /* See legitimize_pic_address for an explanation of the
21865 TARGET_VXWORKS_RTP check. */
21866 if (!arm_pic_data_is_text_relative
21867 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
21868 fputs ("(GOT)", asm_out_file
);
21870 fputs ("(GOTOFF)", asm_out_file
);
21872 fputc ('\n', asm_out_file
);
21876 mode
= GET_MODE (x
);
21878 if (arm_vector_mode_supported_p (mode
))
21882 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
21884 units
= CONST_VECTOR_NUNITS (x
);
/* From here on SIZE is the per-element size, not the original request. */
21885 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
21887 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
21888 for (i
= 0; i
< units
; i
++)
21890 rtx elt
= CONST_VECTOR_ELT (x
, i
);
/* First element carries the vector's alignment; the rest are packed. */
21892 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
21895 for (i
= 0; i
< units
; i
++)
21897 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21898 REAL_VALUE_TYPE rval
;
21900 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
21903 (rval
, GET_MODE_INNER (mode
),
21904 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
21910 return default_assemble_integer (x
, size
, aligned_p
);
/* Shared worker for the constructor/destructor hooks below: emit SYMBOL
   into the appropriate init/fini section with the given PRIORITY. */
21914 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
/* Non-AAPCS targets just use the generic named-section handlers. */
21918 if (!TARGET_AAPCS_BASED
)
21921 default_named_section_asm_out_constructor
21922 : default_named_section_asm_out_destructor
) (symbol
, priority
);
21926 /* Put these in the .init_array section, using a special relocation. */
21927 if (priority
!= DEFAULT_INIT_PRIORITY
)
/* Priority-qualified section name, e.g. ".init_array.00065". */
21930 sprintf (buf
, "%s.%.5u",
21931 is_ctor
? ".init_array" : ".fini_array",
21933 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
21940 switch_to_section (s
);
21941 assemble_align (POINTER_SIZE
);
21942 fputs ("\t.word\t", asm_out_file
);
21943 output_addr_const (asm_out_file
, symbol
);
/* R_ARM_TARGET1 relocation, resolved by the linker per platform ABI. */
21944 fputs ("(target1)\n", asm_out_file
);
21947 /* Add a function to the list of static constructors. */
21950 arm_elf_asm_constructor (rtx symbol
, int priority
)
21952 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
21955 /* Add a function to the list of static destructors. */
21958 arm_elf_asm_destructor (rtx symbol
, int priority
)
21960 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
21963 /* A finite state machine takes care of noticing whether or not instructions
21964 can be conditionally executed, and thus decrease execution time and code
21965 size by deleting branch instructions. The fsm is controlled by
21966 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21968 /* The state of the fsm controlling condition codes are:
21969 0: normal, do nothing special
21970 1: make ASM_OUTPUT_OPCODE not output this instruction
21971 2: make ASM_OUTPUT_OPCODE not output this instruction
21972 3: make instructions conditional
21973 4: make instructions conditional
21975 State transitions (state->state by whom under condition):
21976 0 -> 1 final_prescan_insn if the `target' is a label
21977 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21978 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21979 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21980 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21981 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21982 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21983 (the target insn is arm_target_insn).
21985 If the jump clobbers the conditions then we use states 2 and 4.
21987 A similar thing can be done with conditional return insns.
21989 XXX In case the `target' is an unconditional branch, this conditionalising
21990 of the instructions always reduces code size, but not always execution
21991 time. But then, I want to reduce the code size to somewhere near what
21992 /bin/cc produces. */
21994 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21995 instructions. When a COND_EXEC instruction is seen the subsequent
21996 instructions are scanned so that multiple conditional instructions can be
21997 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21998 specify the length and true/false mask for the IT block. These will be
21999 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22001 /* Returns the index of the ARM condition code string in
22002 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22003 COMPARISON should be an rtx like `(eq (...) (...))'. */
/* NOTE(review): lossy extraction -- the outer switch over MODE and the
   per-mode switch heads over COMP_CODE are missing, so the case groups
   below read as a flat list.  Each group maps rtx comparison codes to ARM
   condition codes for one CC mode. */
22006 maybe_get_arm_condition_code (rtx comparison
)
22008 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22009 enum arm_cond_code code
;
22010 enum rtx_code comp_code
= GET_CODE (comparison
);
/* If the comparison is not against a CC register, derive the CC mode
   that the comparison would use. */
22012 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22013 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22014 XEXP (comparison
, 1));
/* Dominance CC modes: pick the base condition, then adjust below. */
22018 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22019 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22020 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22021 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22022 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22023 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22024 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22025 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22026 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22027 case CC_DLTUmode
: code
= ARM_CC
;
/* dominance: EQ inverts the base condition, NE keeps it. */
22030 if (comp_code
== EQ
)
22031 return ARM_INVERSE_CONDITION_CODE (code
);
22032 if (comp_code
== NE
)
22039 case NE
: return ARM_NE
;
22040 case EQ
: return ARM_EQ
;
22041 case GE
: return ARM_PL
;
22042 case LT
: return ARM_MI
;
22043 default: return ARM_NV
;
22049 case NE
: return ARM_NE
;
22050 case EQ
: return ARM_EQ
;
22051 default: return ARM_NV
;
22057 case NE
: return ARM_MI
;
22058 case EQ
: return ARM_PL
;
22059 default: return ARM_NV
;
/* Floating-point CC mode group. */
22064 /* We can handle all cases except UNEQ and LTGT. */
22067 case GE
: return ARM_GE
;
22068 case GT
: return ARM_GT
;
22069 case LE
: return ARM_LS
;
22070 case LT
: return ARM_MI
;
22071 case NE
: return ARM_NE
;
22072 case EQ
: return ARM_EQ
;
22073 case ORDERED
: return ARM_VC
;
22074 case UNORDERED
: return ARM_VS
;
22075 case UNLT
: return ARM_LT
;
22076 case UNLE
: return ARM_LE
;
22077 case UNGT
: return ARM_HI
;
22078 case UNGE
: return ARM_PL
;
22079 /* UNEQ and LTGT do not have a representation. */
22080 case UNEQ
: /* Fall through. */
22081 case LTGT
: /* Fall through. */
22082 default: return ARM_NV
;
/* Swapped-operand CC mode: conditions are mirrored. */
22088 case NE
: return ARM_NE
;
22089 case EQ
: return ARM_EQ
;
22090 case GE
: return ARM_LE
;
22091 case GT
: return ARM_LT
;
22092 case LE
: return ARM_GE
;
22093 case LT
: return ARM_GT
;
22094 case GEU
: return ARM_LS
;
22095 case GTU
: return ARM_CC
;
22096 case LEU
: return ARM_CS
;
22097 case LTU
: return ARM_HI
;
22098 default: return ARM_NV
;
/* Carry-only CC mode group. */
22104 case LTU
: return ARM_CS
;
22105 case GEU
: return ARM_CC
;
22106 default: return ARM_NV
;
/* Unsigned-comparison CC mode group. */
22112 case NE
: return ARM_NE
;
22113 case EQ
: return ARM_EQ
;
22114 case GEU
: return ARM_CS
;
22115 case GTU
: return ARM_HI
;
22116 case LEU
: return ARM_LS
;
22117 case LTU
: return ARM_CC
;
22118 default: return ARM_NV
;
/* N/V-flag-only CC mode group. */
22124 case GE
: return ARM_GE
;
22125 case LT
: return ARM_LT
;
22126 case GEU
: return ARM_CS
;
22127 case LTU
: return ARM_CC
;
22128 default: return ARM_NV
;
/* Plain CCmode: the full set of conditions is available. */
22134 case NE
: return ARM_NE
;
22135 case EQ
: return ARM_EQ
;
22136 case GE
: return ARM_GE
;
22137 case GT
: return ARM_GT
;
22138 case LE
: return ARM_LE
;
22139 case LT
: return ARM_LT
;
22140 case GEU
: return ARM_CS
;
22141 case GTU
: return ARM_HI
;
22142 case LEU
: return ARM_LS
;
22143 case LTU
: return ARM_CC
;
22144 default: return ARM_NV
;
22147 default: gcc_unreachable ();
22151 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22152 static enum arm_cond_code
/* Asserting wrapper: callers use this when the comparison is known valid. */
22153 get_arm_condition_code (rtx comparison
)
22155 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22156 gcc_assert (code
!= ARM_NV
);
22160 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
/* NOTE(review): lossy extraction -- braces and some statements dropped.
   Scans forward from a COND_EXEC insn, accumulating compatible (same or
   inverse condition) insns into one IT block via the arm_condexec_* globals
   that arm_asm_output_opcode later consumes. */
22163 thumb2_final_prescan_insn (rtx insn
)
22165 rtx first_insn
= insn
;
22166 rtx body
= PATTERN (insn
);
22168 enum arm_cond_code code
;
22173 /* max_insns_skipped in the tune was already taken into account in the
22174 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22175 just emit the IT blocks as we can. It does not make sense to split
22177 max
= MAX_INSN_PER_IT_BLOCK
;
22179 /* Remove the previous insn from the count of insns to be output. */
22180 if (arm_condexec_count
)
22181 arm_condexec_count
--;
22183 /* Nothing to do if we are already inside a conditional block. */
22184 if (arm_condexec_count
)
22187 if (GET_CODE (body
) != COND_EXEC
)
22190 /* Conditional jumps are implemented directly. */
/* Start a new IT block from this insn's predicate. */
22194 predicate
= COND_EXEC_TEST (body
);
22195 arm_current_cc
= get_arm_condition_code (predicate
);
22197 n
= get_attr_ce_count (insn
);
22198 arm_condexec_count
= 1;
/* Mask bit i set = instruction i executes on the true condition. */
22199 arm_condexec_mask
= (1 << n
) - 1;
22200 arm_condexec_masklen
= n
;
22201 /* See if subsequent instructions can be combined into the same block. */
22204 insn
= next_nonnote_insn (insn
);
22206 /* Jumping into the middle of an IT block is illegal, so a label or
22207 barrier terminates the block. */
22208 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22211 body
= PATTERN (insn
);
22212 /* USE and CLOBBER aren't really insns, so just skip them. */
22213 if (GET_CODE (body
) == USE
22214 || GET_CODE (body
) == CLOBBER
)
22217 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22218 if (GET_CODE (body
) != COND_EXEC
)
22220 /* Maximum number of conditionally executed instructions in a block. */
22221 n
= get_attr_ce_count (insn
);
22222 if (arm_condexec_masklen
+ n
> max
)
22225 predicate
= COND_EXEC_TEST (body
);
22226 code
= get_arm_condition_code (predicate
);
22227 mask
= (1 << n
) - 1;
/* Same condition extends the T-mask; the exact inverse condition is
   also allowed (E slots); anything else ends the block. */
22228 if (arm_current_cc
== code
)
22229 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22230 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22233 arm_condexec_count
++;
22234 arm_condexec_masklen
+= n
;
22236 /* A jump must be the last instruction in a conditional block. */
22240 /* Restore recog_data (getting the attributes of other insns can
22241 destroy this array, but final.c assumes that it remains intact
22242 across this call). */
22243 extract_constrain_insn_cached (first_insn
);
22247 arm_final_prescan_insn (rtx insn
)
22249 /* BODY will hold the body of INSN. */
22250 rtx body
= PATTERN (insn
);
22252 /* This will be 1 if trying to repeat the trick, and things need to be
22253 reversed if it appears to fail. */
22256 /* If we start with a return insn, we only succeed if we find another one. */
22257 int seeking_return
= 0;
22258 enum rtx_code return_code
= UNKNOWN
;
22260 /* START_INSN will hold the insn from where we start looking. This is the
22261 first insn after the following code_label if REVERSE is true. */
22262 rtx start_insn
= insn
;
22264 /* If in state 4, check if the target branch is reached, in order to
22265 change back to state 0. */
22266 if (arm_ccfsm_state
== 4)
22268 if (insn
== arm_target_insn
)
22270 arm_target_insn
= NULL
;
22271 arm_ccfsm_state
= 0;
22276 /* If in state 3, it is possible to repeat the trick, if this insn is an
22277 unconditional branch to a label, and immediately following this branch
22278 is the previous target label which is only used once, and the label this
22279 branch jumps to is not too far off. */
22280 if (arm_ccfsm_state
== 3)
22282 if (simplejump_p (insn
))
22284 start_insn
= next_nonnote_insn (start_insn
);
22285 if (BARRIER_P (start_insn
))
22287 /* XXX Isn't this always a barrier? */
22288 start_insn
= next_nonnote_insn (start_insn
);
22290 if (LABEL_P (start_insn
)
22291 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22292 && LABEL_NUSES (start_insn
) == 1)
22297 else if (ANY_RETURN_P (body
))
22299 start_insn
= next_nonnote_insn (start_insn
);
22300 if (BARRIER_P (start_insn
))
22301 start_insn
= next_nonnote_insn (start_insn
);
22302 if (LABEL_P (start_insn
)
22303 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22304 && LABEL_NUSES (start_insn
) == 1)
22307 seeking_return
= 1;
22308 return_code
= GET_CODE (body
);
22317 gcc_assert (!arm_ccfsm_state
|| reverse
);
22318 if (!JUMP_P (insn
))
22321 /* This jump might be paralleled with a clobber of the condition codes
22322 the jump should always come first */
22323 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22324 body
= XVECEXP (body
, 0, 0);
22327 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22328 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22331 int fail
= FALSE
, succeed
= FALSE
;
22332 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22333 int then_not_else
= TRUE
;
22334 rtx this_insn
= start_insn
, label
= 0;
22336 /* Register the insn jumped to. */
22339 if (!seeking_return
)
22340 label
= XEXP (SET_SRC (body
), 0);
22342 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22343 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22344 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22346 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22347 then_not_else
= FALSE
;
22349 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22351 seeking_return
= 1;
22352 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22354 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22356 seeking_return
= 1;
22357 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22358 then_not_else
= FALSE
;
22361 gcc_unreachable ();
22363 /* See how many insns this branch skips, and what kind of insns. If all
22364 insns are okay, and the label or unconditional branch to the same
22365 label is not too far away, succeed. */
22366 for (insns_skipped
= 0;
22367 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22371 this_insn
= next_nonnote_insn (this_insn
);
22375 switch (GET_CODE (this_insn
))
22378 /* Succeed if it is the target label, otherwise fail since
22379 control falls in from somewhere else. */
22380 if (this_insn
== label
)
22382 arm_ccfsm_state
= 1;
22390 /* Succeed if the following insn is the target label.
22392 If return insns are used then the last insn in a function
22393 will be a barrier. */
22394 this_insn
= next_nonnote_insn (this_insn
);
22395 if (this_insn
&& this_insn
== label
)
22397 arm_ccfsm_state
= 1;
22405 /* The AAPCS says that conditional calls should not be
22406 used since they make interworking inefficient (the
22407 linker can't transform BL<cond> into BLX). That's
22408 only a problem if the machine has BLX. */
22415 /* Succeed if the following insn is the target label, or
22416 if the following two insns are a barrier and the
22418 this_insn
= next_nonnote_insn (this_insn
);
22419 if (this_insn
&& BARRIER_P (this_insn
))
22420 this_insn
= next_nonnote_insn (this_insn
);
22422 if (this_insn
&& this_insn
== label
22423 && insns_skipped
< max_insns_skipped
)
22425 arm_ccfsm_state
= 1;
22433 /* If this is an unconditional branch to the same label, succeed.
22434 If it is to another label, do nothing. If it is conditional,
22436 /* XXX Probably, the tests for SET and the PC are
22439 scanbody
= PATTERN (this_insn
);
22440 if (GET_CODE (scanbody
) == SET
22441 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22443 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22444 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22446 arm_ccfsm_state
= 2;
22449 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22452 /* Fail if a conditional return is undesirable (e.g. on a
22453 StrongARM), but still allow this if optimizing for size. */
22454 else if (GET_CODE (scanbody
) == return_code
22455 && !use_return_insn (TRUE
, NULL
)
22458 else if (GET_CODE (scanbody
) == return_code
)
22460 arm_ccfsm_state
= 2;
22463 else if (GET_CODE (scanbody
) == PARALLEL
)
22465 switch (get_attr_conds (this_insn
))
22475 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22480 /* Instructions using or affecting the condition codes make it
22482 scanbody
= PATTERN (this_insn
);
22483 if (!(GET_CODE (scanbody
) == SET
22484 || GET_CODE (scanbody
) == PARALLEL
)
22485 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
22495 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22496 arm_target_label
= CODE_LABEL_NUMBER (label
);
22499 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22501 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22503 this_insn
= next_nonnote_insn (this_insn
);
22504 gcc_assert (!this_insn
22505 || (!BARRIER_P (this_insn
)
22506 && !LABEL_P (this_insn
)));
22510 /* Oh, dear! we ran off the end.. give up. */
22511 extract_constrain_insn_cached (insn
);
22512 arm_ccfsm_state
= 0;
22513 arm_target_insn
= NULL
;
22516 arm_target_insn
= this_insn
;
22519 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22522 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22524 if (reverse
|| then_not_else
)
22525 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
22528 /* Restore recog_data (getting the attributes of other insns can
22529 destroy this array, but final.c assumes that it remains intact
22530 across this call. */
22531 extract_constrain_insn_cached (insn
);
22535 /* Output IT instructions. */
22537 thumb2_asm_output_opcode (FILE * stream
)
22542 if (arm_condexec_mask
)
22544 for (n
= 0; n
< arm_condexec_masklen
; n
++)
22545 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
22547 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
22548 arm_condition_codes
[arm_current_cc
]);
22549 arm_condexec_mask
= 0;
22553 /* Returns true if REGNO is a valid register
22554 for holding a quantity of type MODE. */
22556 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
22558 if (GET_MODE_CLASS (mode
) == MODE_CC
)
22559 return (regno
== CC_REGNUM
22560 || (TARGET_HARD_FLOAT
&& TARGET_VFP
22561 && regno
== VFPCC_REGNUM
));
22564 /* For the Thumb we only allow values bigger than SImode in
22565 registers 0 - 6, so that there is always a second low
22566 register available to hold the upper part of the value.
22567 We probably we ought to ensure that the register is the
22568 start of an even numbered register pair. */
22569 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
22571 if (TARGET_HARD_FLOAT
&& TARGET_VFP
22572 && IS_VFP_REGNUM (regno
))
22574 if (mode
== SFmode
|| mode
== SImode
)
22575 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22577 if (mode
== DFmode
)
22578 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
22580 /* VFP registers can hold HFmode values, but there is no point in
22581 putting them there unless we have hardware conversion insns. */
22582 if (mode
== HFmode
)
22583 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
22586 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
22587 || (VALID_NEON_QREG_MODE (mode
)
22588 && NEON_REGNO_OK_FOR_QUAD (regno
))
22589 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
22590 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
22591 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
22592 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
22593 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
22598 if (TARGET_REALLY_IWMMXT
)
22600 if (IS_IWMMXT_GR_REGNUM (regno
))
22601 return mode
== SImode
;
22603 if (IS_IWMMXT_REGNUM (regno
))
22604 return VALID_IWMMXT_REG_MODE (mode
);
22607 /* We allow almost any value to be stored in the general registers.
22608 Restrict doubleword quantities to even register pairs so that we can
22609 use ldrd. Do not allow very large Neon structure opaque modes in
22610 general registers; they would use too many. */
22611 if (regno
<= LAST_ARM_REGNUM
)
22612 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
22613 && ARM_NUM_REGS (mode
) <= 4;
22615 if (regno
== FRAME_POINTER_REGNUM
22616 || regno
== ARG_POINTER_REGNUM
)
22617 /* We only allow integers in the fake hard registers. */
22618 return GET_MODE_CLASS (mode
) == MODE_INT
;
22623 /* Implement MODES_TIEABLE_P. */
22626 arm_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
22628 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
22631 /* We specifically want to allow elements of "structure" modes to
22632 be tieable to the structure. This more general condition allows
22633 other rarer situations too. */
22635 && (VALID_NEON_DREG_MODE (mode1
)
22636 || VALID_NEON_QREG_MODE (mode1
)
22637 || VALID_NEON_STRUCT_MODE (mode1
))
22638 && (VALID_NEON_DREG_MODE (mode2
)
22639 || VALID_NEON_QREG_MODE (mode2
)
22640 || VALID_NEON_STRUCT_MODE (mode2
)))
22646 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22647 not used in arm mode. */
22650 arm_regno_class (int regno
)
22654 if (regno
== STACK_POINTER_REGNUM
)
22656 if (regno
== CC_REGNUM
)
22663 if (TARGET_THUMB2
&& regno
< 8)
22666 if ( regno
<= LAST_ARM_REGNUM
22667 || regno
== FRAME_POINTER_REGNUM
22668 || regno
== ARG_POINTER_REGNUM
)
22669 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
22671 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
22672 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
22674 if (IS_VFP_REGNUM (regno
))
22676 if (regno
<= D7_VFP_REGNUM
)
22677 return VFP_D0_D7_REGS
;
22678 else if (regno
<= LAST_LO_VFP_REGNUM
)
22679 return VFP_LO_REGS
;
22681 return VFP_HI_REGS
;
22684 if (IS_IWMMXT_REGNUM (regno
))
22685 return IWMMXT_REGS
;
22687 if (IS_IWMMXT_GR_REGNUM (regno
))
22688 return IWMMXT_GR_REGS
;
22693 /* Handle a special case when computing the offset
22694 of an argument from the frame pointer. */
22696 arm_debugger_arg_offset (int value
, rtx addr
)
22700 /* We are only interested if dbxout_parms() failed to compute the offset. */
22704 /* We can only cope with the case where the address is held in a register. */
22708 /* If we are using the frame pointer to point at the argument, then
22709 an offset of 0 is correct. */
22710 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
22713 /* If we are using the stack pointer to point at the
22714 argument, then an offset of 0 is correct. */
22715 /* ??? Check this is consistent with thumb2 frame layout. */
22716 if ((TARGET_THUMB
|| !frame_pointer_needed
)
22717 && REGNO (addr
) == SP_REGNUM
)
22720 /* Oh dear. The argument is pointed to by a register rather
22721 than being held in a register, or being stored at a known
22722 offset from the frame pointer. Since GDB only understands
22723 those two kinds of argument we must translate the address
22724 held in the register into an offset from the frame pointer.
22725 We do this by searching through the insns for the function
22726 looking to see where this register gets its value. If the
22727 register is initialized from the frame pointer plus an offset
22728 then we are in luck and we can continue, otherwise we give up.
22730 This code is exercised by producing debugging information
22731 for a function with arguments like this:
22733 double func (double a, double b, int c, double d) {return d;}
22735 Without this code the stab for parameter 'd' will be set to
22736 an offset of 0 from the frame pointer, rather than 8. */
22738 /* The if() statement says:
22740 If the insn is a normal instruction
22741 and if the insn is setting the value in a register
22742 and if the register being set is the register holding the address of the argument
22743 and if the address is computing by an addition
22744 that involves adding to a register
22745 which is the frame pointer
22750 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
22752 if ( NONJUMP_INSN_P (insn
)
22753 && GET_CODE (PATTERN (insn
)) == SET
22754 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
22755 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
22756 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
22757 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22758 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
22761 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
22770 warning (0, "unable to compute real location of stacked parameter");
22771 value
= 8; /* XXX magic hack */
22792 T_MAX
/* Size of enum. Keep last. */
22793 } neon_builtin_type_mode
;
22795 #define TYPE_MODE_BIT(X) (1 << (X))
22797 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22798 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22799 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22800 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22801 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22802 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22804 #define v8qi_UP T_V8QI
22805 #define v4hi_UP T_V4HI
22806 #define v4hf_UP T_V4HF
22807 #define v2si_UP T_V2SI
22808 #define v2sf_UP T_V2SF
22810 #define v16qi_UP T_V16QI
22811 #define v8hi_UP T_V8HI
22812 #define v4si_UP T_V4SI
22813 #define v4sf_UP T_V4SF
22814 #define v2di_UP T_V2DI
22819 #define UP(X) X##_UP
22855 NEON_LOADSTRUCTLANE
,
22857 NEON_STORESTRUCTLANE
,
22866 const neon_itype itype
;
22867 const neon_builtin_type_mode mode
;
22868 const enum insn_code code
;
22869 unsigned int fcode
;
22870 } neon_builtin_datum
;
22872 #define CF(N,X) CODE_FOR_neon_##N##X
22874 #define VAR1(T, N, A) \
22875 {#N, NEON_##T, UP (A), CF (N, A), 0}
22876 #define VAR2(T, N, A, B) \
22878 {#N, NEON_##T, UP (B), CF (N, B), 0}
22879 #define VAR3(T, N, A, B, C) \
22880 VAR2 (T, N, A, B), \
22881 {#N, NEON_##T, UP (C), CF (N, C), 0}
22882 #define VAR4(T, N, A, B, C, D) \
22883 VAR3 (T, N, A, B, C), \
22884 {#N, NEON_##T, UP (D), CF (N, D), 0}
22885 #define VAR5(T, N, A, B, C, D, E) \
22886 VAR4 (T, N, A, B, C, D), \
22887 {#N, NEON_##T, UP (E), CF (N, E), 0}
22888 #define VAR6(T, N, A, B, C, D, E, F) \
22889 VAR5 (T, N, A, B, C, D, E), \
22890 {#N, NEON_##T, UP (F), CF (N, F), 0}
22891 #define VAR7(T, N, A, B, C, D, E, F, G) \
22892 VAR6 (T, N, A, B, C, D, E, F), \
22893 {#N, NEON_##T, UP (G), CF (N, G), 0}
22894 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22895 VAR7 (T, N, A, B, C, D, E, F, G), \
22896 {#N, NEON_##T, UP (H), CF (N, H), 0}
22897 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22898 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22899 {#N, NEON_##T, UP (I), CF (N, I), 0}
22900 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22901 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22902 {#N, NEON_##T, UP (J), CF (N, J), 0}
22904 /* The NEON builtin data can be found in arm_neon_builtins.def.
22905 The mode entries in the following table correspond to the "key" type of the
22906 instruction variant, i.e. equivalent to that which would be specified after
22907 the assembler mnemonic, which usually refers to the last vector operand.
22908 (Signed/unsigned/polynomial types are not differentiated between though, and
22909 are all mapped onto the same mode for a given element size.) The modes
22910 listed per instruction should be the same as those defined for that
22911 instruction's pattern in neon.md. */
22913 static neon_builtin_datum neon_builtin_data
[] =
22915 #include "arm_neon_builtins.def"
22930 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22931 #define VAR1(T, N, A) \
22933 #define VAR2(T, N, A, B) \
22936 #define VAR3(T, N, A, B, C) \
22937 VAR2 (T, N, A, B), \
22939 #define VAR4(T, N, A, B, C, D) \
22940 VAR3 (T, N, A, B, C), \
22942 #define VAR5(T, N, A, B, C, D, E) \
22943 VAR4 (T, N, A, B, C, D), \
22945 #define VAR6(T, N, A, B, C, D, E, F) \
22946 VAR5 (T, N, A, B, C, D, E), \
22948 #define VAR7(T, N, A, B, C, D, E, F, G) \
22949 VAR6 (T, N, A, B, C, D, E, F), \
22951 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22952 VAR7 (T, N, A, B, C, D, E, F, G), \
22954 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22955 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22957 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22958 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22962 ARM_BUILTIN_GETWCGR0
,
22963 ARM_BUILTIN_GETWCGR1
,
22964 ARM_BUILTIN_GETWCGR2
,
22965 ARM_BUILTIN_GETWCGR3
,
22967 ARM_BUILTIN_SETWCGR0
,
22968 ARM_BUILTIN_SETWCGR1
,
22969 ARM_BUILTIN_SETWCGR2
,
22970 ARM_BUILTIN_SETWCGR3
,
22974 ARM_BUILTIN_WAVG2BR
,
22975 ARM_BUILTIN_WAVG2HR
,
22976 ARM_BUILTIN_WAVG2B
,
22977 ARM_BUILTIN_WAVG2H
,
22984 ARM_BUILTIN_WMACSZ
,
22986 ARM_BUILTIN_WMACUZ
,
22989 ARM_BUILTIN_WSADBZ
,
22991 ARM_BUILTIN_WSADHZ
,
22993 ARM_BUILTIN_WALIGNI
,
22994 ARM_BUILTIN_WALIGNR0
,
22995 ARM_BUILTIN_WALIGNR1
,
22996 ARM_BUILTIN_WALIGNR2
,
22997 ARM_BUILTIN_WALIGNR3
,
23000 ARM_BUILTIN_TMIAPH
,
23001 ARM_BUILTIN_TMIABB
,
23002 ARM_BUILTIN_TMIABT
,
23003 ARM_BUILTIN_TMIATB
,
23004 ARM_BUILTIN_TMIATT
,
23006 ARM_BUILTIN_TMOVMSKB
,
23007 ARM_BUILTIN_TMOVMSKH
,
23008 ARM_BUILTIN_TMOVMSKW
,
23010 ARM_BUILTIN_TBCSTB
,
23011 ARM_BUILTIN_TBCSTH
,
23012 ARM_BUILTIN_TBCSTW
,
23014 ARM_BUILTIN_WMADDS
,
23015 ARM_BUILTIN_WMADDU
,
23017 ARM_BUILTIN_WPACKHSS
,
23018 ARM_BUILTIN_WPACKWSS
,
23019 ARM_BUILTIN_WPACKDSS
,
23020 ARM_BUILTIN_WPACKHUS
,
23021 ARM_BUILTIN_WPACKWUS
,
23022 ARM_BUILTIN_WPACKDUS
,
23027 ARM_BUILTIN_WADDSSB
,
23028 ARM_BUILTIN_WADDSSH
,
23029 ARM_BUILTIN_WADDSSW
,
23030 ARM_BUILTIN_WADDUSB
,
23031 ARM_BUILTIN_WADDUSH
,
23032 ARM_BUILTIN_WADDUSW
,
23036 ARM_BUILTIN_WSUBSSB
,
23037 ARM_BUILTIN_WSUBSSH
,
23038 ARM_BUILTIN_WSUBSSW
,
23039 ARM_BUILTIN_WSUBUSB
,
23040 ARM_BUILTIN_WSUBUSH
,
23041 ARM_BUILTIN_WSUBUSW
,
23048 ARM_BUILTIN_WCMPEQB
,
23049 ARM_BUILTIN_WCMPEQH
,
23050 ARM_BUILTIN_WCMPEQW
,
23051 ARM_BUILTIN_WCMPGTUB
,
23052 ARM_BUILTIN_WCMPGTUH
,
23053 ARM_BUILTIN_WCMPGTUW
,
23054 ARM_BUILTIN_WCMPGTSB
,
23055 ARM_BUILTIN_WCMPGTSH
,
23056 ARM_BUILTIN_WCMPGTSW
,
23058 ARM_BUILTIN_TEXTRMSB
,
23059 ARM_BUILTIN_TEXTRMSH
,
23060 ARM_BUILTIN_TEXTRMSW
,
23061 ARM_BUILTIN_TEXTRMUB
,
23062 ARM_BUILTIN_TEXTRMUH
,
23063 ARM_BUILTIN_TEXTRMUW
,
23064 ARM_BUILTIN_TINSRB
,
23065 ARM_BUILTIN_TINSRH
,
23066 ARM_BUILTIN_TINSRW
,
23068 ARM_BUILTIN_WMAXSW
,
23069 ARM_BUILTIN_WMAXSH
,
23070 ARM_BUILTIN_WMAXSB
,
23071 ARM_BUILTIN_WMAXUW
,
23072 ARM_BUILTIN_WMAXUH
,
23073 ARM_BUILTIN_WMAXUB
,
23074 ARM_BUILTIN_WMINSW
,
23075 ARM_BUILTIN_WMINSH
,
23076 ARM_BUILTIN_WMINSB
,
23077 ARM_BUILTIN_WMINUW
,
23078 ARM_BUILTIN_WMINUH
,
23079 ARM_BUILTIN_WMINUB
,
23081 ARM_BUILTIN_WMULUM
,
23082 ARM_BUILTIN_WMULSM
,
23083 ARM_BUILTIN_WMULUL
,
23085 ARM_BUILTIN_PSADBH
,
23086 ARM_BUILTIN_WSHUFH
,
23100 ARM_BUILTIN_WSLLHI
,
23101 ARM_BUILTIN_WSLLWI
,
23102 ARM_BUILTIN_WSLLDI
,
23103 ARM_BUILTIN_WSRAHI
,
23104 ARM_BUILTIN_WSRAWI
,
23105 ARM_BUILTIN_WSRADI
,
23106 ARM_BUILTIN_WSRLHI
,
23107 ARM_BUILTIN_WSRLWI
,
23108 ARM_BUILTIN_WSRLDI
,
23109 ARM_BUILTIN_WRORHI
,
23110 ARM_BUILTIN_WRORWI
,
23111 ARM_BUILTIN_WRORDI
,
23113 ARM_BUILTIN_WUNPCKIHB
,
23114 ARM_BUILTIN_WUNPCKIHH
,
23115 ARM_BUILTIN_WUNPCKIHW
,
23116 ARM_BUILTIN_WUNPCKILB
,
23117 ARM_BUILTIN_WUNPCKILH
,
23118 ARM_BUILTIN_WUNPCKILW
,
23120 ARM_BUILTIN_WUNPCKEHSB
,
23121 ARM_BUILTIN_WUNPCKEHSH
,
23122 ARM_BUILTIN_WUNPCKEHSW
,
23123 ARM_BUILTIN_WUNPCKEHUB
,
23124 ARM_BUILTIN_WUNPCKEHUH
,
23125 ARM_BUILTIN_WUNPCKEHUW
,
23126 ARM_BUILTIN_WUNPCKELSB
,
23127 ARM_BUILTIN_WUNPCKELSH
,
23128 ARM_BUILTIN_WUNPCKELSW
,
23129 ARM_BUILTIN_WUNPCKELUB
,
23130 ARM_BUILTIN_WUNPCKELUH
,
23131 ARM_BUILTIN_WUNPCKELUW
,
23137 ARM_BUILTIN_WADDSUBHX
,
23138 ARM_BUILTIN_WSUBADDHX
,
23140 ARM_BUILTIN_WABSDIFFB
,
23141 ARM_BUILTIN_WABSDIFFH
,
23142 ARM_BUILTIN_WABSDIFFW
,
23144 ARM_BUILTIN_WADDCH
,
23145 ARM_BUILTIN_WADDCW
,
23148 ARM_BUILTIN_WAVG4R
,
23150 ARM_BUILTIN_WMADDSX
,
23151 ARM_BUILTIN_WMADDUX
,
23153 ARM_BUILTIN_WMADDSN
,
23154 ARM_BUILTIN_WMADDUN
,
23156 ARM_BUILTIN_WMULWSM
,
23157 ARM_BUILTIN_WMULWUM
,
23159 ARM_BUILTIN_WMULWSMR
,
23160 ARM_BUILTIN_WMULWUMR
,
23162 ARM_BUILTIN_WMULWL
,
23164 ARM_BUILTIN_WMULSMR
,
23165 ARM_BUILTIN_WMULUMR
,
23167 ARM_BUILTIN_WQMULM
,
23168 ARM_BUILTIN_WQMULMR
,
23170 ARM_BUILTIN_WQMULWM
,
23171 ARM_BUILTIN_WQMULWMR
,
23173 ARM_BUILTIN_WADDBHUSM
,
23174 ARM_BUILTIN_WADDBHUSL
,
23176 ARM_BUILTIN_WQMIABB
,
23177 ARM_BUILTIN_WQMIABT
,
23178 ARM_BUILTIN_WQMIATB
,
23179 ARM_BUILTIN_WQMIATT
,
23181 ARM_BUILTIN_WQMIABBN
,
23182 ARM_BUILTIN_WQMIABTN
,
23183 ARM_BUILTIN_WQMIATBN
,
23184 ARM_BUILTIN_WQMIATTN
,
23186 ARM_BUILTIN_WMIABB
,
23187 ARM_BUILTIN_WMIABT
,
23188 ARM_BUILTIN_WMIATB
,
23189 ARM_BUILTIN_WMIATT
,
23191 ARM_BUILTIN_WMIABBN
,
23192 ARM_BUILTIN_WMIABTN
,
23193 ARM_BUILTIN_WMIATBN
,
23194 ARM_BUILTIN_WMIATTN
,
23196 ARM_BUILTIN_WMIAWBB
,
23197 ARM_BUILTIN_WMIAWBT
,
23198 ARM_BUILTIN_WMIAWTB
,
23199 ARM_BUILTIN_WMIAWTT
,
23201 ARM_BUILTIN_WMIAWBBN
,
23202 ARM_BUILTIN_WMIAWBTN
,
23203 ARM_BUILTIN_WMIAWTBN
,
23204 ARM_BUILTIN_WMIAWTTN
,
23206 ARM_BUILTIN_WMERGE
,
23208 ARM_BUILTIN_CRC32B
,
23209 ARM_BUILTIN_CRC32H
,
23210 ARM_BUILTIN_CRC32W
,
23211 ARM_BUILTIN_CRC32CB
,
23212 ARM_BUILTIN_CRC32CH
,
23213 ARM_BUILTIN_CRC32CW
,
23219 #define CRYPTO1(L, U, M1, M2) \
23220 ARM_BUILTIN_CRYPTO_##U,
23221 #define CRYPTO2(L, U, M1, M2, M3) \
23222 ARM_BUILTIN_CRYPTO_##U,
23223 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23224 ARM_BUILTIN_CRYPTO_##U,
23226 #include "crypto.def"
23232 #include "arm_neon_builtins.def"
23237 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23251 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
23253 #define NUM_DREG_TYPES 5
23254 #define NUM_QREG_TYPES 6
23257 arm_init_neon_builtins (void)
23259 unsigned int i
, fcode
;
23262 tree neon_intQI_type_node
;
23263 tree neon_intHI_type_node
;
23264 tree neon_floatHF_type_node
;
23265 tree neon_polyQI_type_node
;
23266 tree neon_polyHI_type_node
;
23267 tree neon_intSI_type_node
;
23268 tree neon_intDI_type_node
;
23269 tree neon_intUTI_type_node
;
23270 tree neon_float_type_node
;
23272 tree intQI_pointer_node
;
23273 tree intHI_pointer_node
;
23274 tree intSI_pointer_node
;
23275 tree intDI_pointer_node
;
23276 tree float_pointer_node
;
23278 tree const_intQI_node
;
23279 tree const_intHI_node
;
23280 tree const_intSI_node
;
23281 tree const_intDI_node
;
23282 tree const_float_node
;
23284 tree const_intQI_pointer_node
;
23285 tree const_intHI_pointer_node
;
23286 tree const_intSI_pointer_node
;
23287 tree const_intDI_pointer_node
;
23288 tree const_float_pointer_node
;
23290 tree V8QI_type_node
;
23291 tree V4HI_type_node
;
23292 tree V4HF_type_node
;
23293 tree V2SI_type_node
;
23294 tree V2SF_type_node
;
23295 tree V16QI_type_node
;
23296 tree V8HI_type_node
;
23297 tree V4SI_type_node
;
23298 tree V4SF_type_node
;
23299 tree V2DI_type_node
;
23301 tree intUQI_type_node
;
23302 tree intUHI_type_node
;
23303 tree intUSI_type_node
;
23304 tree intUDI_type_node
;
23306 tree intEI_type_node
;
23307 tree intOI_type_node
;
23308 tree intCI_type_node
;
23309 tree intXI_type_node
;
23311 tree V8QI_pointer_node
;
23312 tree V4HI_pointer_node
;
23313 tree V2SI_pointer_node
;
23314 tree V2SF_pointer_node
;
23315 tree V16QI_pointer_node
;
23316 tree V8HI_pointer_node
;
23317 tree V4SI_pointer_node
;
23318 tree V4SF_pointer_node
;
23319 tree V2DI_pointer_node
;
23321 tree void_ftype_pv8qi_v8qi_v8qi
;
23322 tree void_ftype_pv4hi_v4hi_v4hi
;
23323 tree void_ftype_pv2si_v2si_v2si
;
23324 tree void_ftype_pv2sf_v2sf_v2sf
;
23325 tree void_ftype_pdi_di_di
;
23326 tree void_ftype_pv16qi_v16qi_v16qi
;
23327 tree void_ftype_pv8hi_v8hi_v8hi
;
23328 tree void_ftype_pv4si_v4si_v4si
;
23329 tree void_ftype_pv4sf_v4sf_v4sf
;
23330 tree void_ftype_pv2di_v2di_v2di
;
23332 tree reinterp_ftype_dreg
[NUM_DREG_TYPES
][NUM_DREG_TYPES
];
23333 tree reinterp_ftype_qreg
[NUM_QREG_TYPES
][NUM_QREG_TYPES
];
23334 tree dreg_types
[NUM_DREG_TYPES
], qreg_types
[NUM_QREG_TYPES
];
23336 /* Create distinguished type nodes for NEON vector element types,
23337 and pointers to values of such types, so we can detect them later. */
23338 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23339 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23340 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23341 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23342 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
23343 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
23344 neon_float_type_node
= make_node (REAL_TYPE
);
23345 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
23346 layout_type (neon_float_type_node
);
23347 neon_floatHF_type_node
= make_node (REAL_TYPE
);
23348 TYPE_PRECISION (neon_floatHF_type_node
) = GET_MODE_PRECISION (HFmode
);
23349 layout_type (neon_floatHF_type_node
);
23351 /* Define typedefs which exactly correspond to the modes we are basing vector
23352 types on. If you change these names you'll need to change
23353 the table used by arm_mangle_type too. */
23354 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
23355 "__builtin_neon_qi");
23356 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
23357 "__builtin_neon_hi");
23358 (*lang_hooks
.types
.register_builtin_type
) (neon_floatHF_type_node
,
23359 "__builtin_neon_hf");
23360 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
23361 "__builtin_neon_si");
23362 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
23363 "__builtin_neon_sf");
23364 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
23365 "__builtin_neon_di");
23366 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
23367 "__builtin_neon_poly8");
23368 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
23369 "__builtin_neon_poly16");
23371 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
23372 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
23373 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
23374 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
23375 float_pointer_node
= build_pointer_type (neon_float_type_node
);
23377 /* Next create constant-qualified versions of the above types. */
23378 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
23380 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
23382 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
23384 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
23386 const_float_node
= build_qualified_type (neon_float_type_node
,
23389 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
23390 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
23391 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
23392 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
23393 const_float_pointer_node
= build_pointer_type (const_float_node
);
23395 /* Now create vector types based on our NEON element types. */
23396 /* 64-bit vectors. */
23398 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
23400 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
23402 build_vector_type_for_mode (neon_floatHF_type_node
, V4HFmode
);
23404 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
23406 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
23407 /* 128-bit vectors. */
23409 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
23411 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
23413 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
23415 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
23417 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
23419 /* Unsigned integer types for various mode sizes. */
23420 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
23421 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
23422 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
23423 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
23424 neon_intUTI_type_node
= make_unsigned_type (GET_MODE_PRECISION (TImode
));
23427 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
23428 "__builtin_neon_uqi");
23429 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
23430 "__builtin_neon_uhi");
23431 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
23432 "__builtin_neon_usi");
23433 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23434 "__builtin_neon_udi");
23435 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23436 "__builtin_neon_poly64");
23437 (*lang_hooks
.types
.register_builtin_type
) (neon_intUTI_type_node
,
23438 "__builtin_neon_poly128");
23440 /* Opaque integer types for structures of vectors. */
23441 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
23442 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
23443 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
23444 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
23446 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
23447 "__builtin_neon_ti");
23448 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
23449 "__builtin_neon_ei");
23450 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
23451 "__builtin_neon_oi");
23452 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
23453 "__builtin_neon_ci");
23454 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
23455 "__builtin_neon_xi");
23457 /* Pointers to vector types. */
23458 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
23459 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
23460 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
23461 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
23462 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
23463 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
23464 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
23465 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
23466 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
23468 /* Operations which return results as pairs. */
23469 void_ftype_pv8qi_v8qi_v8qi
=
23470 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
23471 V8QI_type_node
, NULL
);
23472 void_ftype_pv4hi_v4hi_v4hi
=
23473 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
23474 V4HI_type_node
, NULL
);
23475 void_ftype_pv2si_v2si_v2si
=
23476 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
23477 V2SI_type_node
, NULL
);
23478 void_ftype_pv2sf_v2sf_v2sf
=
23479 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
23480 V2SF_type_node
, NULL
);
23481 void_ftype_pdi_di_di
=
23482 build_function_type_list (void_type_node
, intDI_pointer_node
,
23483 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
23484 void_ftype_pv16qi_v16qi_v16qi
=
23485 build_function_type_list (void_type_node
, V16QI_pointer_node
,
23486 V16QI_type_node
, V16QI_type_node
, NULL
);
23487 void_ftype_pv8hi_v8hi_v8hi
=
23488 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
23489 V8HI_type_node
, NULL
);
23490 void_ftype_pv4si_v4si_v4si
=
23491 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
23492 V4SI_type_node
, NULL
);
23493 void_ftype_pv4sf_v4sf_v4sf
=
23494 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
23495 V4SF_type_node
, NULL
);
23496 void_ftype_pv2di_v2di_v2di
=
23497 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
23498 V2DI_type_node
, NULL
);
23500 if (TARGET_CRYPTO
&& TARGET_HARD_FLOAT
)
23502 tree V4USI_type_node
=
23503 build_vector_type_for_mode (intUSI_type_node
, V4SImode
);
23505 tree V16UQI_type_node
=
23506 build_vector_type_for_mode (intUQI_type_node
, V16QImode
);
23508 tree v16uqi_ftype_v16uqi
23509 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
, NULL_TREE
);
23511 tree v16uqi_ftype_v16uqi_v16uqi
23512 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
,
23513 V16UQI_type_node
, NULL_TREE
);
23515 tree v4usi_ftype_v4usi
23516 = build_function_type_list (V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23518 tree v4usi_ftype_v4usi_v4usi
23519 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23520 V4USI_type_node
, NULL_TREE
);
23522 tree v4usi_ftype_v4usi_v4usi_v4usi
23523 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23524 V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23526 tree uti_ftype_udi_udi
23527 = build_function_type_list (neon_intUTI_type_node
, intUDI_type_node
,
23528 intUDI_type_node
, NULL_TREE
);
23541 ARM_BUILTIN_CRYPTO_##U
23543 "__builtin_arm_crypto_"#L
23544 #define FT1(R, A) \
23546 #define FT2(R, A1, A2) \
23547 R##_ftype_##A1##_##A2
23548 #define FT3(R, A1, A2, A3) \
23549 R##_ftype_##A1##_##A2##_##A3
23550 #define CRYPTO1(L, U, R, A) \
23551 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23552 C (U), BUILT_IN_MD, \
23554 #define CRYPTO2(L, U, R, A1, A2) \
23555 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23556 C (U), BUILT_IN_MD, \
23559 #define CRYPTO3(L, U, R, A1, A2, A3) \
23560 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23561 C (U), BUILT_IN_MD, \
23563 #include "crypto.def"
23574 dreg_types
[0] = V8QI_type_node
;
23575 dreg_types
[1] = V4HI_type_node
;
23576 dreg_types
[2] = V2SI_type_node
;
23577 dreg_types
[3] = V2SF_type_node
;
23578 dreg_types
[4] = neon_intDI_type_node
;
23580 qreg_types
[0] = V16QI_type_node
;
23581 qreg_types
[1] = V8HI_type_node
;
23582 qreg_types
[2] = V4SI_type_node
;
23583 qreg_types
[3] = V4SF_type_node
;
23584 qreg_types
[4] = V2DI_type_node
;
23585 qreg_types
[5] = neon_intUTI_type_node
;
23587 for (i
= 0; i
< NUM_QREG_TYPES
; i
++)
23590 for (j
= 0; j
< NUM_QREG_TYPES
; j
++)
23592 if (i
< NUM_DREG_TYPES
&& j
< NUM_DREG_TYPES
)
23593 reinterp_ftype_dreg
[i
][j
]
23594 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
23596 reinterp_ftype_qreg
[i
][j
]
23597 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
23601 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
23602 i
< ARRAY_SIZE (neon_builtin_data
);
23605 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
23607 const char* const modenames
[] = {
23608 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23609 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23614 int is_load
= 0, is_store
= 0;
23616 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
23623 case NEON_LOAD1LANE
:
23624 case NEON_LOADSTRUCT
:
23625 case NEON_LOADSTRUCTLANE
:
23627 /* Fall through. */
23629 case NEON_STORE1LANE
:
23630 case NEON_STORESTRUCT
:
23631 case NEON_STORESTRUCTLANE
:
23634 /* Fall through. */
23638 case NEON_LOGICBINOP
:
23639 case NEON_SHIFTINSERT
:
23646 case NEON_SHIFTIMM
:
23647 case NEON_SHIFTACC
:
23653 case NEON_LANEMULL
:
23654 case NEON_LANEMULH
:
23656 case NEON_SCALARMUL
:
23657 case NEON_SCALARMULL
:
23658 case NEON_SCALARMULH
:
23659 case NEON_SCALARMAC
:
23665 tree return_type
= void_type_node
, args
= void_list_node
;
23667 /* Build a function type directly from the insn_data for
23668 this builtin. The build_function_type() function takes
23669 care of removing duplicates for us. */
23670 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
23674 if (is_load
&& k
== 1)
23676 /* Neon load patterns always have the memory
23677 operand in the operand 1 position. */
23678 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23679 == neon_struct_operand
);
23685 eltype
= const_intQI_pointer_node
;
23690 eltype
= const_intHI_pointer_node
;
23695 eltype
= const_intSI_pointer_node
;
23700 eltype
= const_float_pointer_node
;
23705 eltype
= const_intDI_pointer_node
;
23708 default: gcc_unreachable ();
23711 else if (is_store
&& k
== 0)
23713 /* Similarly, Neon store patterns use operand 0 as
23714 the memory location to store to. */
23715 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23716 == neon_struct_operand
);
23722 eltype
= intQI_pointer_node
;
23727 eltype
= intHI_pointer_node
;
23732 eltype
= intSI_pointer_node
;
23737 eltype
= float_pointer_node
;
23742 eltype
= intDI_pointer_node
;
23745 default: gcc_unreachable ();
23750 switch (insn_data
[d
->code
].operand
[k
].mode
)
23752 case VOIDmode
: eltype
= void_type_node
; break;
23754 case QImode
: eltype
= neon_intQI_type_node
; break;
23755 case HImode
: eltype
= neon_intHI_type_node
; break;
23756 case SImode
: eltype
= neon_intSI_type_node
; break;
23757 case SFmode
: eltype
= neon_float_type_node
; break;
23758 case DImode
: eltype
= neon_intDI_type_node
; break;
23759 case TImode
: eltype
= intTI_type_node
; break;
23760 case EImode
: eltype
= intEI_type_node
; break;
23761 case OImode
: eltype
= intOI_type_node
; break;
23762 case CImode
: eltype
= intCI_type_node
; break;
23763 case XImode
: eltype
= intXI_type_node
; break;
23764 /* 64-bit vectors. */
23765 case V8QImode
: eltype
= V8QI_type_node
; break;
23766 case V4HImode
: eltype
= V4HI_type_node
; break;
23767 case V2SImode
: eltype
= V2SI_type_node
; break;
23768 case V2SFmode
: eltype
= V2SF_type_node
; break;
23769 /* 128-bit vectors. */
23770 case V16QImode
: eltype
= V16QI_type_node
; break;
23771 case V8HImode
: eltype
= V8HI_type_node
; break;
23772 case V4SImode
: eltype
= V4SI_type_node
; break;
23773 case V4SFmode
: eltype
= V4SF_type_node
; break;
23774 case V2DImode
: eltype
= V2DI_type_node
; break;
23775 default: gcc_unreachable ();
23779 if (k
== 0 && !is_store
)
23780 return_type
= eltype
;
23782 args
= tree_cons (NULL_TREE
, eltype
, args
);
23785 ftype
= build_function_type (return_type
, args
);
23789 case NEON_RESULTPAIR
:
23791 switch (insn_data
[d
->code
].operand
[1].mode
)
23793 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
23794 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
23795 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
23796 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
23797 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
23798 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
23799 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
23800 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
23801 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
23802 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
23803 default: gcc_unreachable ();
23808 case NEON_REINTERP
:
23810 /* We iterate over NUM_DREG_TYPES doubleword types,
23811 then NUM_QREG_TYPES quadword types.
23812 V4HF is not a type used in reinterpret, so we translate
23813 d->mode to the correct index in reinterp_ftype_dreg. */
23815 = GET_MODE_SIZE (insn_data
[d
->code
].operand
[0].mode
) > 8;
23816 int rhs
= (d
->mode
- ((!qreg_p
&& (d
->mode
> T_V4HF
)) ? 1 : 0))
23818 switch (insn_data
[d
->code
].operand
[0].mode
)
23820 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
23821 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
23822 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
23823 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
23824 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
23825 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
23826 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
23827 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
23828 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
23829 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
23830 case TImode
: ftype
= reinterp_ftype_qreg
[5][rhs
]; break;
23831 default: gcc_unreachable ();
23835 case NEON_FLOAT_WIDEN
:
23837 tree eltype
= NULL_TREE
;
23838 tree return_type
= NULL_TREE
;
23840 switch (insn_data
[d
->code
].operand
[1].mode
)
23843 eltype
= V4HF_type_node
;
23844 return_type
= V4SF_type_node
;
23846 default: gcc_unreachable ();
23848 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
23851 case NEON_FLOAT_NARROW
:
23853 tree eltype
= NULL_TREE
;
23854 tree return_type
= NULL_TREE
;
23856 switch (insn_data
[d
->code
].operand
[1].mode
)
23859 eltype
= V4SF_type_node
;
23860 return_type
= V4HF_type_node
;
23862 default: gcc_unreachable ();
23864 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
23868 gcc_unreachable ();
23871 gcc_assert (ftype
!= NULL
);
23873 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
23875 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
23877 arm_builtin_decls
[fcode
] = decl
;
23881 #undef NUM_DREG_TYPES
23882 #undef NUM_QREG_TYPES
/* Register builtin NAME with function type TYPE and builtin code CODE,
   but only if the feature bits in MASK are enabled for this target
   (tested against the global insn_flags).  The created decl is recorded
   in arm_builtin_decls so TARGET_BUILTIN_DECL can find it later.
   Wrapped in do { } while (0) so it behaves as a single statement.  */
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	{								\
	  tree bdecl;							\
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),		\
					BUILT_IN_MD, NULL, NULL_TREE);	\
	  arm_builtin_decls[CODE] = bdecl;				\
	}								\
    }									\
  while (0)
23897 struct builtin_description
23899 const unsigned int mask
;
23900 const enum insn_code icode
;
23901 const char * const name
;
23902 const enum arm_builtins code
;
23903 const enum rtx_code comparison
;
23904 const unsigned int flag
;
23907 static const struct builtin_description bdesc_2arg
[] =
23909 #define IWMMXT_BUILTIN(code, string, builtin) \
23910 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23911 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23913 #define IWMMXT2_BUILTIN(code, string, builtin) \
23914 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23915 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23917 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
23918 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
23919 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
23920 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
23921 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
23922 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
23923 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
23924 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
23925 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
23926 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
23927 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
23928 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
23929 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
23930 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
23931 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
23932 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
23933 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
23934 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
23935 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
23936 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
23937 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
23938 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
23939 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
23940 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
23941 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
23942 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
23943 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
23944 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
23945 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
23946 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
23947 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
23948 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
23949 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
23950 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
23951 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
23952 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
23953 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
23954 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
23955 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
23956 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
23957 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
23958 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
23959 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
23960 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
23961 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
23962 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
23963 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
23964 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
23965 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
23966 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
23967 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
23968 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
23969 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
23970 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
23971 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
23972 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
23973 IWMMXT2_BUILTIN (iwmmxt_waddsubhx
, "waddsubhx", WADDSUBHX
)
23974 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx
, "wsubaddhx", WSUBADDHX
)
23975 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb
, "wabsdiffb", WABSDIFFB
)
23976 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh
, "wabsdiffh", WABSDIFFH
)
23977 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw
, "wabsdiffw", WABSDIFFW
)
23978 IWMMXT2_BUILTIN (iwmmxt_avg4
, "wavg4", WAVG4
)
23979 IWMMXT2_BUILTIN (iwmmxt_avg4r
, "wavg4r", WAVG4R
)
23980 IWMMXT2_BUILTIN (iwmmxt_wmulwsm
, "wmulwsm", WMULWSM
)
23981 IWMMXT2_BUILTIN (iwmmxt_wmulwum
, "wmulwum", WMULWUM
)
23982 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr
, "wmulwsmr", WMULWSMR
)
23983 IWMMXT2_BUILTIN (iwmmxt_wmulwumr
, "wmulwumr", WMULWUMR
)
23984 IWMMXT2_BUILTIN (iwmmxt_wmulwl
, "wmulwl", WMULWL
)
23985 IWMMXT2_BUILTIN (iwmmxt_wmulsmr
, "wmulsmr", WMULSMR
)
23986 IWMMXT2_BUILTIN (iwmmxt_wmulumr
, "wmulumr", WMULUMR
)
23987 IWMMXT2_BUILTIN (iwmmxt_wqmulm
, "wqmulm", WQMULM
)
23988 IWMMXT2_BUILTIN (iwmmxt_wqmulmr
, "wqmulmr", WQMULMR
)
23989 IWMMXT2_BUILTIN (iwmmxt_wqmulwm
, "wqmulwm", WQMULWM
)
23990 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr
, "wqmulwmr", WQMULWMR
)
23991 IWMMXT_BUILTIN (iwmmxt_walignr0
, "walignr0", WALIGNR0
)
23992 IWMMXT_BUILTIN (iwmmxt_walignr1
, "walignr1", WALIGNR1
)
23993 IWMMXT_BUILTIN (iwmmxt_walignr2
, "walignr2", WALIGNR2
)
23994 IWMMXT_BUILTIN (iwmmxt_walignr3
, "walignr3", WALIGNR3
)
23996 #define IWMMXT_BUILTIN2(code, builtin) \
23997 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23999 #define IWMMXT2_BUILTIN2(code, builtin) \
24000 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24002 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm
, WADDBHUSM
)
24003 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl
, WADDBHUSL
)
24004 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
24005 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
24006 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
24007 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
24008 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
24009 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
24010 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
24011 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
24013 #define CRC32_BUILTIN(L, U) \
24014 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24016 CRC32_BUILTIN (crc32b
, CRC32B
)
24017 CRC32_BUILTIN (crc32h
, CRC32H
)
24018 CRC32_BUILTIN (crc32w
, CRC32W
)
24019 CRC32_BUILTIN (crc32cb
, CRC32CB
)
24020 CRC32_BUILTIN (crc32ch
, CRC32CH
)
24021 CRC32_BUILTIN (crc32cw
, CRC32CW
)
24022 #undef CRC32_BUILTIN
24025 #define CRYPTO_BUILTIN(L, U) \
24026 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24031 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24032 #define CRYPTO1(L, U, R, A)
24033 #define CRYPTO3(L, U, R, A1, A2, A3)
24034 #include "crypto.def"
24041 static const struct builtin_description bdesc_1arg
[] =
24043 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
24044 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
24045 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
24046 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
24047 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
24048 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
24049 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
24050 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
24051 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
24052 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
24053 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
24054 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
24055 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
24056 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
24057 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
24058 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
24059 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
24060 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
24061 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3
, "wabsb", WABSB
)
24062 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3
, "wabsh", WABSH
)
24063 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3
, "wabsw", WABSW
)
24064 IWMMXT_BUILTIN (tbcstv8qi
, "tbcstb", TBCSTB
)
24065 IWMMXT_BUILTIN (tbcstv4hi
, "tbcsth", TBCSTH
)
24066 IWMMXT_BUILTIN (tbcstv2si
, "tbcstw", TBCSTW
)
24068 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24069 #define CRYPTO2(L, U, R, A1, A2)
24070 #define CRYPTO3(L, U, R, A1, A2, A3)
24071 #include "crypto.def"
24077 static const struct builtin_description bdesc_3arg
[] =
24079 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24080 #define CRYPTO1(L, U, R, A)
24081 #define CRYPTO2(L, U, R, A1, A2)
24082 #include "crypto.def"
24087 #undef CRYPTO_BUILTIN
24089 /* Set up all the iWMMXt builtins. This is not called if
24090 TARGET_IWMMXT is zero. */
24093 arm_init_iwmmxt_builtins (void)
24095 const struct builtin_description
* d
;
24098 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
24099 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
24100 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
24102 tree v8qi_ftype_v8qi_v8qi_int
24103 = build_function_type_list (V8QI_type_node
,
24104 V8QI_type_node
, V8QI_type_node
,
24105 integer_type_node
, NULL_TREE
);
24106 tree v4hi_ftype_v4hi_int
24107 = build_function_type_list (V4HI_type_node
,
24108 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24109 tree v2si_ftype_v2si_int
24110 = build_function_type_list (V2SI_type_node
,
24111 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24112 tree v2si_ftype_di_di
24113 = build_function_type_list (V2SI_type_node
,
24114 long_long_integer_type_node
,
24115 long_long_integer_type_node
,
24117 tree di_ftype_di_int
24118 = build_function_type_list (long_long_integer_type_node
,
24119 long_long_integer_type_node
,
24120 integer_type_node
, NULL_TREE
);
24121 tree di_ftype_di_int_int
24122 = build_function_type_list (long_long_integer_type_node
,
24123 long_long_integer_type_node
,
24125 integer_type_node
, NULL_TREE
);
24126 tree int_ftype_v8qi
24127 = build_function_type_list (integer_type_node
,
24128 V8QI_type_node
, NULL_TREE
);
24129 tree int_ftype_v4hi
24130 = build_function_type_list (integer_type_node
,
24131 V4HI_type_node
, NULL_TREE
);
24132 tree int_ftype_v2si
24133 = build_function_type_list (integer_type_node
,
24134 V2SI_type_node
, NULL_TREE
);
24135 tree int_ftype_v8qi_int
24136 = build_function_type_list (integer_type_node
,
24137 V8QI_type_node
, integer_type_node
, NULL_TREE
);
24138 tree int_ftype_v4hi_int
24139 = build_function_type_list (integer_type_node
,
24140 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24141 tree int_ftype_v2si_int
24142 = build_function_type_list (integer_type_node
,
24143 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24144 tree v8qi_ftype_v8qi_int_int
24145 = build_function_type_list (V8QI_type_node
,
24146 V8QI_type_node
, integer_type_node
,
24147 integer_type_node
, NULL_TREE
);
24148 tree v4hi_ftype_v4hi_int_int
24149 = build_function_type_list (V4HI_type_node
,
24150 V4HI_type_node
, integer_type_node
,
24151 integer_type_node
, NULL_TREE
);
24152 tree v2si_ftype_v2si_int_int
24153 = build_function_type_list (V2SI_type_node
,
24154 V2SI_type_node
, integer_type_node
,
24155 integer_type_node
, NULL_TREE
);
24156 /* Miscellaneous. */
24157 tree v8qi_ftype_v4hi_v4hi
24158 = build_function_type_list (V8QI_type_node
,
24159 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24160 tree v4hi_ftype_v2si_v2si
24161 = build_function_type_list (V4HI_type_node
,
24162 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24163 tree v8qi_ftype_v4hi_v8qi
24164 = build_function_type_list (V8QI_type_node
,
24165 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
24166 tree v2si_ftype_v4hi_v4hi
24167 = build_function_type_list (V2SI_type_node
,
24168 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24169 tree v2si_ftype_v8qi_v8qi
24170 = build_function_type_list (V2SI_type_node
,
24171 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24172 tree v4hi_ftype_v4hi_di
24173 = build_function_type_list (V4HI_type_node
,
24174 V4HI_type_node
, long_long_integer_type_node
,
24176 tree v2si_ftype_v2si_di
24177 = build_function_type_list (V2SI_type_node
,
24178 V2SI_type_node
, long_long_integer_type_node
,
24181 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
24182 tree int_ftype_void
24183 = build_function_type_list (integer_type_node
, NULL_TREE
);
24185 = build_function_type_list (long_long_integer_type_node
,
24186 V8QI_type_node
, NULL_TREE
);
24188 = build_function_type_list (long_long_integer_type_node
,
24189 V4HI_type_node
, NULL_TREE
);
24191 = build_function_type_list (long_long_integer_type_node
,
24192 V2SI_type_node
, NULL_TREE
);
24193 tree v2si_ftype_v4hi
24194 = build_function_type_list (V2SI_type_node
,
24195 V4HI_type_node
, NULL_TREE
);
24196 tree v4hi_ftype_v8qi
24197 = build_function_type_list (V4HI_type_node
,
24198 V8QI_type_node
, NULL_TREE
);
24199 tree v8qi_ftype_v8qi
24200 = build_function_type_list (V8QI_type_node
,
24201 V8QI_type_node
, NULL_TREE
);
24202 tree v4hi_ftype_v4hi
24203 = build_function_type_list (V4HI_type_node
,
24204 V4HI_type_node
, NULL_TREE
);
24205 tree v2si_ftype_v2si
24206 = build_function_type_list (V2SI_type_node
,
24207 V2SI_type_node
, NULL_TREE
);
24209 tree di_ftype_di_v4hi_v4hi
24210 = build_function_type_list (long_long_unsigned_type_node
,
24211 long_long_unsigned_type_node
,
24212 V4HI_type_node
, V4HI_type_node
,
24215 tree di_ftype_v4hi_v4hi
24216 = build_function_type_list (long_long_unsigned_type_node
,
24217 V4HI_type_node
,V4HI_type_node
,
24220 tree v2si_ftype_v2si_v4hi_v4hi
24221 = build_function_type_list (V2SI_type_node
,
24222 V2SI_type_node
, V4HI_type_node
,
24223 V4HI_type_node
, NULL_TREE
);
24225 tree v2si_ftype_v2si_v8qi_v8qi
24226 = build_function_type_list (V2SI_type_node
,
24227 V2SI_type_node
, V8QI_type_node
,
24228 V8QI_type_node
, NULL_TREE
);
24230 tree di_ftype_di_v2si_v2si
24231 = build_function_type_list (long_long_unsigned_type_node
,
24232 long_long_unsigned_type_node
,
24233 V2SI_type_node
, V2SI_type_node
,
24236 tree di_ftype_di_di_int
24237 = build_function_type_list (long_long_unsigned_type_node
,
24238 long_long_unsigned_type_node
,
24239 long_long_unsigned_type_node
,
24240 integer_type_node
, NULL_TREE
);
24242 tree void_ftype_int
24243 = build_function_type_list (void_type_node
,
24244 integer_type_node
, NULL_TREE
);
24246 tree v8qi_ftype_char
24247 = build_function_type_list (V8QI_type_node
,
24248 signed_char_type_node
, NULL_TREE
);
24250 tree v4hi_ftype_short
24251 = build_function_type_list (V4HI_type_node
,
24252 short_integer_type_node
, NULL_TREE
);
24254 tree v2si_ftype_int
24255 = build_function_type_list (V2SI_type_node
,
24256 integer_type_node
, NULL_TREE
);
24258 /* Normal vector binops. */
24259 tree v8qi_ftype_v8qi_v8qi
24260 = build_function_type_list (V8QI_type_node
,
24261 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24262 tree v4hi_ftype_v4hi_v4hi
24263 = build_function_type_list (V4HI_type_node
,
24264 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
24265 tree v2si_ftype_v2si_v2si
24266 = build_function_type_list (V2SI_type_node
,
24267 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24268 tree di_ftype_di_di
24269 = build_function_type_list (long_long_unsigned_type_node
,
24270 long_long_unsigned_type_node
,
24271 long_long_unsigned_type_node
,
24274 /* Add all builtins that are more or less simple operations on two
24276 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
24278 /* Use one of the operands; the target can have a different mode for
24279 mask-generating compares. */
24280 enum machine_mode mode
;
24283 if (d
->name
== 0 || !(d
->mask
== FL_IWMMXT
|| d
->mask
== FL_IWMMXT2
))
24286 mode
= insn_data
[d
->icode
].operand
[1].mode
;
24291 type
= v8qi_ftype_v8qi_v8qi
;
24294 type
= v4hi_ftype_v4hi_v4hi
;
24297 type
= v2si_ftype_v2si_v2si
;
24300 type
= di_ftype_di_di
;
24304 gcc_unreachable ();
24307 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
24310 /* Add the remaining MMX insns with somewhat more complicated types. */
24311 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24312 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24313 ARM_BUILTIN_ ## CODE)
24315 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24316 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24317 ARM_BUILTIN_ ## CODE)
24319 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
24320 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
24321 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
24322 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
24323 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
24324 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
24325 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
24326 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
24327 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
24329 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
24330 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
24331 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
24332 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
24333 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
24334 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
24336 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
24337 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
24338 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
24339 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
24340 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
24341 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
24343 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
24344 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
24345 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
24346 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
24347 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
24348 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
24350 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
24351 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
24352 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
24353 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
24354 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
24355 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
24357 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
24359 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
24360 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
24361 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
24362 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
24363 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
24364 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
24365 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
24366 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
24367 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
24368 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
24370 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
24371 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
24372 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
24373 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
24374 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
24375 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
24376 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
24377 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
24378 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
24380 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
24381 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
24382 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
24384 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
24385 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
24386 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
24388 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
24389 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
24391 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
24392 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
24393 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
24394 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
24395 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
24396 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
24398 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
24399 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
24400 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
24401 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
24402 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
24403 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
24404 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
24405 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
24406 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
24407 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
24408 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
24409 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
24411 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
24412 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
24413 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
24414 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
24416 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
24417 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
24418 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
24419 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
24420 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
24421 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
24422 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
24424 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
24425 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
24426 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
24428 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
24429 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
24430 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
24431 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
24433 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
24434 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
24435 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
24436 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
24438 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
24439 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
24440 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
24441 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
24443 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
24444 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
24445 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
24446 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
24448 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
24449 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
24450 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
24451 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
24453 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
24454 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
24455 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
24456 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
24458 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
24460 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
24461 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
24462 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
24464 #undef iwmmx_mbuiltin
24465 #undef iwmmx2_mbuiltin
24469 arm_init_fp16_builtins (void)
24471 tree fp16_type
= make_node (REAL_TYPE
);
24472 TYPE_PRECISION (fp16_type
) = 16;
24473 layout_type (fp16_type
);
24474 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
24478 arm_init_crc32_builtins ()
24480 tree si_ftype_si_qi
24481 = build_function_type_list (unsigned_intSI_type_node
,
24482 unsigned_intSI_type_node
,
24483 unsigned_intQI_type_node
, NULL_TREE
);
24484 tree si_ftype_si_hi
24485 = build_function_type_list (unsigned_intSI_type_node
,
24486 unsigned_intSI_type_node
,
24487 unsigned_intHI_type_node
, NULL_TREE
);
24488 tree si_ftype_si_si
24489 = build_function_type_list (unsigned_intSI_type_node
,
24490 unsigned_intSI_type_node
,
24491 unsigned_intSI_type_node
, NULL_TREE
);
24493 arm_builtin_decls
[ARM_BUILTIN_CRC32B
]
24494 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi
,
24495 ARM_BUILTIN_CRC32B
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24496 arm_builtin_decls
[ARM_BUILTIN_CRC32H
]
24497 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi
,
24498 ARM_BUILTIN_CRC32H
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24499 arm_builtin_decls
[ARM_BUILTIN_CRC32W
]
24500 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si
,
24501 ARM_BUILTIN_CRC32W
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24502 arm_builtin_decls
[ARM_BUILTIN_CRC32CB
]
24503 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi
,
24504 ARM_BUILTIN_CRC32CB
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24505 arm_builtin_decls
[ARM_BUILTIN_CRC32CH
]
24506 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi
,
24507 ARM_BUILTIN_CRC32CH
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24508 arm_builtin_decls
[ARM_BUILTIN_CRC32CW
]
24509 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si
,
24510 ARM_BUILTIN_CRC32CW
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24514 arm_init_builtins (void)
24516 if (TARGET_REALLY_IWMMXT
)
24517 arm_init_iwmmxt_builtins ();
24520 arm_init_neon_builtins ();
24522 if (arm_fp16_format
)
24523 arm_init_fp16_builtins ();
24526 arm_init_crc32_builtins ();
24529 /* Return the ARM builtin for CODE. */
24532 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
24534 if (code
>= ARM_BUILTIN_MAX
)
24535 return error_mark_node
;
24537 return arm_builtin_decls
[code
];
24540 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24542 static const char *
24543 arm_invalid_parameter_type (const_tree t
)
24545 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24546 return N_("function parameters cannot have __fp16 type");
24550 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24552 static const char *
24553 arm_invalid_return_type (const_tree t
)
24555 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24556 return N_("functions cannot return __fp16 type");
24560 /* Implement TARGET_PROMOTED_TYPE. */
24563 arm_promoted_type (const_tree t
)
24565 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24566 return float_type_node
;
24570 /* Implement TARGET_CONVERT_TO_TYPE.
24571 Specifically, this hook implements the peculiarity of the ARM
24572 half-precision floating-point C semantics that requires conversions between
24573 __fp16 to or from double to do an intermediate conversion to float. */
24576 arm_convert_to_type (tree type
, tree expr
)
24578 tree fromtype
= TREE_TYPE (expr
);
24579 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
24581 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
24582 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
24583 return convert (type
, convert (float_type_node
, expr
));
24587 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24588 This simply adds HFmode as a supported mode; even though we don't
24589 implement arithmetic on this type directly, it's supported by
24590 optabs conversions, much the way the double-word arithmetic is
24591 special-cased in the default hook. */
24594 arm_scalar_mode_supported_p (enum machine_mode mode
)
24596 if (mode
== HFmode
)
24597 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
24598 else if (ALL_FIXED_POINT_MODE_P (mode
))
24601 return default_scalar_mode_supported_p (mode
);
24604 /* Errors in the source file can cause expand_expr to return const0_rtx
24605 where we expect a vector. To avoid crashing, use one of the vector
24606 clear instructions. */
24609 safe_vector_operand (rtx x
, enum machine_mode mode
)
24611 if (x
!= const0_rtx
)
24613 x
= gen_reg_rtx (mode
);
24615 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
24616 : gen_rtx_SUBREG (DImode
, x
, 0)));
24620 /* Function to expand ternary builtins. */
24622 arm_expand_ternop_builtin (enum insn_code icode
,
24623 tree exp
, rtx target
)
24626 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24627 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24628 tree arg2
= CALL_EXPR_ARG (exp
, 2);
24630 rtx op0
= expand_normal (arg0
);
24631 rtx op1
= expand_normal (arg1
);
24632 rtx op2
= expand_normal (arg2
);
24633 rtx op3
= NULL_RTX
;
24635 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24636 lane operand depending on endianness. */
24637 bool builtin_sha1cpm_p
= false;
24639 if (insn_data
[icode
].n_operands
== 5)
24641 gcc_assert (icode
== CODE_FOR_crypto_sha1c
24642 || icode
== CODE_FOR_crypto_sha1p
24643 || icode
== CODE_FOR_crypto_sha1m
);
24644 builtin_sha1cpm_p
= true;
24646 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24647 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24648 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24649 enum machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
24652 if (VECTOR_MODE_P (mode0
))
24653 op0
= safe_vector_operand (op0
, mode0
);
24654 if (VECTOR_MODE_P (mode1
))
24655 op1
= safe_vector_operand (op1
, mode1
);
24656 if (VECTOR_MODE_P (mode2
))
24657 op2
= safe_vector_operand (op2
, mode2
);
24660 || GET_MODE (target
) != tmode
24661 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24662 target
= gen_reg_rtx (tmode
);
24664 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
24665 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
)
24666 && (GET_MODE (op2
) == mode2
|| GET_MODE (op2
) == VOIDmode
));
24668 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24669 op0
= copy_to_mode_reg (mode0
, op0
);
24670 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24671 op1
= copy_to_mode_reg (mode1
, op1
);
24672 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
24673 op2
= copy_to_mode_reg (mode2
, op2
);
24674 if (builtin_sha1cpm_p
)
24675 op3
= GEN_INT (TARGET_BIG_END
? 1 : 0);
24677 if (builtin_sha1cpm_p
)
24678 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
24680 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
24687 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24690 arm_expand_binop_builtin (enum insn_code icode
,
24691 tree exp
, rtx target
)
24694 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24695 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24696 rtx op0
= expand_normal (arg0
);
24697 rtx op1
= expand_normal (arg1
);
24698 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24699 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24700 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24702 if (VECTOR_MODE_P (mode0
))
24703 op0
= safe_vector_operand (op0
, mode0
);
24704 if (VECTOR_MODE_P (mode1
))
24705 op1
= safe_vector_operand (op1
, mode1
);
24708 || GET_MODE (target
) != tmode
24709 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24710 target
= gen_reg_rtx (tmode
);
24712 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
24713 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
24715 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24716 op0
= copy_to_mode_reg (mode0
, op0
);
24717 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24718 op1
= copy_to_mode_reg (mode1
, op1
);
24720 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24727 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24730 arm_expand_unop_builtin (enum insn_code icode
,
24731 tree exp
, rtx target
, int do_load
)
24734 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24735 rtx op0
= expand_normal (arg0
);
24736 rtx op1
= NULL_RTX
;
24737 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24738 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24739 bool builtin_sha1h_p
= false;
24741 if (insn_data
[icode
].n_operands
== 3)
24743 gcc_assert (icode
== CODE_FOR_crypto_sha1h
);
24744 builtin_sha1h_p
= true;
24748 || GET_MODE (target
) != tmode
24749 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24750 target
= gen_reg_rtx (tmode
);
24752 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
24755 if (VECTOR_MODE_P (mode0
))
24756 op0
= safe_vector_operand (op0
, mode0
);
24758 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24759 op0
= copy_to_mode_reg (mode0
, op0
);
24761 if (builtin_sha1h_p
)
24762 op1
= GEN_INT (TARGET_BIG_END
? 1 : 0);
24764 if (builtin_sha1h_p
)
24765 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24767 pat
= GEN_FCN (icode
) (target
, op0
);
24775 NEON_ARG_COPY_TO_REG
,
24781 #define NEON_MAX_BUILTIN_ARGS 5
24783 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24784 and return an expression for the accessed memory.
24786 The intrinsic function operates on a block of registers that has
24787 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24788 function references the memory at EXP of type TYPE and in mode
24789 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24793 neon_dereference_pointer (tree exp
, tree type
, enum machine_mode mem_mode
,
24794 enum machine_mode reg_mode
,
24795 neon_builtin_type_mode type_mode
)
24797 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
24798 tree elem_type
, upper_bound
, array_type
;
24800 /* Work out the size of the register block in bytes. */
24801 reg_size
= GET_MODE_SIZE (reg_mode
);
24803 /* Work out the size of each vector in bytes. */
24804 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
24805 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
24807 /* Work out how many vectors there are. */
24808 gcc_assert (reg_size
% vector_size
== 0);
24809 nvectors
= reg_size
/ vector_size
;
24811 /* Work out the type of each element. */
24812 gcc_assert (POINTER_TYPE_P (type
));
24813 elem_type
= TREE_TYPE (type
);
24815 /* Work out how many elements are being loaded or stored.
24816 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24817 and memory elements; anything else implies a lane load or store. */
24818 if (mem_mode
== reg_mode
)
24819 nelems
= vector_size
* nvectors
/ int_size_in_bytes (elem_type
);
24823 /* Create a type that describes the full access. */
24824 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
24825 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
24827 /* Dereference EXP using that type. */
24828 return fold_build2 (MEM_REF
, array_type
, exp
,
24829 build_int_cst (build_pointer_type (array_type
), 0));
24832 /* Expand a Neon builtin. */
24834 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
24835 neon_builtin_type_mode type_mode
,
24836 tree exp
, int fcode
, ...)
24840 tree arg
[NEON_MAX_BUILTIN_ARGS
];
24841 rtx op
[NEON_MAX_BUILTIN_ARGS
];
24844 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24845 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
24846 enum machine_mode other_mode
;
24852 || GET_MODE (target
) != tmode
24853 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
24854 target
= gen_reg_rtx (tmode
);
24856 va_start (ap
, fcode
);
24858 formals
= TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls
[fcode
]));
24862 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
24864 if (thisarg
== NEON_ARG_STOP
)
24868 opno
= argc
+ have_retval
;
24869 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
24870 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
24871 arg_type
= TREE_VALUE (formals
);
24872 if (thisarg
== NEON_ARG_MEMORY
)
24874 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
24875 arg
[argc
] = neon_dereference_pointer (arg
[argc
], arg_type
,
24876 mode
[argc
], other_mode
,
24880 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
24882 op
[argc
] = expand_expr (arg
[argc
], NULL_RTX
, VOIDmode
,
24883 (thisarg
== NEON_ARG_MEMORY
24884 ? EXPAND_MEMORY
: EXPAND_NORMAL
));
24888 case NEON_ARG_COPY_TO_REG
:
24889 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24890 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24891 (op
[argc
], mode
[argc
]))
24892 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
24895 case NEON_ARG_CONSTANT
:
24896 /* FIXME: This error message is somewhat unhelpful. */
24897 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24898 (op
[argc
], mode
[argc
]))
24899 error ("argument must be a constant");
24902 case NEON_ARG_MEMORY
:
24903 /* Check if expand failed. */
24904 if (op
[argc
] == const0_rtx
)
24906 gcc_assert (MEM_P (op
[argc
]));
24907 PUT_MODE (op
[argc
], mode
[argc
]);
24908 /* ??? arm_neon.h uses the same built-in functions for signed
24909 and unsigned accesses, casting where necessary. This isn't
24911 set_mem_alias_set (op
[argc
], 0);
24912 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24913 (op
[argc
], mode
[argc
]))
24914 op
[argc
] = (replace_equiv_address
24915 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
24918 case NEON_ARG_STOP
:
24919 gcc_unreachable ();
24923 formals
= TREE_CHAIN (formals
);
24933 pat
= GEN_FCN (icode
) (target
, op
[0]);
24937 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
24941 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
24945 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
24949 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
24953 gcc_unreachable ();
24959 pat
= GEN_FCN (icode
) (op
[0]);
24963 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
24967 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
24971 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
24975 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
24979 gcc_unreachable ();
24990 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24991 constants defined per-instruction or per instruction-variant. Instead, the
24992 required info is looked up in the table neon_builtin_data. */
24994 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
24996 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
24997 neon_itype itype
= d
->itype
;
24998 enum insn_code icode
= d
->code
;
24999 neon_builtin_type_mode type_mode
= d
->mode
;
25006 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25007 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25011 case NEON_SCALARMUL
:
25012 case NEON_SCALARMULL
:
25013 case NEON_SCALARMULH
:
25014 case NEON_SHIFTINSERT
:
25015 case NEON_LOGICBINOP
:
25016 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25017 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25021 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25022 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25023 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25027 case NEON_SHIFTIMM
:
25028 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25029 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
25033 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25034 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25039 case NEON_FLOAT_WIDEN
:
25040 case NEON_FLOAT_NARROW
:
25041 case NEON_REINTERP
:
25042 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25043 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25047 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25048 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25050 case NEON_RESULTPAIR
:
25051 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25052 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25056 case NEON_LANEMULL
:
25057 case NEON_LANEMULH
:
25058 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25059 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25060 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25063 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25064 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25065 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25067 case NEON_SHIFTACC
:
25068 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25069 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25070 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25072 case NEON_SCALARMAC
:
25073 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25074 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25075 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25079 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25080 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25084 case NEON_LOADSTRUCT
:
25085 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25086 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
25088 case NEON_LOAD1LANE
:
25089 case NEON_LOADSTRUCTLANE
:
25090 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25091 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25095 case NEON_STORESTRUCT
:
25096 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25097 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25099 case NEON_STORE1LANE
:
25100 case NEON_STORESTRUCTLANE
:
25101 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25102 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25106 gcc_unreachable ();
25109 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25111 neon_reinterpret (rtx dest
, rtx src
)
25113 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
25116 /* Emit code to place a Neon pair result in memory locations (with equal
25119 neon_emit_pair_result_insn (enum machine_mode mode
,
25120 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
25123 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
25124 rtx tmp1
= gen_reg_rtx (mode
);
25125 rtx tmp2
= gen_reg_rtx (mode
);
25127 emit_insn (intfn (tmp1
, op1
, op2
, tmp2
));
25129 emit_move_insn (mem
, tmp1
);
25130 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
25131 emit_move_insn (mem
, tmp2
);
25134 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25135 not to early-clobber SRC registers in the process.
25137 We assume that the operands described by SRC and DEST represent a
25138 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25139 number of components into which the copy has been decomposed. */
25141 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
25145 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
25146 || REGNO (operands
[0]) < REGNO (operands
[1]))
25148 for (i
= 0; i
< count
; i
++)
25150 operands
[2 * i
] = dest
[i
];
25151 operands
[2 * i
+ 1] = src
[i
];
25156 for (i
= 0; i
< count
; i
++)
25158 operands
[2 * i
] = dest
[count
- i
- 1];
25159 operands
[2 * i
+ 1] = src
[count
- i
- 1];
25164 /* Split operands into moves from op[1] + op[2] into op[0]. */
25167 neon_split_vcombine (rtx operands
[3])
25169 unsigned int dest
= REGNO (operands
[0]);
25170 unsigned int src1
= REGNO (operands
[1]);
25171 unsigned int src2
= REGNO (operands
[2]);
25172 enum machine_mode halfmode
= GET_MODE (operands
[1]);
25173 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
25174 rtx destlo
, desthi
;
25176 if (src1
== dest
&& src2
== dest
+ halfregs
)
25178 /* No-op move. Can't split to nothing; emit something. */
25179 emit_note (NOTE_INSN_DELETED
);
25183 /* Preserve register attributes for variable tracking. */
25184 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
25185 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
25186 GET_MODE_SIZE (halfmode
));
25188 /* Special case of reversed high/low parts. Use VSWP. */
25189 if (src2
== dest
&& src1
== dest
+ halfregs
)
25191 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
25192 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
25193 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
25197 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
25199 /* Try to avoid unnecessary moves if part of the result
25200 is in the right place already. */
25202 emit_move_insn (destlo
, operands
[1]);
25203 if (src2
!= dest
+ halfregs
)
25204 emit_move_insn (desthi
, operands
[2]);
25208 if (src2
!= dest
+ halfregs
)
25209 emit_move_insn (desthi
, operands
[2]);
25211 emit_move_insn (destlo
, operands
[1]);
25215 /* Expand an expression EXP that calls a built-in function,
25216 with result going to TARGET if that's convenient
25217 (and in mode MODE if that's convenient).
25218 SUBTARGET may be used as the target for computing one of EXP's operands.
25219 IGNORE is nonzero if the value is to be ignored. */
25222 arm_expand_builtin (tree exp
,
25224 rtx subtarget ATTRIBUTE_UNUSED
,
25225 enum machine_mode mode ATTRIBUTE_UNUSED
,
25226 int ignore ATTRIBUTE_UNUSED
)
25228 const struct builtin_description
* d
;
25229 enum insn_code icode
;
25230 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
25238 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
25240 enum machine_mode tmode
;
25241 enum machine_mode mode0
;
25242 enum machine_mode mode1
;
25243 enum machine_mode mode2
;
25249 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
25250 return arm_expand_neon_builtin (fcode
, exp
, target
);
25254 case ARM_BUILTIN_TEXTRMSB
:
25255 case ARM_BUILTIN_TEXTRMUB
:
25256 case ARM_BUILTIN_TEXTRMSH
:
25257 case ARM_BUILTIN_TEXTRMUH
:
25258 case ARM_BUILTIN_TEXTRMSW
:
25259 case ARM_BUILTIN_TEXTRMUW
:
25260 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
25261 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
25262 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
25263 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
25264 : CODE_FOR_iwmmxt_textrmw
);
25266 arg0
= CALL_EXPR_ARG (exp
, 0);
25267 arg1
= CALL_EXPR_ARG (exp
, 1);
25268 op0
= expand_normal (arg0
);
25269 op1
= expand_normal (arg1
);
25270 tmode
= insn_data
[icode
].operand
[0].mode
;
25271 mode0
= insn_data
[icode
].operand
[1].mode
;
25272 mode1
= insn_data
[icode
].operand
[2].mode
;
25274 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25275 op0
= copy_to_mode_reg (mode0
, op0
);
25276 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25278 /* @@@ better error message */
25279 error ("selector must be an immediate");
25280 return gen_reg_rtx (tmode
);
25283 opint
= INTVAL (op1
);
25284 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
25286 if (opint
> 7 || opint
< 0)
25287 error ("the range of selector should be in 0 to 7");
25289 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
25291 if (opint
> 3 || opint
< 0)
25292 error ("the range of selector should be in 0 to 3");
25294 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25296 if (opint
> 1 || opint
< 0)
25297 error ("the range of selector should be in 0 to 1");
25301 || GET_MODE (target
) != tmode
25302 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25303 target
= gen_reg_rtx (tmode
);
25304 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25310 case ARM_BUILTIN_WALIGNI
:
25311 /* If op2 is immediate, call walighi, else call walighr. */
25312 arg0
= CALL_EXPR_ARG (exp
, 0);
25313 arg1
= CALL_EXPR_ARG (exp
, 1);
25314 arg2
= CALL_EXPR_ARG (exp
, 2);
25315 op0
= expand_normal (arg0
);
25316 op1
= expand_normal (arg1
);
25317 op2
= expand_normal (arg2
);
25318 if (CONST_INT_P (op2
))
25320 icode
= CODE_FOR_iwmmxt_waligni
;
25321 tmode
= insn_data
[icode
].operand
[0].mode
;
25322 mode0
= insn_data
[icode
].operand
[1].mode
;
25323 mode1
= insn_data
[icode
].operand
[2].mode
;
25324 mode2
= insn_data
[icode
].operand
[3].mode
;
25325 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25326 op0
= copy_to_mode_reg (mode0
, op0
);
25327 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25328 op1
= copy_to_mode_reg (mode1
, op1
);
25329 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
25330 selector
= INTVAL (op2
);
25331 if (selector
> 7 || selector
< 0)
25332 error ("the range of selector should be in 0 to 7");
25336 icode
= CODE_FOR_iwmmxt_walignr
;
25337 tmode
= insn_data
[icode
].operand
[0].mode
;
25338 mode0
= insn_data
[icode
].operand
[1].mode
;
25339 mode1
= insn_data
[icode
].operand
[2].mode
;
25340 mode2
= insn_data
[icode
].operand
[3].mode
;
25341 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25342 op0
= copy_to_mode_reg (mode0
, op0
);
25343 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25344 op1
= copy_to_mode_reg (mode1
, op1
);
25345 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25346 op2
= copy_to_mode_reg (mode2
, op2
);
25349 || GET_MODE (target
) != tmode
25350 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25351 target
= gen_reg_rtx (tmode
);
25352 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25358 case ARM_BUILTIN_TINSRB
:
25359 case ARM_BUILTIN_TINSRH
:
25360 case ARM_BUILTIN_TINSRW
:
25361 case ARM_BUILTIN_WMERGE
:
25362 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
25363 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
25364 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
25365 : CODE_FOR_iwmmxt_tinsrw
);
25366 arg0
= CALL_EXPR_ARG (exp
, 0);
25367 arg1
= CALL_EXPR_ARG (exp
, 1);
25368 arg2
= CALL_EXPR_ARG (exp
, 2);
25369 op0
= expand_normal (arg0
);
25370 op1
= expand_normal (arg1
);
25371 op2
= expand_normal (arg2
);
25372 tmode
= insn_data
[icode
].operand
[0].mode
;
25373 mode0
= insn_data
[icode
].operand
[1].mode
;
25374 mode1
= insn_data
[icode
].operand
[2].mode
;
25375 mode2
= insn_data
[icode
].operand
[3].mode
;
25377 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25378 op0
= copy_to_mode_reg (mode0
, op0
);
25379 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25380 op1
= copy_to_mode_reg (mode1
, op1
);
25381 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25383 error ("selector must be an immediate");
25386 if (icode
== CODE_FOR_iwmmxt_wmerge
)
25388 selector
= INTVAL (op2
);
25389 if (selector
> 7 || selector
< 0)
25390 error ("the range of selector should be in 0 to 7");
25392 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
25393 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
25394 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
25397 selector
= INTVAL (op2
);
25398 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
25399 error ("the range of selector should be in 0 to 7");
25400 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
25401 error ("the range of selector should be in 0 to 3");
25402 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
25403 error ("the range of selector should be in 0 to 1");
25405 op2
= GEN_INT (mask
);
25408 || GET_MODE (target
) != tmode
25409 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25410 target
= gen_reg_rtx (tmode
);
25411 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25417 case ARM_BUILTIN_SETWCGR0
:
25418 case ARM_BUILTIN_SETWCGR1
:
25419 case ARM_BUILTIN_SETWCGR2
:
25420 case ARM_BUILTIN_SETWCGR3
:
25421 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
25422 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
25423 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
25424 : CODE_FOR_iwmmxt_setwcgr3
);
25425 arg0
= CALL_EXPR_ARG (exp
, 0);
25426 op0
= expand_normal (arg0
);
25427 mode0
= insn_data
[icode
].operand
[0].mode
;
25428 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
25429 op0
= copy_to_mode_reg (mode0
, op0
);
25430 pat
= GEN_FCN (icode
) (op0
);
25436 case ARM_BUILTIN_GETWCGR0
:
25437 case ARM_BUILTIN_GETWCGR1
:
25438 case ARM_BUILTIN_GETWCGR2
:
25439 case ARM_BUILTIN_GETWCGR3
:
25440 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
25441 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
25442 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
25443 : CODE_FOR_iwmmxt_getwcgr3
);
25444 tmode
= insn_data
[icode
].operand
[0].mode
;
25446 || GET_MODE (target
) != tmode
25447 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25448 target
= gen_reg_rtx (tmode
);
25449 pat
= GEN_FCN (icode
) (target
);
25455 case ARM_BUILTIN_WSHUFH
:
25456 icode
= CODE_FOR_iwmmxt_wshufh
;
25457 arg0
= CALL_EXPR_ARG (exp
, 0);
25458 arg1
= CALL_EXPR_ARG (exp
, 1);
25459 op0
= expand_normal (arg0
);
25460 op1
= expand_normal (arg1
);
25461 tmode
= insn_data
[icode
].operand
[0].mode
;
25462 mode1
= insn_data
[icode
].operand
[1].mode
;
25463 mode2
= insn_data
[icode
].operand
[2].mode
;
25465 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
25466 op0
= copy_to_mode_reg (mode1
, op0
);
25467 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
25469 error ("mask must be an immediate");
25472 selector
= INTVAL (op1
);
25473 if (selector
< 0 || selector
> 255)
25474 error ("the range of mask should be in 0 to 255");
25476 || GET_MODE (target
) != tmode
25477 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25478 target
= gen_reg_rtx (tmode
);
25479 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25485 case ARM_BUILTIN_WMADDS
:
25486 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
25487 case ARM_BUILTIN_WMADDSX
:
25488 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
25489 case ARM_BUILTIN_WMADDSN
:
25490 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
25491 case ARM_BUILTIN_WMADDU
:
25492 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
25493 case ARM_BUILTIN_WMADDUX
:
25494 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
25495 case ARM_BUILTIN_WMADDUN
:
25496 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
25497 case ARM_BUILTIN_WSADBZ
:
25498 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
25499 case ARM_BUILTIN_WSADHZ
:
25500 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
25502 /* Several three-argument builtins. */
25503 case ARM_BUILTIN_WMACS
:
25504 case ARM_BUILTIN_WMACU
:
25505 case ARM_BUILTIN_TMIA
:
25506 case ARM_BUILTIN_TMIAPH
:
25507 case ARM_BUILTIN_TMIATT
:
25508 case ARM_BUILTIN_TMIATB
:
25509 case ARM_BUILTIN_TMIABT
:
25510 case ARM_BUILTIN_TMIABB
:
25511 case ARM_BUILTIN_WQMIABB
:
25512 case ARM_BUILTIN_WQMIABT
:
25513 case ARM_BUILTIN_WQMIATB
:
25514 case ARM_BUILTIN_WQMIATT
:
25515 case ARM_BUILTIN_WQMIABBN
:
25516 case ARM_BUILTIN_WQMIABTN
:
25517 case ARM_BUILTIN_WQMIATBN
:
25518 case ARM_BUILTIN_WQMIATTN
:
25519 case ARM_BUILTIN_WMIABB
:
25520 case ARM_BUILTIN_WMIABT
:
25521 case ARM_BUILTIN_WMIATB
:
25522 case ARM_BUILTIN_WMIATT
:
25523 case ARM_BUILTIN_WMIABBN
:
25524 case ARM_BUILTIN_WMIABTN
:
25525 case ARM_BUILTIN_WMIATBN
:
25526 case ARM_BUILTIN_WMIATTN
:
25527 case ARM_BUILTIN_WMIAWBB
:
25528 case ARM_BUILTIN_WMIAWBT
:
25529 case ARM_BUILTIN_WMIAWTB
:
25530 case ARM_BUILTIN_WMIAWTT
:
25531 case ARM_BUILTIN_WMIAWBBN
:
25532 case ARM_BUILTIN_WMIAWBTN
:
25533 case ARM_BUILTIN_WMIAWTBN
:
25534 case ARM_BUILTIN_WMIAWTTN
:
25535 case ARM_BUILTIN_WSADB
:
25536 case ARM_BUILTIN_WSADH
:
25537 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
25538 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
25539 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
25540 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
25541 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
25542 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
25543 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
25544 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
25545 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
25546 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
25547 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
25548 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
25549 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
25550 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
25551 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
25552 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
25553 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
25554 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
25555 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
25556 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
25557 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
25558 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
25559 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
25560 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
25561 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
25562 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
25563 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
25564 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
25565 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
25566 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
25567 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
25568 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
25569 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
25570 : CODE_FOR_iwmmxt_wsadh
);
25571 arg0
= CALL_EXPR_ARG (exp
, 0);
25572 arg1
= CALL_EXPR_ARG (exp
, 1);
25573 arg2
= CALL_EXPR_ARG (exp
, 2);
25574 op0
= expand_normal (arg0
);
25575 op1
= expand_normal (arg1
);
25576 op2
= expand_normal (arg2
);
25577 tmode
= insn_data
[icode
].operand
[0].mode
;
25578 mode0
= insn_data
[icode
].operand
[1].mode
;
25579 mode1
= insn_data
[icode
].operand
[2].mode
;
25580 mode2
= insn_data
[icode
].operand
[3].mode
;
25582 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25583 op0
= copy_to_mode_reg (mode0
, op0
);
25584 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25585 op1
= copy_to_mode_reg (mode1
, op1
);
25586 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25587 op2
= copy_to_mode_reg (mode2
, op2
);
25589 || GET_MODE (target
) != tmode
25590 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25591 target
= gen_reg_rtx (tmode
);
25592 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25598 case ARM_BUILTIN_WZERO
:
25599 target
= gen_reg_rtx (DImode
);
25600 emit_insn (gen_iwmmxt_clrdi (target
));
25603 case ARM_BUILTIN_WSRLHI
:
25604 case ARM_BUILTIN_WSRLWI
:
25605 case ARM_BUILTIN_WSRLDI
:
25606 case ARM_BUILTIN_WSLLHI
:
25607 case ARM_BUILTIN_WSLLWI
:
25608 case ARM_BUILTIN_WSLLDI
:
25609 case ARM_BUILTIN_WSRAHI
:
25610 case ARM_BUILTIN_WSRAWI
:
25611 case ARM_BUILTIN_WSRADI
:
25612 case ARM_BUILTIN_WRORHI
:
25613 case ARM_BUILTIN_WRORWI
:
25614 case ARM_BUILTIN_WRORDI
:
25615 case ARM_BUILTIN_WSRLH
:
25616 case ARM_BUILTIN_WSRLW
:
25617 case ARM_BUILTIN_WSRLD
:
25618 case ARM_BUILTIN_WSLLH
:
25619 case ARM_BUILTIN_WSLLW
:
25620 case ARM_BUILTIN_WSLLD
:
25621 case ARM_BUILTIN_WSRAH
:
25622 case ARM_BUILTIN_WSRAW
:
25623 case ARM_BUILTIN_WSRAD
:
25624 case ARM_BUILTIN_WRORH
:
25625 case ARM_BUILTIN_WRORW
:
25626 case ARM_BUILTIN_WRORD
:
25627 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
25628 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
25629 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
25630 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
25631 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
25632 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
25633 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
25634 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
25635 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
25636 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
25637 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
25638 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
25639 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
25640 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
25641 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
25642 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
25643 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
25644 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
25645 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
25646 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
25647 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
25648 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
25649 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
25650 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
25651 : CODE_FOR_nothing
);
25652 arg1
= CALL_EXPR_ARG (exp
, 1);
25653 op1
= expand_normal (arg1
);
25654 if (GET_MODE (op1
) == VOIDmode
)
25656 imm
= INTVAL (op1
);
25657 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
25658 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
25659 && (imm
< 0 || imm
> 32))
25661 if (fcode
== ARM_BUILTIN_WRORHI
)
25662 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25663 else if (fcode
== ARM_BUILTIN_WRORWI
)
25664 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25665 else if (fcode
== ARM_BUILTIN_WRORH
)
25666 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25668 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25670 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
25671 && (imm
< 0 || imm
> 64))
25673 if (fcode
== ARM_BUILTIN_WRORDI
)
25674 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25676 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25680 if (fcode
== ARM_BUILTIN_WSRLHI
)
25681 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25682 else if (fcode
== ARM_BUILTIN_WSRLWI
)
25683 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25684 else if (fcode
== ARM_BUILTIN_WSRLDI
)
25685 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25686 else if (fcode
== ARM_BUILTIN_WSLLHI
)
25687 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25688 else if (fcode
== ARM_BUILTIN_WSLLWI
)
25689 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25690 else if (fcode
== ARM_BUILTIN_WSLLDI
)
25691 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25692 else if (fcode
== ARM_BUILTIN_WSRAHI
)
25693 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25694 else if (fcode
== ARM_BUILTIN_WSRAWI
)
25695 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25696 else if (fcode
== ARM_BUILTIN_WSRADI
)
25697 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25698 else if (fcode
== ARM_BUILTIN_WSRLH
)
25699 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25700 else if (fcode
== ARM_BUILTIN_WSRLW
)
25701 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25702 else if (fcode
== ARM_BUILTIN_WSRLD
)
25703 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25704 else if (fcode
== ARM_BUILTIN_WSLLH
)
25705 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25706 else if (fcode
== ARM_BUILTIN_WSLLW
)
25707 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25708 else if (fcode
== ARM_BUILTIN_WSLLD
)
25709 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25710 else if (fcode
== ARM_BUILTIN_WSRAH
)
25711 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25712 else if (fcode
== ARM_BUILTIN_WSRAW
)
25713 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25715 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25718 return arm_expand_binop_builtin (icode
, exp
, target
);
25724 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
25725 if (d
->code
== (const enum arm_builtins
) fcode
)
25726 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
25728 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
25729 if (d
->code
== (const enum arm_builtins
) fcode
)
25730 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
25732 for (i
= 0, d
= bdesc_3arg
; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
25733 if (d
->code
== (const enum arm_builtins
) fcode
)
25734 return arm_expand_ternop_builtin (d
->icode
, exp
, target
);
25736 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
/* NOTE(review): reconstructed from a garbled source dump; verify against
   upstream GCC config/arm/arm.c.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
25749 /* Like emit_multi_reg_push, but allowing for a different set of
25750 registers to be described as saved. MASK is the set of registers
25751 to be saved; REAL_REGS is the set of registers to be described as
25752 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25755 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
25757 unsigned long regno
;
25758 rtx par
[10], tmp
, reg
, insn
;
25761 /* Build the parallel of the registers actually being stored. */
25762 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
25764 regno
= ctz_hwi (mask
);
25765 reg
= gen_rtx_REG (SImode
, regno
);
25768 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
25770 tmp
= gen_rtx_USE (VOIDmode
, reg
);
25775 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
25776 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
25777 tmp
= gen_frame_mem (BLKmode
, tmp
);
25778 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
25781 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
25782 insn
= emit_insn (tmp
);
25784 /* Always build the stack adjustment note for unwind info. */
25785 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
25786 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
25789 /* Build the parallel of the registers recorded as saved for unwind. */
25790 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
25792 regno
= ctz_hwi (real_regs
);
25793 reg
= gen_rtx_REG (SImode
, regno
);
25795 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
25796 tmp
= gen_frame_mem (SImode
, tmp
);
25797 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
25798 RTX_FRAME_RELATED_P (tmp
) = 1;
25806 RTX_FRAME_RELATED_P (par
[0]) = 1;
25807 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
25810 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
25815 /* Emit code to push or pop registers to or from the stack. F is the
25816 assembly file. MASK is the registers to pop. */
25818 thumb_pop (FILE *f
, unsigned long mask
)
25821 int lo_mask
= mask
& 0xFF;
25822 int pushed_words
= 0;
25826 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
25828 /* Special case. Do not generate a POP PC statement here, do it in
25830 thumb_exit (f
, -1);
25834 fprintf (f
, "\tpop\t{");
25836 /* Look at the low registers first. */
25837 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
25841 asm_fprintf (f
, "%r", regno
);
25843 if ((lo_mask
& ~1) != 0)
25850 if (mask
& (1 << PC_REGNUM
))
25852 /* Catch popping the PC. */
25853 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
25854 || crtl
->calls_eh_return
)
25856 /* The PC is never poped directly, instead
25857 it is popped into r3 and then BX is used. */
25858 fprintf (f
, "}\n");
25860 thumb_exit (f
, -1);
25869 asm_fprintf (f
, "%r", PC_REGNUM
);
25873 fprintf (f
, "}\n");
25876 /* Generate code to return from a thumb function.
25877 If 'reg_containing_return_addr' is -1, then the return address is
25878 actually on the stack, at the stack pointer. */
25880 thumb_exit (FILE *f
, int reg_containing_return_addr
)
25882 unsigned regs_available_for_popping
;
25883 unsigned regs_to_pop
;
25885 unsigned available
;
25889 int restore_a4
= FALSE
;
25891 /* Compute the registers we need to pop. */
25895 if (reg_containing_return_addr
== -1)
25897 regs_to_pop
|= 1 << LR_REGNUM
;
25901 if (TARGET_BACKTRACE
)
25903 /* Restore the (ARM) frame pointer and stack pointer. */
25904 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
25908 /* If there is nothing to pop then just emit the BX instruction and
25910 if (pops_needed
== 0)
25912 if (crtl
->calls_eh_return
)
25913 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
25915 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
25918 /* Otherwise if we are not supporting interworking and we have not created
25919 a backtrace structure and the function was not entered in ARM mode then
25920 just pop the return address straight into the PC. */
25921 else if (!TARGET_INTERWORK
25922 && !TARGET_BACKTRACE
25923 && !is_called_in_ARM_mode (current_function_decl
)
25924 && !crtl
->calls_eh_return
)
25926 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
25930 /* Find out how many of the (return) argument registers we can corrupt. */
25931 regs_available_for_popping
= 0;
25933 /* If returning via __builtin_eh_return, the bottom three registers
25934 all contain information needed for the return. */
25935 if (crtl
->calls_eh_return
)
25939 /* If we can deduce the registers used from the function's
25940 return value. This is more reliable that examining
25941 df_regs_ever_live_p () because that will be set if the register is
25942 ever used in the function, not just if the register is used
25943 to hold a return value. */
25945 if (crtl
->return_rtx
!= 0)
25946 mode
= GET_MODE (crtl
->return_rtx
);
25948 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
25950 size
= GET_MODE_SIZE (mode
);
25954 /* In a void function we can use any argument register.
25955 In a function that returns a structure on the stack
25956 we can use the second and third argument registers. */
25957 if (mode
== VOIDmode
)
25958 regs_available_for_popping
=
25959 (1 << ARG_REGISTER (1))
25960 | (1 << ARG_REGISTER (2))
25961 | (1 << ARG_REGISTER (3));
25963 regs_available_for_popping
=
25964 (1 << ARG_REGISTER (2))
25965 | (1 << ARG_REGISTER (3));
25967 else if (size
<= 4)
25968 regs_available_for_popping
=
25969 (1 << ARG_REGISTER (2))
25970 | (1 << ARG_REGISTER (3));
25971 else if (size
<= 8)
25972 regs_available_for_popping
=
25973 (1 << ARG_REGISTER (3));
25976 /* Match registers to be popped with registers into which we pop them. */
25977 for (available
= regs_available_for_popping
,
25978 required
= regs_to_pop
;
25979 required
!= 0 && available
!= 0;
25980 available
&= ~(available
& - available
),
25981 required
&= ~(required
& - required
))
25984 /* If we have any popping registers left over, remove them. */
25986 regs_available_for_popping
&= ~available
;
25988 /* Otherwise if we need another popping register we can use
25989 the fourth argument register. */
25990 else if (pops_needed
)
25992 /* If we have not found any free argument registers and
25993 reg a4 contains the return address, we must move it. */
25994 if (regs_available_for_popping
== 0
25995 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
25997 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
25998 reg_containing_return_addr
= LR_REGNUM
;
26000 else if (size
> 12)
26002 /* Register a4 is being used to hold part of the return value,
26003 but we have dire need of a free, low register. */
26006 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
26009 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
26011 /* The fourth argument register is available. */
26012 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
26018 /* Pop as many registers as we can. */
26019 thumb_pop (f
, regs_available_for_popping
);
26021 /* Process the registers we popped. */
26022 if (reg_containing_return_addr
== -1)
26024 /* The return address was popped into the lowest numbered register. */
26025 regs_to_pop
&= ~(1 << LR_REGNUM
);
26027 reg_containing_return_addr
=
26028 number_of_first_bit_set (regs_available_for_popping
);
26030 /* Remove this register for the mask of available registers, so that
26031 the return address will not be corrupted by further pops. */
26032 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
26035 /* If we popped other registers then handle them here. */
26036 if (regs_available_for_popping
)
26040 /* Work out which register currently contains the frame pointer. */
26041 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26043 /* Move it into the correct place. */
26044 asm_fprintf (f
, "\tmov\t%r, %r\n",
26045 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
26047 /* (Temporarily) remove it from the mask of popped registers. */
26048 regs_available_for_popping
&= ~(1 << frame_pointer
);
26049 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
26051 if (regs_available_for_popping
)
26055 /* We popped the stack pointer as well,
26056 find the register that contains it. */
26057 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26059 /* Move it into the stack register. */
26060 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
26062 /* At this point we have popped all necessary registers, so
26063 do not worry about restoring regs_available_for_popping
26064 to its correct value:
26066 assert (pops_needed == 0)
26067 assert (regs_available_for_popping == (1 << frame_pointer))
26068 assert (regs_to_pop == (1 << STACK_POINTER)) */
26072 /* Since we have just move the popped value into the frame
26073 pointer, the popping register is available for reuse, and
26074 we know that we still have the stack pointer left to pop. */
26075 regs_available_for_popping
|= (1 << frame_pointer
);
26079 /* If we still have registers left on the stack, but we no longer have
26080 any registers into which we can pop them, then we must move the return
26081 address into the link register and make available the register that
26083 if (regs_available_for_popping
== 0 && pops_needed
> 0)
26085 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
26087 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
26088 reg_containing_return_addr
);
26090 reg_containing_return_addr
= LR_REGNUM
;
26093 /* If we have registers left on the stack then pop some more.
26094 We know that at most we will want to pop FP and SP. */
26095 if (pops_needed
> 0)
26100 thumb_pop (f
, regs_available_for_popping
);
26102 /* We have popped either FP or SP.
26103 Move whichever one it is into the correct register. */
26104 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26105 move_to
= number_of_first_bit_set (regs_to_pop
);
26107 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
26109 regs_to_pop
&= ~(1 << move_to
);
26114 /* If we still have not popped everything then we must have only
26115 had one register available to us and we are now popping the SP. */
26116 if (pops_needed
> 0)
26120 thumb_pop (f
, regs_available_for_popping
);
26122 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26124 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
26126 assert (regs_to_pop == (1 << STACK_POINTER))
26127 assert (pops_needed == 1)
26131 /* If necessary restore the a4 register. */
26134 if (reg_containing_return_addr
!= LR_REGNUM
)
26136 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26137 reg_containing_return_addr
= LR_REGNUM
;
26140 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
26143 if (crtl
->calls_eh_return
)
26144 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26146 /* Return to caller. */
26147 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26150 /* Scan INSN just before assembler is output for it.
26151 For Thumb-1, we track the status of the condition codes; this
26152 information is used in the cbranchsi4_insn pattern. */
26154 thumb1_final_prescan_insn (rtx insn
)
26156 if (flag_print_asm_name
)
26157 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
26158 INSN_ADDRESSES (INSN_UID (insn
)));
26159 /* Don't overwrite the previous setter when we get to a cbranch. */
26160 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
26162 enum attr_conds conds
;
26164 if (cfun
->machine
->thumb1_cc_insn
)
26166 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
26167 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
26170 conds
= get_attr_conds (insn
);
26171 if (conds
== CONDS_SET
)
26173 rtx set
= single_set (insn
);
26174 cfun
->machine
->thumb1_cc_insn
= insn
;
26175 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
26176 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
26177 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
26178 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
26180 rtx src1
= XEXP (SET_SRC (set
), 1);
26181 if (src1
== const0_rtx
)
26182 cfun
->machine
->thumb1_cc_mode
= CCmode
;
26184 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
26186 /* Record the src register operand instead of dest because
26187 cprop_hardreg pass propagates src. */
26188 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
26191 else if (conds
!= CONDS_NOCOND
)
26192 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
26195 /* Check if unexpected far jump is used. */
26196 if (cfun
->machine
->lr_save_eliminated
26197 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26198 internal_error("Unexpected thumb1 far jump");
26202 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26204 unsigned HOST_WIDE_INT mask
= 0xff;
26207 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26208 if (val
== 0) /* XXX */
26211 for (i
= 0; i
< 25; i
++)
26212 if ((val
& (mask
<< i
)) == val
)
26218 /* Returns nonzero if the current function contains,
26219 or might contain a far jump. */
26221 thumb_far_jump_used_p (void)
26224 bool far_jump
= false;
26225 unsigned int func_size
= 0;
26227 /* This test is only important for leaf functions. */
26228 /* assert (!leaf_function_p ()); */
26230 /* If we have already decided that far jumps may be used,
26231 do not bother checking again, and always return true even if
26232 it turns out that they are not being used. Once we have made
26233 the decision that far jumps are present (and that hence the link
26234 register will be pushed onto the stack) we cannot go back on it. */
26235 if (cfun
->machine
->far_jump_used
)
26238 /* If this function is not being called from the prologue/epilogue
26239 generation code then it must be being called from the
26240 INITIAL_ELIMINATION_OFFSET macro. */
26241 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
26243 /* In this case we know that we are being asked about the elimination
26244 of the arg pointer register. If that register is not being used,
26245 then there are no arguments on the stack, and we do not have to
26246 worry that a far jump might force the prologue to push the link
26247 register, changing the stack offsets. In this case we can just
26248 return false, since the presence of far jumps in the function will
26249 not affect stack offsets.
26251 If the arg pointer is live (or if it was live, but has now been
26252 eliminated and so set to dead) then we do have to test to see if
26253 the function might contain a far jump. This test can lead to some
26254 false negatives, since before reload is completed, then length of
26255 branch instructions is not known, so gcc defaults to returning their
26256 longest length, which in turn sets the far jump attribute to true.
26258 A false negative will not result in bad code being generated, but it
26259 will result in a needless push and pop of the link register. We
26260 hope that this does not occur too often.
26262 If we need doubleword stack alignment this could affect the other
26263 elimination offsets so we can't risk getting it wrong. */
26264 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
26265 cfun
->machine
->arg_pointer_live
= 1;
26266 else if (!cfun
->machine
->arg_pointer_live
)
26270 /* We should not change far_jump_used during or after reload, as there is
26271 no chance to change stack frame layout. */
26272 if (reload_in_progress
|| reload_completed
)
26275 /* Check to see if the function contains a branch
26276 insn with the far jump attribute set. */
26277 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
26279 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26283 func_size
+= get_attr_length (insn
);
26286 /* Attribute far_jump will always be true for thumb1 before
26287 shorten_branch pass. So checking far_jump attribute before
26288 shorten_branch isn't much useful.
26290 Following heuristic tries to estimate more accurately if a far jump
26291 may finally be used. The heuristic is very conservative as there is
26292 no chance to roll-back the decision of not to use far jump.
26294 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26295 2-byte insn is associated with a 4 byte constant pool. Using
26296 function size 2048/3 as the threshold is conservative enough. */
26299 if ((func_size
* 3) >= 2048)
26301 /* Record the fact that we have decided that
26302 the function does use far jumps. */
26303 cfun
->machine
->far_jump_used
= 1;
26311 /* Return nonzero if FUNC must be entered in ARM mode. */
26313 is_called_in_ARM_mode (tree func
)
26315 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
26317 /* Ignore the problem about functions whose address is taken. */
26318 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
26322 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
26328 /* Given the stack offsets and register mask in OFFSETS, decide how
26329 many additional registers to push instead of subtracting a constant
26330 from SP. For epilogues the principle is the same except we use pop.
26331 FOR_PROLOGUE indicates which we're generating. */
26333 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26335 HOST_WIDE_INT amount
;
26336 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26337 /* Extract a mask of the ones we can give to the Thumb's push/pop
26339 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26340 /* Then count how many other high registers will need to be pushed. */
26341 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26342 int n_free
, reg_base
, size
;
26344 if (!for_prologue
&& frame_pointer_needed
)
26345 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26347 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26349 /* If the stack frame size is 512 exactly, we can save one load
26350 instruction, which should make this a win even when optimizing
26352 if (!optimize_size
&& amount
!= 512)
26355 /* Can't do this if there are high registers to push. */
26356 if (high_regs_pushed
!= 0)
26359 /* Shouldn't do it in the prologue if no registers would normally
26360 be pushed at all. In the epilogue, also allow it if we'll have
26361 a pop insn for the PC. */
26364 || TARGET_BACKTRACE
26365 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26366 || TARGET_INTERWORK
26367 || crtl
->args
.pretend_args_size
!= 0))
26370 /* Don't do this if thumb_expand_prologue wants to emit instructions
26371 between the push and the stack frame allocation. */
26373 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26374 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
26381 size
= arm_size_return_regs ();
26382 reg_base
= ARM_NUM_INTS (size
);
26383 live_regs_mask
>>= reg_base
;
26386 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26387 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
26389 live_regs_mask
>>= 1;
26395 gcc_assert (amount
/ 4 * 4 == amount
);
26397 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26398 return (amount
- 508) / 4;
26399 if (amount
<= n_free
* 4)
26404 /* The bits which aren't usefully expanded as rtl. */
26406 thumb1_unexpanded_epilogue (void)
26408 arm_stack_offsets
*offsets
;
26410 unsigned long live_regs_mask
= 0;
26411 int high_regs_pushed
= 0;
26413 int had_to_push_lr
;
26416 if (cfun
->machine
->return_used_this_function
!= 0)
26419 if (IS_NAKED (arm_current_func_type ()))
26422 offsets
= arm_get_frame_offsets ();
26423 live_regs_mask
= offsets
->saved_regs_mask
;
26424 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26426 /* If we can deduce the registers used from the function's return value.
26427 This is more reliable that examining df_regs_ever_live_p () because that
26428 will be set if the register is ever used in the function, not just if
26429 the register is used to hold a return value. */
26430 size
= arm_size_return_regs ();
26432 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26435 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26436 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26439 /* The prolog may have pushed some high registers to use as
26440 work registers. e.g. the testsuite file:
26441 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26442 compiles to produce:
26443 push {r4, r5, r6, r7, lr}
26447 as part of the prolog. We have to undo that pushing here. */
26449 if (high_regs_pushed
)
26451 unsigned long mask
= live_regs_mask
& 0xff;
26454 /* The available low registers depend on the size of the value we are
26462 /* Oh dear! We have no low registers into which we can pop
26465 ("no low registers available for popping high registers");
26467 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
26468 if (live_regs_mask
& (1 << next_hi_reg
))
26471 while (high_regs_pushed
)
26473 /* Find lo register(s) into which the high register(s) can
26475 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26477 if (mask
& (1 << regno
))
26478 high_regs_pushed
--;
26479 if (high_regs_pushed
== 0)
26483 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
26485 /* Pop the values into the low register(s). */
26486 thumb_pop (asm_out_file
, mask
);
26488 /* Move the value(s) into the high registers. */
26489 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26491 if (mask
& (1 << regno
))
26493 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
26496 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
26497 if (live_regs_mask
& (1 << next_hi_reg
))
26502 live_regs_mask
&= ~0x0f00;
26505 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
26506 live_regs_mask
&= 0xff;
26508 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
26510 /* Pop the return address into the PC. */
26511 if (had_to_push_lr
)
26512 live_regs_mask
|= 1 << PC_REGNUM
;
26514 /* Either no argument registers were pushed or a backtrace
26515 structure was created which includes an adjusted stack
26516 pointer, so just pop everything. */
26517 if (live_regs_mask
)
26518 thumb_pop (asm_out_file
, live_regs_mask
);
26520 /* We have either just popped the return address into the
26521 PC or it is was kept in LR for the entire function.
26522 Note that thumb_pop has already called thumb_exit if the
26523 PC was in the list. */
26524 if (!had_to_push_lr
)
26525 thumb_exit (asm_out_file
, LR_REGNUM
);
26529 /* Pop everything but the return address. */
26530 if (live_regs_mask
)
26531 thumb_pop (asm_out_file
, live_regs_mask
);
26533 if (had_to_push_lr
)
26537 /* We have no free low regs, so save one. */
26538 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
26542 /* Get the return address into a temporary register. */
26543 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
26547 /* Move the return address to lr. */
26548 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
26550 /* Restore the low register. */
26551 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
26556 regno
= LAST_ARG_REGNUM
;
26561 /* Remove the argument registers that were pushed onto the stack. */
26562 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
26563 SP_REGNUM
, SP_REGNUM
,
26564 crtl
->args
.pretend_args_size
);
26566 thumb_exit (asm_out_file
, regno
);
/* Functions to save and restore machine-specific function data.  */

/* Allocate and zero-initialize the per-function machine_function
   record.  Installed via init_machine_status so it runs once for each
   function compiled.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  /* The record is zero-filled; only store the "unknown" function type
     explicitly when its enumerator is non-zero.  */
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
26585 /* Return an RTX indicating where the return address to the
26586 calling function can be found. */
26588 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
26593 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
26596 /* Do anything needed before RTL is emitted for each function. */
26598 arm_init_expanders (void)
26600 /* Arrange to initialize and mark the machine per-function status. */
26601 init_machine_status
= arm_init_machine_status
;
26603 /* This is to stop the combine pass optimizing away the alignment
26604 adjustment of va_arg. */
26605 /* ??? It is claimed that this should not be necessary. */
26607 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
26611 /* Like arm_compute_initial_elimination offset. Simpler because there
26612 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26613 to point at the base of the local variables after static stack
26614 space for a function has been allocated. */
26617 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
26619 arm_stack_offsets
*offsets
;
26621 offsets
= arm_get_frame_offsets ();
26625 case ARG_POINTER_REGNUM
:
26628 case STACK_POINTER_REGNUM
:
26629 return offsets
->outgoing_args
- offsets
->saved_args
;
26631 case FRAME_POINTER_REGNUM
:
26632 return offsets
->soft_frame
- offsets
->saved_args
;
26634 case ARM_HARD_FRAME_POINTER_REGNUM
:
26635 return offsets
->saved_regs
- offsets
->saved_args
;
26637 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26638 return offsets
->locals_base
- offsets
->saved_args
;
26641 gcc_unreachable ();
26645 case FRAME_POINTER_REGNUM
:
26648 case STACK_POINTER_REGNUM
:
26649 return offsets
->outgoing_args
- offsets
->soft_frame
;
26651 case ARM_HARD_FRAME_POINTER_REGNUM
:
26652 return offsets
->saved_regs
- offsets
->soft_frame
;
26654 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26655 return offsets
->locals_base
- offsets
->soft_frame
;
26658 gcc_unreachable ();
26663 gcc_unreachable ();
26667 /* Generate the function's prologue. */
26670 thumb1_expand_prologue (void)
26674 HOST_WIDE_INT amount
;
26675 arm_stack_offsets
*offsets
;
26676 unsigned long func_type
;
26678 unsigned long live_regs_mask
;
26679 unsigned long l_mask
;
26680 unsigned high_regs_pushed
= 0;
26682 func_type
= arm_current_func_type ();
26684 /* Naked functions don't have prologues. */
26685 if (IS_NAKED (func_type
))
26688 if (IS_INTERRUPT (func_type
))
26690 error ("interrupt Service Routines cannot be coded in Thumb mode");
26694 if (is_called_in_ARM_mode (current_function_decl
))
26695 emit_insn (gen_prologue_thumb1_interwork ());
26697 offsets
= arm_get_frame_offsets ();
26698 live_regs_mask
= offsets
->saved_regs_mask
;
26700 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26701 l_mask
= live_regs_mask
& 0x40ff;
26702 /* Then count how many other high registers will need to be pushed. */
26703 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26705 if (crtl
->args
.pretend_args_size
)
26707 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
26709 if (cfun
->machine
->uses_anonymous_args
)
26711 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
26712 unsigned long mask
;
26714 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
26715 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
26717 insn
= thumb1_emit_multi_reg_push (mask
, 0);
26721 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26722 stack_pointer_rtx
, x
));
26724 RTX_FRAME_RELATED_P (insn
) = 1;
26727 if (TARGET_BACKTRACE
)
26729 HOST_WIDE_INT offset
= 0;
26730 unsigned work_register
;
26731 rtx work_reg
, x
, arm_hfp_rtx
;
26733 /* We have been asked to create a stack backtrace structure.
26734 The code looks like this:
26738 0 sub SP, #16 Reserve space for 4 registers.
26739 2 push {R7} Push low registers.
26740 4 add R7, SP, #20 Get the stack pointer before the push.
26741 6 str R7, [SP, #8] Store the stack pointer
26742 (before reserving the space).
26743 8 mov R7, PC Get hold of the start of this code + 12.
26744 10 str R7, [SP, #16] Store it.
26745 12 mov R7, FP Get hold of the current frame pointer.
26746 14 str R7, [SP, #4] Store it.
26747 16 mov R7, LR Get hold of the current return address.
26748 18 str R7, [SP, #12] Store it.
26749 20 add R7, SP, #16 Point at the start of the
26750 backtrace structure.
26751 22 mov FP, R7 Put this value into the frame pointer. */
26753 work_register
= thumb_find_work_register (live_regs_mask
);
26754 work_reg
= gen_rtx_REG (SImode
, work_register
);
26755 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
26757 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26758 stack_pointer_rtx
, GEN_INT (-16)));
26759 RTX_FRAME_RELATED_P (insn
) = 1;
26763 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
26764 RTX_FRAME_RELATED_P (insn
) = 1;
26766 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
26769 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
26770 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
26772 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
26773 x
= gen_frame_mem (SImode
, x
);
26774 emit_move_insn (x
, work_reg
);
26776 /* Make sure that the instruction fetching the PC is in the right place
26777 to calculate "start of backtrace creation code + 12". */
26778 /* ??? The stores using the common WORK_REG ought to be enough to
26779 prevent the scheduler from doing anything weird. Failing that
26780 we could always move all of the following into an UNSPEC_VOLATILE. */
26783 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
26784 emit_move_insn (work_reg
, x
);
26786 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
26787 x
= gen_frame_mem (SImode
, x
);
26788 emit_move_insn (x
, work_reg
);
26790 emit_move_insn (work_reg
, arm_hfp_rtx
);
26792 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
26793 x
= gen_frame_mem (SImode
, x
);
26794 emit_move_insn (x
, work_reg
);
26798 emit_move_insn (work_reg
, arm_hfp_rtx
);
26800 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
26801 x
= gen_frame_mem (SImode
, x
);
26802 emit_move_insn (x
, work_reg
);
26804 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
26805 emit_move_insn (work_reg
, x
);
26807 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
26808 x
= gen_frame_mem (SImode
, x
);
26809 emit_move_insn (x
, work_reg
);
26812 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
26813 emit_move_insn (work_reg
, x
);
26815 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
26816 x
= gen_frame_mem (SImode
, x
);
26817 emit_move_insn (x
, work_reg
);
26819 x
= GEN_INT (offset
+ 12);
26820 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
26822 emit_move_insn (arm_hfp_rtx
, work_reg
);
26824 /* Optimization: If we are not pushing any low registers but we are going
26825 to push some high registers then delay our first push. This will just
26826 be a push of LR and we can combine it with the push of the first high
26828 else if ((l_mask
& 0xff) != 0
26829 || (high_regs_pushed
== 0 && l_mask
))
26831 unsigned long mask
= l_mask
;
26832 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
26833 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
26834 RTX_FRAME_RELATED_P (insn
) = 1;
26837 if (high_regs_pushed
)
26839 unsigned pushable_regs
;
26840 unsigned next_hi_reg
;
26841 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
26842 : crtl
->args
.info
.nregs
;
26843 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
26845 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
26846 if (live_regs_mask
& (1 << next_hi_reg
))
26849 /* Here we need to mask out registers used for passing arguments
26850 even if they can be pushed. This is to avoid using them to stash the high
26851 registers. Such kind of stash may clobber the use of arguments. */
26852 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
26854 if (pushable_regs
== 0)
26855 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
26857 while (high_regs_pushed
> 0)
26859 unsigned long real_regs_mask
= 0;
26861 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
26863 if (pushable_regs
& (1 << regno
))
26865 emit_move_insn (gen_rtx_REG (SImode
, regno
),
26866 gen_rtx_REG (SImode
, next_hi_reg
));
26868 high_regs_pushed
--;
26869 real_regs_mask
|= (1 << next_hi_reg
);
26871 if (high_regs_pushed
)
26873 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
26875 if (live_regs_mask
& (1 << next_hi_reg
))
26880 pushable_regs
&= ~((1 << regno
) - 1);
26886 /* If we had to find a work register and we have not yet
26887 saved the LR then add it to the list of regs to push. */
26888 if (l_mask
== (1 << LR_REGNUM
))
26890 pushable_regs
|= l_mask
;
26891 real_regs_mask
|= l_mask
;
26895 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
26896 RTX_FRAME_RELATED_P (insn
) = 1;
26900 /* Load the pic register before setting the frame pointer,
26901 so we can use r7 as a temporary work register. */
26902 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26903 arm_load_pic_register (live_regs_mask
);
26905 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
26906 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
26907 stack_pointer_rtx
);
26909 if (flag_stack_usage_info
)
26910 current_function_static_stack_size
26911 = offsets
->outgoing_args
- offsets
->saved_args
;
26913 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26914 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
26919 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
26920 GEN_INT (- amount
)));
26921 RTX_FRAME_RELATED_P (insn
) = 1;
26927 /* The stack decrement is too big for an immediate value in a single
26928 insn. In theory we could issue multiple subtracts, but after
26929 three of them it becomes more space efficient to place the full
26930 value in the constant pool and load into a register. (Also the
26931 ARM debugger really likes to see only one stack decrement per
26932 function). So instead we look for a scratch register into which
26933 we can load the decrement, and then we subtract this from the
26934 stack pointer. Unfortunately on the thumb the only available
26935 scratch registers are the argument registers, and we cannot use
26936 these as they may hold arguments to the function. Instead we
26937 attempt to locate a call preserved register which is used by this
26938 function. If we can find one, then we know that it will have
26939 been pushed at the start of the prologue and so we can corrupt
26941 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
26942 if (live_regs_mask
& (1 << regno
))
26945 gcc_assert(regno
<= LAST_LO_REGNUM
);
26947 reg
= gen_rtx_REG (SImode
, regno
);
26949 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
26951 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26952 stack_pointer_rtx
, reg
));
26954 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
26955 plus_constant (Pmode
, stack_pointer_rtx
,
26957 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
26958 RTX_FRAME_RELATED_P (insn
) = 1;
26962 if (frame_pointer_needed
)
26963 thumb_set_frame_pointer (offsets
);
26965 /* If we are profiling, make sure no instructions are scheduled before
26966 the call to mcount. Similarly if the user has requested no
26967 scheduling in the prolog. Similarly if we want non-call exceptions
26968 using the EABI unwinder, to prevent faulting instructions from being
26969 swapped with a stack adjustment. */
26970 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
26971 || (arm_except_unwind_info (&global_options
) == UI_TARGET
26972 && cfun
->can_throw_non_call_exceptions
))
26973 emit_insn (gen_blockage ());
26975 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
26976 if (live_regs_mask
& 0xff)
26977 cfun
->machine
->lr_save_eliminated
= 0;
26980 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
26981 POP instruction can be generated. LR should be replaced by PC. All
26982 the checks required are already done by USE_RETURN_INSN (). Hence,
26983 all we really need to check here is if single register is to be
26984 returned, or multiple register return. */
26986 thumb2_expand_return (bool simple_return
)
26989 unsigned long saved_regs_mask
;
26990 arm_stack_offsets
*offsets
;
26992 offsets
= arm_get_frame_offsets ();
26993 saved_regs_mask
= offsets
->saved_regs_mask
;
26995 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
26996 if (saved_regs_mask
& (1 << i
))
26999 if (!simple_return
&& saved_regs_mask
)
27003 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27004 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27005 rtx addr
= gen_rtx_MEM (SImode
,
27006 gen_rtx_POST_INC (SImode
,
27007 stack_pointer_rtx
));
27008 set_mem_alias_set (addr
, get_frame_alias_set ());
27009 XVECEXP (par
, 0, 0) = ret_rtx
;
27010 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
27011 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27012 emit_jump_insn (par
);
27016 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27017 saved_regs_mask
|= (1 << PC_REGNUM
);
27018 arm_emit_multi_reg_pop (saved_regs_mask
);
27023 emit_jump_insn (simple_return_rtx
);
27028 thumb1_expand_epilogue (void)
27030 HOST_WIDE_INT amount
;
27031 arm_stack_offsets
*offsets
;
27034 /* Naked functions don't have prologues. */
27035 if (IS_NAKED (arm_current_func_type ()))
27038 offsets
= arm_get_frame_offsets ();
27039 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27041 if (frame_pointer_needed
)
27043 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27044 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27046 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27048 gcc_assert (amount
>= 0);
27051 emit_insn (gen_blockage ());
27054 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27055 GEN_INT (amount
)));
27058 /* r3 is always free in the epilogue. */
27059 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27061 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27062 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27066 /* Emit a USE (stack_pointer_rtx), so that
27067 the stack adjustment will not be deleted. */
27068 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27070 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27071 emit_insn (gen_blockage ());
27073 /* Emit a clobber for each insn that will be restored in the epilogue,
27074 so that flow2 will get register lifetimes correct. */
27075 for (regno
= 0; regno
< 13; regno
++)
27076 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
27077 emit_clobber (gen_rtx_REG (SImode
, regno
));
27079 if (! df_regs_ever_live_p (LR_REGNUM
))
27080 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27083 /* Epilogue code for APCS frame. */
27085 arm_expand_epilogue_apcs_frame (bool really_return
)
27087 unsigned long func_type
;
27088 unsigned long saved_regs_mask
;
27091 int floats_from_frame
= 0;
27092 arm_stack_offsets
*offsets
;
27094 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27095 func_type
= arm_current_func_type ();
27097 /* Get frame offsets for ARM. */
27098 offsets
= arm_get_frame_offsets ();
27099 saved_regs_mask
= offsets
->saved_regs_mask
;
27101 /* Find the offset of the floating-point save area in the frame. */
27103 = (offsets
->saved_args
27104 + arm_compute_static_chain_stack_bytes ()
27107 /* Compute how many core registers saved and how far away the floats are. */
27108 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27109 if (saved_regs_mask
& (1 << i
))
27112 floats_from_frame
+= 4;
27115 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27118 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27120 /* The offset is from IP_REGNUM. */
27121 int saved_size
= arm_get_vfp_saved_size ();
27122 if (saved_size
> 0)
27125 floats_from_frame
+= saved_size
;
27126 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27127 hard_frame_pointer_rtx
,
27128 GEN_INT (-floats_from_frame
)));
27129 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27130 ip_rtx
, hard_frame_pointer_rtx
);
27133 /* Generate VFP register multi-pop. */
27134 start_reg
= FIRST_VFP_REGNUM
;
27136 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27137 /* Look for a case where a reg does not need restoring. */
27138 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27139 && (!df_regs_ever_live_p (i
+ 1)
27140 || call_used_regs
[i
+ 1]))
27142 if (start_reg
!= i
)
27143 arm_emit_vfp_multi_reg_pop (start_reg
,
27144 (i
- start_reg
) / 2,
27145 gen_rtx_REG (SImode
,
27150 /* Restore the remaining regs that we have discovered (or possibly
27151 even all of them, if the conditional in the for loop never
27153 if (start_reg
!= i
)
27154 arm_emit_vfp_multi_reg_pop (start_reg
,
27155 (i
- start_reg
) / 2,
27156 gen_rtx_REG (SImode
, IP_REGNUM
));
27161 /* The frame pointer is guaranteed to be non-double-word aligned, as
27162 it is set to double-word-aligned old_stack_pointer - 4. */
27164 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27166 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27167 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27169 rtx addr
= gen_frame_mem (V2SImode
,
27170 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27172 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27173 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27174 gen_rtx_REG (V2SImode
, i
),
27180 /* saved_regs_mask should contain IP which contains old stack pointer
27181 at the time of activation creation. Since SP and IP are adjacent registers,
27182 we can restore the value directly into SP. */
27183 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27184 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27185 saved_regs_mask
|= (1 << SP_REGNUM
);
27187 /* There are two registers left in saved_regs_mask - LR and PC. We
27188 only need to restore LR (the return address), but to
27189 save time we can load it directly into PC, unless we need a
27190 special function exit sequence, or we are not really returning. */
27192 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27193 && !crtl
->calls_eh_return
)
27194 /* Delete LR from the register mask, so that LR on
27195 the stack is loaded into the PC in the register mask. */
27196 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27198 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27200 num_regs
= bit_count (saved_regs_mask
);
27201 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27204 emit_insn (gen_blockage ());
27205 /* Unwind the stack to just below the saved registers. */
27206 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27207 hard_frame_pointer_rtx
,
27208 GEN_INT (- 4 * num_regs
)));
27210 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27211 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27214 arm_emit_multi_reg_pop (saved_regs_mask
);
27216 if (IS_INTERRUPT (func_type
))
27218 /* Interrupt handlers will have pushed the
27219 IP onto the stack, so restore it now. */
27221 rtx addr
= gen_rtx_MEM (SImode
,
27222 gen_rtx_POST_INC (SImode
,
27223 stack_pointer_rtx
));
27224 set_mem_alias_set (addr
, get_frame_alias_set ());
27225 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27226 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27227 gen_rtx_REG (SImode
, IP_REGNUM
),
27231 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27234 if (crtl
->calls_eh_return
)
27235 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27237 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27239 if (IS_STACKALIGN (func_type
))
27240 /* Restore the original stack pointer. Before prologue, the stack was
27241 realigned and the original stack pointer saved in r0. For details,
27242 see comment in arm_expand_prologue. */
27243 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27245 emit_jump_insn (simple_return_rtx
);
27248 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27249 function is not a sibcall. */
27251 arm_expand_epilogue (bool really_return
)
27253 unsigned long func_type
;
27254 unsigned long saved_regs_mask
;
27258 arm_stack_offsets
*offsets
;
27260 func_type
= arm_current_func_type ();
27262 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27263 let output_return_instruction take care of instruction emission if any. */
27264 if (IS_NAKED (func_type
)
27265 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27268 emit_jump_insn (simple_return_rtx
);
27272 /* If we are throwing an exception, then we really must be doing a
27273 return, so we can't tail-call. */
27274 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27276 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27278 arm_expand_epilogue_apcs_frame (really_return
);
27282 /* Get frame offsets for ARM. */
27283 offsets
= arm_get_frame_offsets ();
27284 saved_regs_mask
= offsets
->saved_regs_mask
;
27285 num_regs
= bit_count (saved_regs_mask
);
27287 if (frame_pointer_needed
)
27290 /* Restore stack pointer if necessary. */
27293 /* In ARM mode, frame pointer points to first saved register.
27294 Restore stack pointer to last saved register. */
27295 amount
= offsets
->frame
- offsets
->saved_regs
;
27297 /* Force out any pending memory operations that reference stacked data
27298 before stack de-allocation occurs. */
27299 emit_insn (gen_blockage ());
27300 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27301 hard_frame_pointer_rtx
,
27302 GEN_INT (amount
)));
27303 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27305 hard_frame_pointer_rtx
);
27307 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27309 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27313 /* In Thumb-2 mode, the frame pointer points to the last saved
27315 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27318 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27319 hard_frame_pointer_rtx
,
27320 GEN_INT (amount
)));
27321 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27322 hard_frame_pointer_rtx
,
27323 hard_frame_pointer_rtx
);
27326 /* Force out any pending memory operations that reference stacked data
27327 before stack de-allocation occurs. */
27328 emit_insn (gen_blockage ());
27329 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27330 hard_frame_pointer_rtx
));
27331 arm_add_cfa_adjust_cfa_note (insn
, 0,
27333 hard_frame_pointer_rtx
);
27334 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27336 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27341 /* Pop off outgoing args and local frame to adjust stack pointer to
27342 last saved register. */
27343 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27347 /* Force out any pending memory operations that reference stacked data
27348 before stack de-allocation occurs. */
27349 emit_insn (gen_blockage ());
27350 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27352 GEN_INT (amount
)));
27353 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27354 stack_pointer_rtx
, stack_pointer_rtx
);
27355 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27357 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27361 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27363 /* Generate VFP register multi-pop. */
27364 int end_reg
= LAST_VFP_REGNUM
+ 1;
27366 /* Scan the registers in reverse order. We need to match
27367 any groupings made in the prologue and generate matching
27368 vldm operations. The need to match groups is because,
27369 unlike pop, vldm can only do consecutive regs. */
27370 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27371 /* Look for a case where a reg does not need restoring. */
27372 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27373 && (!df_regs_ever_live_p (i
+ 1)
27374 || call_used_regs
[i
+ 1]))
27376 /* Restore the regs discovered so far (from reg+2 to
27378 if (end_reg
> i
+ 2)
27379 arm_emit_vfp_multi_reg_pop (i
+ 2,
27380 (end_reg
- (i
+ 2)) / 2,
27381 stack_pointer_rtx
);
27385 /* Restore the remaining regs that we have discovered (or possibly
27386 even all of them, if the conditional in the for loop never
27388 if (end_reg
> i
+ 2)
27389 arm_emit_vfp_multi_reg_pop (i
+ 2,
27390 (end_reg
- (i
+ 2)) / 2,
27391 stack_pointer_rtx
);
27395 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27396 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27399 rtx addr
= gen_rtx_MEM (V2SImode
,
27400 gen_rtx_POST_INC (SImode
,
27401 stack_pointer_rtx
));
27402 set_mem_alias_set (addr
, get_frame_alias_set ());
27403 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27404 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27405 gen_rtx_REG (V2SImode
, i
),
27407 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27408 stack_pointer_rtx
, stack_pointer_rtx
);
27411 if (saved_regs_mask
)
27414 bool return_in_pc
= false;
27416 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27417 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27418 && !IS_STACKALIGN (func_type
)
27420 && crtl
->args
.pretend_args_size
== 0
27421 && saved_regs_mask
& (1 << LR_REGNUM
)
27422 && !crtl
->calls_eh_return
)
27424 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27425 saved_regs_mask
|= (1 << PC_REGNUM
);
27426 return_in_pc
= true;
27429 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
27431 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27432 if (saved_regs_mask
& (1 << i
))
27434 rtx addr
= gen_rtx_MEM (SImode
,
27435 gen_rtx_POST_INC (SImode
,
27436 stack_pointer_rtx
));
27437 set_mem_alias_set (addr
, get_frame_alias_set ());
27439 if (i
== PC_REGNUM
)
27441 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27442 XVECEXP (insn
, 0, 0) = ret_rtx
;
27443 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
27444 gen_rtx_REG (SImode
, i
),
27446 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
27447 insn
= emit_jump_insn (insn
);
27451 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
27453 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27454 gen_rtx_REG (SImode
, i
),
27456 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27458 stack_pointer_rtx
);
27465 && current_tune
->prefer_ldrd_strd
27466 && !optimize_function_for_size_p (cfun
))
27469 thumb2_emit_ldrd_pop (saved_regs_mask
);
27470 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
27471 arm_emit_ldrd_pop (saved_regs_mask
);
27473 arm_emit_multi_reg_pop (saved_regs_mask
);
27476 arm_emit_multi_reg_pop (saved_regs_mask
);
27479 if (return_in_pc
== true)
27483 if (crtl
->args
.pretend_args_size
)
27486 rtx dwarf
= NULL_RTX
;
27487 rtx tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27489 GEN_INT (crtl
->args
.pretend_args_size
)));
27491 RTX_FRAME_RELATED_P (tmp
) = 1;
27493 if (cfun
->machine
->uses_anonymous_args
)
27495 /* Restore pretend args. Refer arm_expand_prologue on how to save
27496 pretend_args in stack. */
27497 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
27498 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
27499 for (j
= 0, i
= 0; j
< num_regs
; i
++)
27500 if (saved_regs_mask
& (1 << i
))
27502 rtx reg
= gen_rtx_REG (SImode
, i
);
27503 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
27506 REG_NOTES (tmp
) = dwarf
;
27508 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
27509 stack_pointer_rtx
, stack_pointer_rtx
);
27512 if (!really_return
)
27515 if (crtl
->calls_eh_return
)
27516 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27518 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27520 if (IS_STACKALIGN (func_type
))
27521 /* Restore the original stack pointer. Before prologue, the stack was
27522 realigned and the original stack pointer saved in r0. For details,
27523 see comment in arm_expand_prologue. */
27524 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27526 emit_jump_insn (simple_return_rtx
);
27529 /* Implementation of insn prologue_thumb1_interwork. This is the first
27530 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27533 thumb1_output_interwork (void)
27536 FILE *f
= asm_out_file
;
27538 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
27539 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
27541 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
27543 /* Generate code sequence to switch us into Thumb mode. */
27544 /* The .code 32 directive has already been emitted by
27545 ASM_DECLARE_FUNCTION_NAME. */
27546 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
27547 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
27549 /* Generate a label, so that the debugger will notice the
27550 change in instruction sets. This label is also used by
27551 the assembler to bypass the ARM code when this function
27552 is called from a Thumb encoded function elsewhere in the
27553 same file. Hence the definition of STUB_NAME here must
27554 agree with the definition in gas/config/tc-arm.c. */
27556 #define STUB_NAME ".real_start_of"
27558 fprintf (f
, "\t.code\t16\n");
27560 if (arm_dllexport_name_p (name
))
27561 name
= arm_strip_name_encoding (name
);
27563 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
27564 fprintf (f
, "\t.thumb_func\n");
27565 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
27570 /* Handle the case of a double word load into a low register from
27571 a computed memory address. The computed address may involve a
27572 register which is overwritten by the load. */
27574 thumb_load_double_from_address (rtx
*operands
)
27582 gcc_assert (REG_P (operands
[0]));
27583 gcc_assert (MEM_P (operands
[1]));
27585 /* Get the memory address. */
27586 addr
= XEXP (operands
[1], 0);
27588 /* Work out how the memory address is computed. */
27589 switch (GET_CODE (addr
))
27592 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27594 if (REGNO (operands
[0]) == REGNO (addr
))
27596 output_asm_insn ("ldr\t%H0, %2", operands
);
27597 output_asm_insn ("ldr\t%0, %1", operands
);
27601 output_asm_insn ("ldr\t%0, %1", operands
);
27602 output_asm_insn ("ldr\t%H0, %2", operands
);
27607 /* Compute <address> + 4 for the high order load. */
27608 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27610 output_asm_insn ("ldr\t%0, %1", operands
);
27611 output_asm_insn ("ldr\t%H0, %2", operands
);
27615 arg1
= XEXP (addr
, 0);
27616 arg2
= XEXP (addr
, 1);
27618 if (CONSTANT_P (arg1
))
27619 base
= arg2
, offset
= arg1
;
27621 base
= arg1
, offset
= arg2
;
27623 gcc_assert (REG_P (base
));
27625 /* Catch the case of <address> = <reg> + <reg> */
27626 if (REG_P (offset
))
27628 int reg_offset
= REGNO (offset
);
27629 int reg_base
= REGNO (base
);
27630 int reg_dest
= REGNO (operands
[0]);
27632 /* Add the base and offset registers together into the
27633 higher destination register. */
27634 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
27635 reg_dest
+ 1, reg_base
, reg_offset
);
27637 /* Load the lower destination register from the address in
27638 the higher destination register. */
27639 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
27640 reg_dest
, reg_dest
+ 1);
27642 /* Load the higher destination register from its own address
27644 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
27645 reg_dest
+ 1, reg_dest
+ 1);
27649 /* Compute <address> + 4 for the high order load. */
27650 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27652 /* If the computed address is held in the low order register
27653 then load the high order register first, otherwise always
27654 load the low order register first. */
27655 if (REGNO (operands
[0]) == REGNO (base
))
27657 output_asm_insn ("ldr\t%H0, %2", operands
);
27658 output_asm_insn ("ldr\t%0, %1", operands
);
27662 output_asm_insn ("ldr\t%0, %1", operands
);
27663 output_asm_insn ("ldr\t%H0, %2", operands
);
27669 /* With no registers to worry about we can just load the value
27671 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27673 output_asm_insn ("ldr\t%H0, %2", operands
);
27674 output_asm_insn ("ldr\t%0, %1", operands
);
27678 gcc_unreachable ();
27685 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
27692 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27695 operands
[4] = operands
[5];
27698 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
27699 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
27703 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27706 operands
[4] = operands
[5];
27709 if (REGNO (operands
[5]) > REGNO (operands
[6]))
27712 operands
[5] = operands
[6];
27715 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27718 operands
[4] = operands
[5];
27722 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
27723 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
27727 gcc_unreachable ();
27733 /* Output a call-via instruction for thumb state. */
27735 thumb_call_via_reg (rtx reg
)
27737 int regno
= REGNO (reg
);
27740 gcc_assert (regno
< LR_REGNUM
);
27742 /* If we are in the normal text section we can use a single instance
27743 per compilation unit. If we are doing function sections, then we need
27744 an entry per section, since we can't rely on reachability. */
27745 if (in_section
== text_section
)
27747 thumb_call_reg_needed
= 1;
27749 if (thumb_call_via_label
[regno
] == NULL
)
27750 thumb_call_via_label
[regno
] = gen_label_rtx ();
27751 labelp
= thumb_call_via_label
+ regno
;
27755 if (cfun
->machine
->call_via
[regno
] == NULL
)
27756 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
27757 labelp
= cfun
->machine
->call_via
+ regno
;
27760 output_asm_insn ("bl\t%a0", labelp
);
27764 /* Routines for generating rtl. */
27766 thumb_expand_movmemqi (rtx
*operands
)
27768 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
27769 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
27770 HOST_WIDE_INT len
= INTVAL (operands
[2]);
27771 HOST_WIDE_INT offset
= 0;
27775 emit_insn (gen_movmem12b (out
, in
, out
, in
));
27781 emit_insn (gen_movmem8b (out
, in
, out
, in
));
27787 rtx reg
= gen_reg_rtx (SImode
);
27788 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
27789 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
27796 rtx reg
= gen_reg_rtx (HImode
);
27797 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
27798 plus_constant (Pmode
, in
,
27800 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
27809 rtx reg
= gen_reg_rtx (QImode
);
27810 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
27811 plus_constant (Pmode
, in
,
27813 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
27820 thumb_reload_out_hi (rtx
*operands
)
27822 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
27825 /* Handle reading a half-word from memory during reload. */
27827 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
27829 gcc_unreachable ();
27832 /* Return the length of a function name prefix
27833 that starts with the character 'c'. */
27835 arm_get_strip_length (int c
)
27839 ARM_NAME_ENCODING_LENGTHS
27844 /* Return a pointer to a function's name with any
27845 and all prefix encodings stripped from it. */
27847 arm_strip_name_encoding (const char *name
)
27851 while ((skip
= arm_get_strip_length (* name
)))
27857 /* If there is a '*' anywhere in the name's prefix, then
27858 emit the stripped name verbatim, otherwise prepend an
27859 underscore if leading underscores are being used. */
27861 arm_asm_output_labelref (FILE *stream
, const char *name
)
27866 while ((skip
= arm_get_strip_length (* name
)))
27868 verbatim
|= (*name
== '*');
27873 fputs (name
, stream
);
27875 asm_fprintf (stream
, "%U%s", name
);
27878 /* This function is used to emit an EABI tag and its associated value.
27879 We emit the numerical value of the tag in case the assembler does not
27880 support textual tags. (Eg gas prior to 2.20). If requested we include
27881 the tag name in a comment so that anyone reading the assembler output
27882 will know which tag is being set.
27884 This function is not static because arm-c.c needs it too. */
27887 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
27889 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
27890 if (flag_verbose_asm
|| flag_debug_asm
)
27891 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
27892 asm_fprintf (asm_out_file
, "\n");
27896 arm_file_start (void)
27900 if (TARGET_UNIFIED_ASM
)
27901 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
27905 const char *fpu_name
;
27906 if (arm_selected_arch
)
27908 /* armv7ve doesn't support any extensions. */
27909 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
27911 /* Keep backward compatability for assemblers
27912 which don't support armv7ve. */
27913 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
27914 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
27915 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
27916 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
27917 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
27921 const char* pos
= strchr (arm_selected_arch
->name
, '+');
27925 gcc_assert (strlen (arm_selected_arch
->name
)
27926 <= sizeof (buf
) / sizeof (*pos
));
27927 strncpy (buf
, arm_selected_arch
->name
,
27928 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
27929 buf
[pos
- arm_selected_arch
->name
] = '\0';
27930 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
27931 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
27934 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
27937 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
27938 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
27941 const char* truncated_name
27942 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
27943 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
27946 if (TARGET_SOFT_FLOAT
)
27948 fpu_name
= "softvfp";
27952 fpu_name
= arm_fpu_desc
->name
;
27953 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
27955 if (TARGET_HARD_FLOAT
)
27956 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27957 if (TARGET_HARD_FLOAT_ABI
)
27958 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27961 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
27963 /* Some of these attributes only apply when the corresponding features
27964 are used. However we don't have any easy way of figuring this out.
27965 Conservatively record the setting that would have been used. */
27967 if (flag_rounding_math
)
27968 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27970 if (!flag_unsafe_math_optimizations
)
27972 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27973 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27975 if (flag_signaling_nans
)
27976 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27978 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27979 flag_finite_math_only
? 1 : 3);
27981 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27982 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27983 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27984 flag_short_enums
? 1 : 2);
27986 /* Tag_ABI_optimization_goals. */
27989 else if (optimize
>= 2)
27995 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
27997 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28000 if (arm_fp16_format
)
28001 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28002 (int) arm_fp16_format
);
28004 if (arm_lang_output_object_attributes_hook
)
28005 arm_lang_output_object_attributes_hook();
28008 default_file_start ();
28012 arm_file_end (void)
28016 if (NEED_INDICATE_EXEC_STACK
)
28017 /* Add .note.GNU-stack. */
28018 file_end_indicate_exec_stack ();
28020 if (! thumb_call_reg_needed
)
28023 switch_to_section (text_section
);
28024 asm_fprintf (asm_out_file
, "\t.code 16\n");
28025 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28027 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28029 rtx label
= thumb_call_via_label
[regno
];
28033 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28034 CODE_LABEL_NUMBER (label
));
28035 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28041 /* Symbols in the text segment can be accessed without indirecting via the
28042 constant pool; it may take an extra binary operation, but this is still
28043 faster than indirecting via memory. Don't do this when not optimizing,
28044 since we won't be calculating al of the offsets necessary to do this
28048 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28050 if (optimize
> 0 && TREE_CONSTANT (decl
))
28051 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28053 default_encode_section_info (decl
, rtl
, first
);
28055 #endif /* !ARM_PE */
28058 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28060 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28061 && !strcmp (prefix
, "L"))
28063 arm_ccfsm_state
= 0;
28064 arm_target_insn
= NULL
;
28066 default_internal_label (stream
, prefix
, labelno
);
28069 /* Output code to add DELTA to the first argument, and then jump
28070 to FUNCTION. Used for C++ multiple inheritance. */
28072 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
28073 HOST_WIDE_INT delta
,
28074 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
28077 static int thunk_label
= 0;
28080 int mi_delta
= delta
;
28081 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
28083 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
28086 mi_delta
= - mi_delta
;
28088 final_start_function (emit_barrier (), file
, 1);
28092 int labelno
= thunk_label
++;
28093 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
28094 /* Thunks are entered in arm mode when avaiable. */
28095 if (TARGET_THUMB1_ONLY
)
28097 /* push r3 so we can use it as a temporary. */
28098 /* TODO: Omit this save if r3 is not used. */
28099 fputs ("\tpush {r3}\n", file
);
28100 fputs ("\tldr\tr3, ", file
);
28104 fputs ("\tldr\tr12, ", file
);
28106 assemble_name (file
, label
);
28107 fputc ('\n', file
);
28110 /* If we are generating PIC, the ldr instruction below loads
28111 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28112 the address of the add + 8, so we have:
28114 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28117 Note that we have "+ 1" because some versions of GNU ld
28118 don't set the low bit of the result for R_ARM_REL32
28119 relocations against thumb function symbols.
28120 On ARMv6M this is +4, not +8. */
28121 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
28122 assemble_name (file
, labelpc
);
28123 fputs (":\n", file
);
28124 if (TARGET_THUMB1_ONLY
)
28126 /* This is 2 insns after the start of the thunk, so we know it
28127 is 4-byte aligned. */
28128 fputs ("\tadd\tr3, pc, r3\n", file
);
28129 fputs ("\tmov r12, r3\n", file
);
28132 fputs ("\tadd\tr12, pc, r12\n", file
);
28134 else if (TARGET_THUMB1_ONLY
)
28135 fputs ("\tmov r12, r3\n", file
);
28137 if (TARGET_THUMB1_ONLY
)
28139 if (mi_delta
> 255)
28141 fputs ("\tldr\tr3, ", file
);
28142 assemble_name (file
, label
);
28143 fputs ("+4\n", file
);
28144 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
28145 mi_op
, this_regno
, this_regno
);
28147 else if (mi_delta
!= 0)
28149 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28150 mi_op
, this_regno
, this_regno
,
28156 /* TODO: Use movw/movt for large constants when available. */
28157 while (mi_delta
!= 0)
28159 if ((mi_delta
& (3 << shift
)) == 0)
28163 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28164 mi_op
, this_regno
, this_regno
,
28165 mi_delta
& (0xff << shift
));
28166 mi_delta
&= ~(0xff << shift
);
28173 if (TARGET_THUMB1_ONLY
)
28174 fputs ("\tpop\t{r3}\n", file
);
28176 fprintf (file
, "\tbx\tr12\n");
28177 ASM_OUTPUT_ALIGN (file
, 2);
28178 assemble_name (file
, label
);
28179 fputs (":\n", file
);
28182 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28183 rtx tem
= XEXP (DECL_RTL (function
), 0);
28184 tem
= plus_constant (GET_MODE (tem
), tem
, -7);
28185 tem
= gen_rtx_MINUS (GET_MODE (tem
),
28187 gen_rtx_SYMBOL_REF (Pmode
,
28188 ggc_strdup (labelpc
)));
28189 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
28192 /* Output ".word .LTHUNKn". */
28193 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
28195 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
28196 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
28200 fputs ("\tb\t", file
);
28201 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28202 if (NEED_PLT_RELOC
)
28203 fputs ("(PLT)", file
);
28204 fputc ('\n', file
);
28207 final_end_function ();
28211 arm_emit_vector_const (FILE *file
, rtx x
)
28214 const char * pattern
;
28216 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
28218 switch (GET_MODE (x
))
28220 case V2SImode
: pattern
= "%08x"; break;
28221 case V4HImode
: pattern
= "%04x"; break;
28222 case V8QImode
: pattern
= "%02x"; break;
28223 default: gcc_unreachable ();
28226 fprintf (file
, "0x");
28227 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
28231 element
= CONST_VECTOR_ELT (x
, i
);
28232 fprintf (file
, pattern
, INTVAL (element
));
28238 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28239 HFmode constant pool entries are actually loaded with ldr. */
28241 arm_emit_fp16_const (rtx c
)
28246 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
28247 bits
= real_to_target (NULL
, &r
, HFmode
);
28248 if (WORDS_BIG_ENDIAN
)
28249 assemble_zeros (2);
28250 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
28251 if (!WORDS_BIG_ENDIAN
)
28252 assemble_zeros (2);
28256 arm_output_load_gr (rtx
*operands
)
28263 if (!MEM_P (operands
[1])
28264 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
28265 || !REG_P (reg
= XEXP (sum
, 0))
28266 || !CONST_INT_P (offset
= XEXP (sum
, 1))
28267 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
28268 return "wldrw%?\t%0, %1";
28270 /* Fix up an out-of-range load of a GR register. */
28271 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
28272 wcgr
= operands
[0];
28274 output_asm_insn ("ldr%?\t%0, %1", operands
);
28276 operands
[0] = wcgr
;
28278 output_asm_insn ("tmcr%?\t%0, %1", operands
);
28279 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
28284 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28286 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28287 named arg and all anonymous args onto the stack.
28288 XXX I know the prologue shouldn't be pushing registers, but it is faster
28292 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
28293 enum machine_mode mode
,
28296 int second_time ATTRIBUTE_UNUSED
)
28298 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
28301 cfun
->machine
->uses_anonymous_args
= 1;
28302 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
28304 nregs
= pcum
->aapcs_ncrn
;
28305 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
28309 nregs
= pcum
->nregs
;
28311 if (nregs
< NUM_ARG_REGS
)
28312 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
28315 /* We can't rely on the caller doing the proper promotion when
28316 using APCS or ATPCS. */
28319 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
28321 return !TARGET_AAPCS_BASED
;
28324 static enum machine_mode
28325 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
28326 enum machine_mode mode
,
28327 int *punsignedp ATTRIBUTE_UNUSED
,
28328 const_tree fntype ATTRIBUTE_UNUSED
,
28329 int for_return ATTRIBUTE_UNUSED
)
28331 if (GET_MODE_CLASS (mode
) == MODE_INT
28332 && GET_MODE_SIZE (mode
) < 4)
28338 /* AAPCS based ABIs use short enums by default. */
28341 arm_default_short_enums (void)
28343 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
28347 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28350 arm_align_anon_bitfield (void)
28352 return TARGET_AAPCS_BASED
;
28356 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28359 arm_cxx_guard_type (void)
28361 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
28365 /* The EABI says test the least significant bit of a guard variable. */
28368 arm_cxx_guard_mask_bit (void)
28370 return TARGET_AAPCS_BASED
;
28374 /* The EABI specifies that all array cookies are 8 bytes long. */
28377 arm_get_cookie_size (tree type
)
28381 if (!TARGET_AAPCS_BASED
)
28382 return default_cxx_get_cookie_size (type
);
28384 size
= build_int_cst (sizetype
, 8);
28389 /* The EABI says that array cookies should also contain the element size. */
28392 arm_cookie_has_size (void)
28394 return TARGET_AAPCS_BASED
;
28398 /* The EABI says constructors and destructors should return a pointer to
28399 the object constructed/destroyed. */
28402 arm_cxx_cdtor_returns_this (void)
28404 return TARGET_AAPCS_BASED
;
28407 /* The EABI says that an inline function may never be the key
28411 arm_cxx_key_method_may_be_inline (void)
28413 return !TARGET_AAPCS_BASED
;
28417 arm_cxx_determine_class_data_visibility (tree decl
)
28419 if (!TARGET_AAPCS_BASED
28420 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
28423 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28424 is exported. However, on systems without dynamic vague linkage,
28425 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28426 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
28427 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
28429 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
28430 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
28434 arm_cxx_class_data_always_comdat (void)
28436 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28437 vague linkage if the class has no key function. */
28438 return !TARGET_AAPCS_BASED
;
28442 /* The EABI says __aeabi_atexit should be used to register static
28446 arm_cxx_use_aeabi_atexit (void)
28448 return TARGET_AAPCS_BASED
;
28453 arm_set_return_address (rtx source
, rtx scratch
)
28455 arm_stack_offsets
*offsets
;
28456 HOST_WIDE_INT delta
;
28458 unsigned long saved_regs
;
28460 offsets
= arm_get_frame_offsets ();
28461 saved_regs
= offsets
->saved_regs_mask
;
28463 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
28464 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28467 if (frame_pointer_needed
)
28468 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
28471 /* LR will be the first saved register. */
28472 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
28477 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
28478 GEN_INT (delta
& ~4095)));
28483 addr
= stack_pointer_rtx
;
28485 addr
= plus_constant (Pmode
, addr
, delta
);
28487 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28493 thumb_set_return_address (rtx source
, rtx scratch
)
28495 arm_stack_offsets
*offsets
;
28496 HOST_WIDE_INT delta
;
28497 HOST_WIDE_INT limit
;
28500 unsigned long mask
;
28504 offsets
= arm_get_frame_offsets ();
28505 mask
= offsets
->saved_regs_mask
;
28506 if (mask
& (1 << LR_REGNUM
))
28509 /* Find the saved regs. */
28510 if (frame_pointer_needed
)
28512 delta
= offsets
->soft_frame
- offsets
->saved_args
;
28513 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
28519 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
28522 /* Allow for the stack frame. */
28523 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
28525 /* The link register is always the first saved register. */
28528 /* Construct the address. */
28529 addr
= gen_rtx_REG (SImode
, reg
);
28532 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
28533 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
28537 addr
= plus_constant (Pmode
, addr
, delta
);
28539 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28542 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28545 /* Implements target hook vector_mode_supported_p. */
28547 arm_vector_mode_supported_p (enum machine_mode mode
)
28549 /* Neon also supports V2SImode, etc. listed in the clause below. */
28550 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
28551 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
28554 if ((TARGET_NEON
|| TARGET_IWMMXT
)
28555 && ((mode
== V2SImode
)
28556 || (mode
== V4HImode
)
28557 || (mode
== V8QImode
)))
28560 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
28561 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
28562 || mode
== V2HAmode
))
28568 /* Implements target hook array_mode_supported_p. */
28571 arm_array_mode_supported_p (enum machine_mode mode
,
28572 unsigned HOST_WIDE_INT nelems
)
28575 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
28576 && (nelems
>= 2 && nelems
<= 4))
28582 /* Use the option -mvectorize-with-neon-double to override the use of quardword
28583 registers when autovectorizing for Neon, at least until multiple vector
28584 widths are supported properly by the middle-end. */
28586 static enum machine_mode
28587 arm_preferred_simd_mode (enum machine_mode mode
)
28593 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
28595 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
28597 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
28599 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
28601 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
28608 if (TARGET_REALLY_IWMMXT
)
28624 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28626 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28627 using r0-r4 for function arguments, r7 for the stack frame and don't have
28628 enough left over to do doubleword arithmetic. For Thumb-2 all the
28629 potentially problematic instructions accept high registers so this is not
28630 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28631 that require many low registers. */
28633 arm_class_likely_spilled_p (reg_class_t rclass
)
28635 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
28636 || rclass
== CC_REG
)
28642 /* Implements target hook small_register_classes_for_mode_p. */
28644 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
28646 return TARGET_THUMB1
;
28649 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28650 ARM insns and therefore guarantee that the shift count is modulo 256.
28651 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28652 guarantee no particular behavior for out-of-range counts. */
28654 static unsigned HOST_WIDE_INT
28655 arm_shift_truncation_mask (enum machine_mode mode
)
28657 return mode
== SImode
? 255 : 0;
28661 /* Map internal gcc register numbers to DWARF2 register numbers. */
28664 arm_dbx_register_number (unsigned int regno
)
28669 if (IS_VFP_REGNUM (regno
))
28671 /* See comment in arm_dwarf_register_span. */
28672 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28673 return 64 + regno
- FIRST_VFP_REGNUM
;
28675 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
28678 if (IS_IWMMXT_GR_REGNUM (regno
))
28679 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
28681 if (IS_IWMMXT_REGNUM (regno
))
28682 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
28684 gcc_unreachable ();
28687 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28688 GCC models tham as 64 32-bit registers, so we need to describe this to
28689 the DWARF generation code. Other registers can use the default. */
28691 arm_dwarf_register_span (rtx rtl
)
28693 enum machine_mode mode
;
28699 regno
= REGNO (rtl
);
28700 if (!IS_VFP_REGNUM (regno
))
28703 /* XXX FIXME: The EABI defines two VFP register ranges:
28704 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28706 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28707 corresponding D register. Until GDB supports this, we shall use the
28708 legacy encodings. We also use these encodings for D0-D15 for
28709 compatibility with older debuggers. */
28710 mode
= GET_MODE (rtl
);
28711 if (GET_MODE_SIZE (mode
) < 8)
28714 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28716 nregs
= GET_MODE_SIZE (mode
) / 4;
28717 for (i
= 0; i
< nregs
; i
+= 2)
28718 if (TARGET_BIG_END
)
28720 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28721 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
28725 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
28726 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28731 nregs
= GET_MODE_SIZE (mode
) / 8;
28732 for (i
= 0; i
< nregs
; i
++)
28733 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
28736 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
28739 #if ARM_UNWIND_INFO
28740 /* Emit unwind directives for a store-multiple instruction or stack pointer
28741 push during alignment.
28742 These should only ever be generated by the function prologue code, so
28743 expect them to have a particular form.
28744 The store-multiple instruction sometimes pushes pc as the last register,
28745 although it should not be tracked into unwind information, or for -Os
28746 sometimes pushes some dummy registers before first register that needs
28747 to be tracked in unwind information; such dummy registers are there just
28748 to avoid separate stack adjustment, and will not be restored in the
28752 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
28755 HOST_WIDE_INT offset
;
28756 HOST_WIDE_INT nregs
;
28760 unsigned padfirst
= 0, padlast
= 0;
28763 e
= XVECEXP (p
, 0, 0);
28764 gcc_assert (GET_CODE (e
) == SET
);
28766 /* First insn will adjust the stack pointer. */
28767 gcc_assert (GET_CODE (e
) == SET
28768 && REG_P (SET_DEST (e
))
28769 && REGNO (SET_DEST (e
)) == SP_REGNUM
28770 && GET_CODE (SET_SRC (e
)) == PLUS
);
28772 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
28773 nregs
= XVECLEN (p
, 0) - 1;
28774 gcc_assert (nregs
);
28776 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
28779 /* For -Os dummy registers can be pushed at the beginning to
28780 avoid separate stack pointer adjustment. */
28781 e
= XVECEXP (p
, 0, 1);
28782 e
= XEXP (SET_DEST (e
), 0);
28783 if (GET_CODE (e
) == PLUS
)
28784 padfirst
= INTVAL (XEXP (e
, 1));
28785 gcc_assert (padfirst
== 0 || optimize_size
);
28786 /* The function prologue may also push pc, but not annotate it as it is
28787 never restored. We turn this into a stack pointer adjustment. */
28788 e
= XVECEXP (p
, 0, nregs
);
28789 e
= XEXP (SET_DEST (e
), 0);
28790 if (GET_CODE (e
) == PLUS
)
28791 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
28793 padlast
= offset
- 4;
28794 gcc_assert (padlast
== 0 || padlast
== 4);
28796 fprintf (asm_out_file
, "\t.pad #4\n");
28798 fprintf (asm_out_file
, "\t.save {");
28800 else if (IS_VFP_REGNUM (reg
))
28803 fprintf (asm_out_file
, "\t.vsave {");
28806 /* Unknown register type. */
28807 gcc_unreachable ();
28809 /* If the stack increment doesn't match the size of the saved registers,
28810 something has gone horribly wrong. */
28811 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
28815 /* The remaining insns will describe the stores. */
28816 for (i
= 1; i
<= nregs
; i
++)
28818 /* Expect (set (mem <addr>) (reg)).
28819 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28820 e
= XVECEXP (p
, 0, i
);
28821 gcc_assert (GET_CODE (e
) == SET
28822 && MEM_P (SET_DEST (e
))
28823 && REG_P (SET_SRC (e
)));
28825 reg
= REGNO (SET_SRC (e
));
28826 gcc_assert (reg
>= lastreg
);
28829 fprintf (asm_out_file
, ", ");
28830 /* We can't use %r for vfp because we need to use the
28831 double precision register names. */
28832 if (IS_VFP_REGNUM (reg
))
28833 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
28835 asm_fprintf (asm_out_file
, "%r", reg
);
28837 #ifdef ENABLE_CHECKING
28838 /* Check that the addresses are consecutive. */
28839 e
= XEXP (SET_DEST (e
), 0);
28840 if (GET_CODE (e
) == PLUS
)
28841 gcc_assert (REG_P (XEXP (e
, 0))
28842 && REGNO (XEXP (e
, 0)) == SP_REGNUM
28843 && CONST_INT_P (XEXP (e
, 1))
28844 && offset
== INTVAL (XEXP (e
, 1)));
28848 && REGNO (e
) == SP_REGNUM
);
28849 offset
+= reg_size
;
28852 fprintf (asm_out_file
, "}\n");
28854 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
28857 /* Emit unwind directives for a SET. */
28860 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
28868 switch (GET_CODE (e0
))
28871 /* Pushing a single register. */
28872 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
28873 || !REG_P (XEXP (XEXP (e0
, 0), 0))
28874 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
28877 asm_fprintf (asm_out_file
, "\t.save ");
28878 if (IS_VFP_REGNUM (REGNO (e1
)))
28879 asm_fprintf(asm_out_file
, "{d%d}\n",
28880 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
28882 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
28886 if (REGNO (e0
) == SP_REGNUM
)
28888 /* A stack increment. */
28889 if (GET_CODE (e1
) != PLUS
28890 || !REG_P (XEXP (e1
, 0))
28891 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
28892 || !CONST_INT_P (XEXP (e1
, 1)))
28895 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
28896 -INTVAL (XEXP (e1
, 1)));
28898 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
28900 HOST_WIDE_INT offset
;
28902 if (GET_CODE (e1
) == PLUS
)
28904 if (!REG_P (XEXP (e1
, 0))
28905 || !CONST_INT_P (XEXP (e1
, 1)))
28907 reg
= REGNO (XEXP (e1
, 0));
28908 offset
= INTVAL (XEXP (e1
, 1));
28909 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
28910 HARD_FRAME_POINTER_REGNUM
, reg
,
28913 else if (REG_P (e1
))
28916 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
28917 HARD_FRAME_POINTER_REGNUM
, reg
);
28922 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
28924 /* Move from sp to reg. */
28925 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
28927 else if (GET_CODE (e1
) == PLUS
28928 && REG_P (XEXP (e1
, 0))
28929 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
28930 && CONST_INT_P (XEXP (e1
, 1)))
28932 /* Set reg to offset from sp. */
28933 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
28934 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
28946 /* Emit unwind directives for the given insn. */
28949 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
28952 bool handled_one
= false;
28954 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
28957 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
28958 && (TREE_NOTHROW (current_function_decl
)
28959 || crtl
->all_throwers_are_sibcalls
))
28962 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
28965 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
28967 switch (REG_NOTE_KIND (note
))
28969 case REG_FRAME_RELATED_EXPR
:
28970 pat
= XEXP (note
, 0);
28973 case REG_CFA_REGISTER
:
28974 pat
= XEXP (note
, 0);
28977 pat
= PATTERN (insn
);
28978 if (GET_CODE (pat
) == PARALLEL
)
28979 pat
= XVECEXP (pat
, 0, 0);
28982 /* Only emitted for IS_STACKALIGN re-alignment. */
28987 src
= SET_SRC (pat
);
28988 dest
= SET_DEST (pat
);
28990 gcc_assert (src
== stack_pointer_rtx
);
28991 reg
= REGNO (dest
);
28992 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28995 handled_one
= true;
28998 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
28999 to get correct dwarf information for shrink-wrap. We should not
29000 emit unwind information for it because these are used either for
29001 pretend arguments or notes to adjust sp and restore registers from
29003 case REG_CFA_DEF_CFA
:
29004 case REG_CFA_ADJUST_CFA
:
29005 case REG_CFA_RESTORE
:
29008 case REG_CFA_EXPRESSION
:
29009 case REG_CFA_OFFSET
:
29010 /* ??? Only handling here what we actually emit. */
29011 gcc_unreachable ();
29019 pat
= PATTERN (insn
);
29022 switch (GET_CODE (pat
))
29025 arm_unwind_emit_set (asm_out_file
, pat
);
29029 /* Store multiple. */
29030 arm_unwind_emit_sequence (asm_out_file
, pat
);
29039 /* Output a reference from a function exception table to the type_info
29040 object X. The EABI specifies that the symbol should be relocated by
29041 an R_ARM_TARGET2 relocation. */
29044 arm_output_ttype (rtx x
)
29046 fputs ("\t.word\t", asm_out_file
);
29047 output_addr_const (asm_out_file
, x
);
29048 /* Use special relocations for symbol references. */
29049 if (!CONST_INT_P (x
))
29050 fputs ("(TARGET2)", asm_out_file
);
29051 fputc ('\n', asm_out_file
);
29056 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29059 arm_asm_emit_except_personality (rtx personality
)
29061 fputs ("\t.personality\t", asm_out_file
);
29062 output_addr_const (asm_out_file
, personality
);
29063 fputc ('\n', asm_out_file
);
29066 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29069 arm_asm_init_sections (void)
29071 exception_section
= get_unnamed_section (0, output_section_asm_op
,
29074 #endif /* ARM_UNWIND_INFO */
29076 /* Output unwind directives for the start/end of a function. */
29079 arm_output_fn_unwind (FILE * f
, bool prologue
)
29081 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29085 fputs ("\t.fnstart\n", f
);
29088 /* If this function will never be unwound, then mark it as such.
29089 The came condition is used in arm_unwind_emit to suppress
29090 the frame annotations. */
29091 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29092 && (TREE_NOTHROW (current_function_decl
)
29093 || crtl
->all_throwers_are_sibcalls
))
29094 fputs("\t.cantunwind\n", f
);
29096 fputs ("\t.fnend\n", f
);
29101 arm_emit_tls_decoration (FILE *fp
, rtx x
)
29103 enum tls_reloc reloc
;
29106 val
= XVECEXP (x
, 0, 0);
29107 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
29109 output_addr_const (fp
, val
);
29114 fputs ("(tlsgd)", fp
);
29117 fputs ("(tlsldm)", fp
);
29120 fputs ("(tlsldo)", fp
);
29123 fputs ("(gottpoff)", fp
);
29126 fputs ("(tpoff)", fp
);
29129 fputs ("(tlsdesc)", fp
);
29132 gcc_unreachable ();
29141 fputs (" + (. - ", fp
);
29142 output_addr_const (fp
, XVECEXP (x
, 0, 2));
29143 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29144 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
29145 output_addr_const (fp
, XVECEXP (x
, 0, 3));
29155 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29158 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
29160 gcc_assert (size
== 4);
29161 fputs ("\t.word\t", file
);
29162 output_addr_const (file
, x
);
29163 fputs ("(tlsldo)", file
);
29166 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29169 arm_output_addr_const_extra (FILE *fp
, rtx x
)
29171 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
29172 return arm_emit_tls_decoration (fp
, x
);
29173 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
29176 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
29178 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
29179 assemble_name_raw (fp
, label
);
29183 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
29185 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
29189 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29193 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
29195 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29199 output_addr_const (fp
, XVECEXP (x
, 0, 1));
29203 else if (GET_CODE (x
) == CONST_VECTOR
)
29204 return arm_emit_vector_const (fp
, x
);
29209 /* Output assembly for a shift instruction.
29210 SET_FLAGS determines how the instruction modifies the condition codes.
29211 0 - Do not set condition codes.
29212 1 - Set condition codes.
29213 2 - Use smallest instruction. */
29215 arm_output_shift(rtx
* operands
, int set_flags
)
29218 static const char flag_chars
[3] = {'?', '.', '!'};
29223 c
= flag_chars
[set_flags
];
29224 if (TARGET_UNIFIED_ASM
)
29226 shift
= shift_op(operands
[3], &val
);
29230 operands
[2] = GEN_INT(val
);
29231 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
29234 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
29237 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
29238 output_asm_insn (pattern
, operands
);
29242 /* Output assembly for a WMMX immediate shift instruction. */
29244 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
29246 int shift
= INTVAL (operands
[2]);
29248 enum machine_mode opmode
= GET_MODE (operands
[0]);
29250 gcc_assert (shift
>= 0);
29252 /* If the shift value in the register versions is > 63 (for D qualifier),
29253 31 (for W qualifier) or 15 (for H qualifier). */
29254 if (((opmode
== V4HImode
) && (shift
> 15))
29255 || ((opmode
== V2SImode
) && (shift
> 31))
29256 || ((opmode
== DImode
) && (shift
> 63)))
29260 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29261 output_asm_insn (templ
, operands
);
29262 if (opmode
== DImode
)
29264 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
29265 output_asm_insn (templ
, operands
);
29270 /* The destination register will contain all zeros. */
29271 sprintf (templ
, "wzero\t%%0");
29272 output_asm_insn (templ
, operands
);
29277 if ((opmode
== DImode
) && (shift
> 32))
29279 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29280 output_asm_insn (templ
, operands
);
29281 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
29282 output_asm_insn (templ
, operands
);
29286 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
29287 output_asm_insn (templ
, operands
);
29292 /* Output assembly for a WMMX tinsr instruction. */
29294 arm_output_iwmmxt_tinsr (rtx
*operands
)
29296 int mask
= INTVAL (operands
[3]);
29299 int units
= mode_nunits
[GET_MODE (operands
[0])];
29300 gcc_assert ((mask
& (mask
- 1)) == 0);
29301 for (i
= 0; i
< units
; ++i
)
29303 if ((mask
& 0x01) == 1)
29309 gcc_assert (i
< units
);
29311 switch (GET_MODE (operands
[0]))
29314 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
29317 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
29320 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
29323 gcc_unreachable ();
29326 output_asm_insn (templ
, operands
);
29331 /* Output a Thumb-1 casesi dispatch sequence. */
29333 thumb1_output_casesi (rtx
*operands
)
29335 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[0]));
29337 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29339 switch (GET_MODE(diff_vec
))
29342 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29343 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29345 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29346 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29348 return "bl\t%___gnu_thumb1_case_si";
29350 gcc_unreachable ();
29354 /* Output a Thumb-2 casesi instruction. */
29356 thumb2_output_casesi (rtx
*operands
)
29358 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
29360 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29362 output_asm_insn ("cmp\t%0, %1", operands
);
29363 output_asm_insn ("bhi\t%l3", operands
);
29364 switch (GET_MODE(diff_vec
))
29367 return "tbb\t[%|pc, %0]";
29369 return "tbh\t[%|pc, %0, lsl #1]";
29373 output_asm_insn ("adr\t%4, %l2", operands
);
29374 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
29375 output_asm_insn ("add\t%4, %4, %5", operands
);
29380 output_asm_insn ("adr\t%4, %l2", operands
);
29381 return "ldr\t%|pc, [%4, %0, lsl #2]";
29384 gcc_unreachable ();
29388 /* Most ARM cores are single issue, but some newer ones can dual issue.
29389 The scheduler descriptions rely on this being correct. */
29391 arm_issue_rate (void)
29418 /* A table and a function to perform ARM-specific name mangling for
29419 NEON vector types in order to conform to the AAPCS (see "Procedure
29420 Call Standard for the ARM Architecture", Appendix A). To qualify
29421 for emission with the mangled names defined in that document, a
29422 vector type must not only be of the correct mode but also be
29423 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29426 enum machine_mode mode
;
29427 const char *element_type_name
;
29428 const char *aapcs_name
;
29429 } arm_mangle_map_entry
;
29431 static arm_mangle_map_entry arm_mangle_map
[] = {
29432 /* 64-bit containerized types. */
29433 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
29434 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29435 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
29436 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29437 { V4HFmode
, "__builtin_neon_hf", "18__simd64_float16_t" },
29438 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
29439 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
29440 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
29441 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29442 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29444 /* 128-bit containerized types. */
29445 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
29446 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29447 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
29448 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29449 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
29450 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
29451 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
29452 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29453 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29454 { VOIDmode
, NULL
, NULL
}
29458 arm_mangle_type (const_tree type
)
29460 arm_mangle_map_entry
*pos
= arm_mangle_map
;
29462 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29463 has to be managled as if it is in the "std" namespace. */
29464 if (TARGET_AAPCS_BASED
29465 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
29466 return "St9__va_list";
29468 /* Half-precision float. */
29469 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
29472 if (TREE_CODE (type
) != VECTOR_TYPE
)
29475 /* Check the mode of the vector type, and the name of the vector
29476 element type, against the table. */
29477 while (pos
->mode
!= VOIDmode
)
29479 tree elt_type
= TREE_TYPE (type
);
29481 if (pos
->mode
== TYPE_MODE (type
)
29482 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
29483 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
29484 pos
->element_type_name
))
29485 return pos
->aapcs_name
;
29490 /* Use the default mangling for unrecognized (possibly user-defined)
29495 /* Order of allocation of core registers for Thumb: this allocation is
29496 written over the corresponding initial entries of the array
29497 initialized with REG_ALLOC_ORDER. We allocate all low registers
29498 first. Saving and restoring a low register is usually cheaper than
29499 using a call-clobbered high register. */
29501 static const int thumb_core_reg_alloc_order
[] =
29503 3, 2, 1, 0, 4, 5, 6, 7,
29504 14, 12, 8, 9, 10, 11
29507 /* Adjust register allocation order when compiling for Thumb. */
29510 arm_order_regs_for_local_alloc (void)
29512 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
29513 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
29515 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
29516 sizeof (thumb_core_reg_alloc_order
));
29519 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29522 arm_frame_pointer_required (void)
29524 return (cfun
->has_nonlocal_label
29525 || SUBTARGET_FRAME_POINTER_REQUIRED
29526 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
29529 /* Only thumb1 can't support conditional execution, so return true if
29530 the target is not thumb1. */
29532 arm_have_conditional_execution (void)
29534 return !TARGET_THUMB1
;
29538 arm_builtin_vectorized_function (tree fndecl
, tree type_out
, tree type_in
)
29540 enum machine_mode in_mode
, out_mode
;
29543 if (TREE_CODE (type_out
) != VECTOR_TYPE
29544 || TREE_CODE (type_in
) != VECTOR_TYPE
29545 || !(TARGET_NEON
&& TARGET_FPU_ARMV8
&& flag_unsafe_math_optimizations
))
29548 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
29549 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
29550 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
29551 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
29553 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29554 decl of the vectorized builtin for the appropriate vector mode.
29555 NULL_TREE is returned if no such builtin is available. */
29556 #undef ARM_CHECK_BUILTIN_MODE
29557 #define ARM_CHECK_BUILTIN_MODE(C) \
29558 (out_mode == SFmode && out_n == C \
29559 && in_mode == SFmode && in_n == C)
29561 #undef ARM_FIND_VRINT_VARIANT
29562 #define ARM_FIND_VRINT_VARIANT(N) \
29563 (ARM_CHECK_BUILTIN_MODE (2) \
29564 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29565 : (ARM_CHECK_BUILTIN_MODE (4) \
29566 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29569 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
)
29571 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
29574 case BUILT_IN_FLOORF
:
29575 return ARM_FIND_VRINT_VARIANT (vrintm
);
29576 case BUILT_IN_CEILF
:
29577 return ARM_FIND_VRINT_VARIANT (vrintp
);
29578 case BUILT_IN_TRUNCF
:
29579 return ARM_FIND_VRINT_VARIANT (vrintz
);
29580 case BUILT_IN_ROUNDF
:
29581 return ARM_FIND_VRINT_VARIANT (vrinta
);
29588 #undef ARM_CHECK_BUILTIN_MODE
29589 #undef ARM_FIND_VRINT_VARIANT
29591 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29592 static HOST_WIDE_INT
29593 arm_vector_alignment (const_tree type
)
29595 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
29597 if (TARGET_AAPCS_BASED
)
29598 align
= MIN (align
, 64);
29603 static unsigned int
29604 arm_autovectorize_vector_sizes (void)
29606 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
29610 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
29612 /* Vectors which aren't in packed structures will not be less aligned than
29613 the natural alignment of their element type, so this is safe. */
29614 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
29617 return default_builtin_vector_alignment_reachable (type
, is_packed
);
29621 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
29622 const_tree type
, int misalignment
,
29625 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
29627 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
29632 /* If the misalignment is unknown, we should be able to handle the access
29633 so long as it is not to a member of a packed data structure. */
29634 if (misalignment
== -1)
29637 /* Return true if the misalignment is a multiple of the natural alignment
29638 of the vector's element type. This is probably always going to be
29639 true in practice, since we've already established that this isn't a
29641 return ((misalignment
% align
) == 0);
29644 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
29649 arm_conditional_register_usage (void)
29653 if (TARGET_THUMB1
&& optimize_size
)
29655 /* When optimizing for size on Thumb-1, it's better not
29656 to use the HI regs, because of the overhead of
29658 for (regno
= FIRST_HI_REGNUM
;
29659 regno
<= LAST_HI_REGNUM
; ++regno
)
29660 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
29663 /* The link register can be clobbered by any branch insn,
29664 but we have no way to track that at present, so mark
29665 it as unavailable. */
29667 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
29669 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
29671 /* VFPv3 registers are disabled when earlier VFP
29672 versions are selected due to the definition of
29673 LAST_VFP_REGNUM. */
29674 for (regno
= FIRST_VFP_REGNUM
;
29675 regno
<= LAST_VFP_REGNUM
; ++ regno
)
29677 fixed_regs
[regno
] = 0;
29678 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
29679 || regno
>= FIRST_VFP_REGNUM
+ 32;
29683 if (TARGET_REALLY_IWMMXT
)
29685 regno
= FIRST_IWMMXT_GR_REGNUM
;
29686 /* The 2002/10/09 revision of the XScale ABI has wCG0
29687 and wCG1 as call-preserved registers. The 2002/11/21
29688 revision changed this so that all wCG registers are
29689 scratch registers. */
29690 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
29691 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
29692 fixed_regs
[regno
] = 0;
29693 /* The XScale ABI has wR0 - wR9 as scratch registers,
29694 the rest as call-preserved registers. */
29695 for (regno
= FIRST_IWMMXT_REGNUM
;
29696 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
29698 fixed_regs
[regno
] = 0;
29699 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
29703 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
29705 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29706 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29708 else if (TARGET_APCS_STACK
)
29710 fixed_regs
[10] = 1;
29711 call_used_regs
[10] = 1;
29713 /* -mcaller-super-interworking reserves r11 for calls to
29714 _interwork_r11_call_via_rN(). Making the register global
29715 is an easy way of ensuring that it remains valid for all
29717 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
29718 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
29720 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29721 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29722 if (TARGET_CALLER_INTERWORKING
)
29723 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29725 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29729 arm_preferred_rename_class (reg_class_t rclass
)
29731 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29732 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
29733 and code size can be reduced. */
29734 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
29740 /* Compute the atrribute "length" of insn "*push_multi".
29741 So this function MUST be kept in sync with that insn pattern. */
29743 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
29745 int i
, regno
, hi_reg
;
29746 int num_saves
= XVECLEN (parallel_op
, 0);
29756 regno
= REGNO (first_op
);
29757 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
29758 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
29760 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
29761 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
29769 /* Compute the number of instructions emitted by output_move_double. */
29771 arm_count_output_move_double_insns (rtx
*operands
)
29775 /* output_move_double may modify the operands array, so call it
29776 here on a copy of the array. */
29777 ops
[0] = operands
[0];
29778 ops
[1] = operands
[1];
29779 output_move_double (ops
, false, &count
);
29784 vfp3_const_double_for_fract_bits (rtx operand
)
29786 REAL_VALUE_TYPE r0
;
29788 if (!CONST_DOUBLE_P (operand
))
29791 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
29792 if (exact_real_inverse (DFmode
, &r0
))
29794 if (exact_real_truncate (DFmode
, &r0
))
29796 HOST_WIDE_INT value
= real_to_integer (&r0
);
29797 value
= value
& 0xffffffff;
29798 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
29799 return int_log2 (value
);
29806 vfp3_const_double_for_bits (rtx operand
)
29808 REAL_VALUE_TYPE r0
;
29810 if (!CONST_DOUBLE_P (operand
))
29813 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
29814 if (exact_real_truncate (DFmode
, &r0
))
29816 HOST_WIDE_INT value
= real_to_integer (&r0
);
29817 value
= value
& 0xffffffff;
29818 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
29819 return int_log2 (value
);
29825 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29828 arm_pre_atomic_barrier (enum memmodel model
)
29830 if (need_atomic_barrier_p (model
, true))
29831 emit_insn (gen_memory_barrier ());
29835 arm_post_atomic_barrier (enum memmodel model
)
29837 if (need_atomic_barrier_p (model
, false))
29838 emit_insn (gen_memory_barrier ());
29841 /* Emit the load-exclusive and store-exclusive instructions.
29842 Use acquire and release versions if necessary. */
29845 arm_emit_load_exclusive (enum machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
29847 rtx (*gen
) (rtx
, rtx
);
29853 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
29854 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
29855 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
29856 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
29858 gcc_unreachable ();
29865 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
29866 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
29867 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
29868 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
29870 gcc_unreachable ();
29874 emit_insn (gen (rval
, mem
));
29878 arm_emit_store_exclusive (enum machine_mode mode
, rtx bval
, rtx rval
,
29881 rtx (*gen
) (rtx
, rtx
, rtx
);
29887 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
29888 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
29889 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
29890 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
29892 gcc_unreachable ();
29899 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
29900 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
29901 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
29902 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
29904 gcc_unreachable ();
29908 emit_insn (gen (bval
, rval
, mem
));
29911 /* Mark the previous jump instruction as unlikely. */
29914 emit_unlikely_jump (rtx insn
)
29916 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
29918 insn
= emit_jump_insn (insn
);
29919 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
29922 /* Expand a compare and swap pattern. */
29925 arm_expand_compare_and_swap (rtx operands
[])
29927 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
29928 enum machine_mode mode
;
29929 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
29931 bval
= operands
[0];
29932 rval
= operands
[1];
29934 oldval
= operands
[3];
29935 newval
= operands
[4];
29936 is_weak
= operands
[5];
29937 mod_s
= operands
[6];
29938 mod_f
= operands
[7];
29939 mode
= GET_MODE (mem
);
29941 /* Normally the succ memory model must be stronger than fail, but in the
29942 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29943 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29945 if (TARGET_HAVE_LDACQ
29946 && INTVAL (mod_f
) == MEMMODEL_ACQUIRE
29947 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
29948 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
29954 /* For narrow modes, we're going to perform the comparison in SImode,
29955 so do the zero-extension now. */
29956 rval
= gen_reg_rtx (SImode
);
29957 oldval
= convert_modes (SImode
, mode
, oldval
, true);
29961 /* Force the value into a register if needed. We waited until after
29962 the zero-extension above to do this properly. */
29963 if (!arm_add_operand (oldval
, SImode
))
29964 oldval
= force_reg (SImode
, oldval
);
29968 if (!cmpdi_operand (oldval
, mode
))
29969 oldval
= force_reg (mode
, oldval
);
29973 gcc_unreachable ();
29978 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
29979 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
29980 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
29981 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
29983 gcc_unreachable ();
29986 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
29988 if (mode
== QImode
|| mode
== HImode
)
29989 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
29991 /* In all cases, we arrange for success to be signaled by Z set.
29992 This arrangement allows for the boolean result to be used directly
29993 in a subsequent branch, post optimization. */
29994 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
29995 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
29996 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
29999 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30000 another memory store between the load-exclusive and store-exclusive can
30001 reset the monitor from Exclusive to Open state. This means we must wait
30002 until after reload to split the pattern, lest we get a register spill in
30003 the middle of the atomic sequence. */
30006 arm_split_compare_and_swap (rtx operands
[])
30008 rtx rval
, mem
, oldval
, newval
, scratch
;
30009 enum machine_mode mode
;
30010 enum memmodel mod_s
, mod_f
;
30012 rtx label1
, label2
, x
, cond
;
30014 rval
= operands
[0];
30016 oldval
= operands
[2];
30017 newval
= operands
[3];
30018 is_weak
= (operands
[4] != const0_rtx
);
30019 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
30020 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
30021 scratch
= operands
[7];
30022 mode
= GET_MODE (mem
);
30024 bool use_acquire
= TARGET_HAVE_LDACQ
30025 && !(mod_s
== MEMMODEL_RELAXED
30026 || mod_s
== MEMMODEL_CONSUME
30027 || mod_s
== MEMMODEL_RELEASE
);
30029 bool use_release
= TARGET_HAVE_LDACQ
30030 && !(mod_s
== MEMMODEL_RELAXED
30031 || mod_s
== MEMMODEL_CONSUME
30032 || mod_s
== MEMMODEL_ACQUIRE
);
30034 /* Checks whether a barrier is needed and emits one accordingly. */
30035 if (!(use_acquire
|| use_release
))
30036 arm_pre_atomic_barrier (mod_s
);
30041 label1
= gen_label_rtx ();
30042 emit_label (label1
);
30044 label2
= gen_label_rtx ();
30046 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
30048 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
30049 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30050 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30051 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
30052 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30054 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
30056 /* Weak or strong, we want EQ to be true for success, so that we
30057 match the flags that we got from the compare above. */
30058 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30059 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
30060 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
30064 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30065 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30066 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
30067 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30070 if (mod_f
!= MEMMODEL_RELAXED
)
30071 emit_label (label2
);
30073 /* Checks whether a barrier is needed and emits one accordingly. */
30074 if (!(use_acquire
|| use_release
))
30075 arm_post_atomic_barrier (mod_s
);
30077 if (mod_f
== MEMMODEL_RELAXED
)
30078 emit_label (label2
);
30082 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
30083 rtx value
, rtx model_rtx
, rtx cond
)
30085 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
30086 enum machine_mode mode
= GET_MODE (mem
);
30087 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
30090 bool use_acquire
= TARGET_HAVE_LDACQ
30091 && !(model
== MEMMODEL_RELAXED
30092 || model
== MEMMODEL_CONSUME
30093 || model
== MEMMODEL_RELEASE
);
30095 bool use_release
= TARGET_HAVE_LDACQ
30096 && !(model
== MEMMODEL_RELAXED
30097 || model
== MEMMODEL_CONSUME
30098 || model
== MEMMODEL_ACQUIRE
);
30100 /* Checks whether a barrier is needed and emits one accordingly. */
30101 if (!(use_acquire
|| use_release
))
30102 arm_pre_atomic_barrier (model
);
30104 label
= gen_label_rtx ();
30105 emit_label (label
);
30108 new_out
= gen_lowpart (wmode
, new_out
);
30110 old_out
= gen_lowpart (wmode
, old_out
);
30113 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
30115 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
30124 x
= gen_rtx_AND (wmode
, old_out
, value
);
30125 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30126 x
= gen_rtx_NOT (wmode
, new_out
);
30127 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30131 if (CONST_INT_P (value
))
30133 value
= GEN_INT (-INTVAL (value
));
30139 if (mode
== DImode
)
30141 /* DImode plus/minus need to clobber flags. */
30142 /* The adddi3 and subdi3 patterns are incorrectly written so that
30143 they require matching operands, even when we could easily support
30144 three operands. Thankfully, this can be fixed up post-splitting,
30145 as the individual add+adc patterns do accept three operands and
30146 post-reload cprop can make these moves go away. */
30147 emit_move_insn (new_out
, old_out
);
30149 x
= gen_adddi3 (new_out
, new_out
, value
);
30151 x
= gen_subdi3 (new_out
, new_out
, value
);
30158 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
30159 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30163 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
30166 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30167 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
30169 /* Checks whether a barrier is needed and emits one accordingly. */
30170 if (!(use_acquire
|| use_release
))
30171 arm_post_atomic_barrier (model
);
30174 #define MAX_VECT_LEN 16
30176 struct expand_vec_perm_d
30178 rtx target
, op0
, op1
;
30179 unsigned char perm
[MAX_VECT_LEN
];
30180 enum machine_mode vmode
;
30181 unsigned char nelt
;
30186 /* Generate a variable permutation. */
30189 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30191 enum machine_mode vmode
= GET_MODE (target
);
30192 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30194 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
30195 gcc_checking_assert (GET_MODE (op0
) == vmode
);
30196 gcc_checking_assert (GET_MODE (op1
) == vmode
);
30197 gcc_checking_assert (GET_MODE (sel
) == vmode
);
30198 gcc_checking_assert (TARGET_NEON
);
30202 if (vmode
== V8QImode
)
30203 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
30205 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
30211 if (vmode
== V8QImode
)
30213 pair
= gen_reg_rtx (V16QImode
);
30214 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
30215 pair
= gen_lowpart (TImode
, pair
);
30216 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
30220 pair
= gen_reg_rtx (OImode
);
30221 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
30222 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
30228 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30230 enum machine_mode vmode
= GET_MODE (target
);
30231 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
30232 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30233 rtx rmask
[MAX_VECT_LEN
], mask
;
30235 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30236 numbering of elements for big-endian, we must reverse the order. */
30237 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
30239 /* The VTBL instruction does not use a modulo index, so we must take care
30240 of that ourselves. */
30241 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30242 for (i
= 0; i
< nelt
; ++i
)
30244 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
30245 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
30247 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
30250 /* Generate or test for an insn that supports a constant permutation. */
30252 /* Recognize patterns for the VUZP insns. */
30255 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
30257 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30258 rtx out0
, out1
, in0
, in1
, x
;
30259 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30261 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30264 /* Note that these are little-endian tests. Adjust for big-endian later. */
30265 if (d
->perm
[0] == 0)
30267 else if (d
->perm
[0] == 1)
30271 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30273 for (i
= 0; i
< nelt
; i
++)
30275 unsigned elt
= (i
* 2 + odd
) & mask
;
30276 if (d
->perm
[i
] != elt
)
30286 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
30287 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
30288 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
30289 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
30290 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
30291 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
30292 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
30293 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
30295 gcc_unreachable ();
30300 if (BYTES_BIG_ENDIAN
)
30302 x
= in0
, in0
= in1
, in1
= x
;
30307 out1
= gen_reg_rtx (d
->vmode
);
30309 x
= out0
, out0
= out1
, out1
= x
;
30311 emit_insn (gen (out0
, in0
, in1
, out1
));
30315 /* Recognize patterns for the VZIP insns. */
30318 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
30320 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
30321 rtx out0
, out1
, in0
, in1
, x
;
30322 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30324 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30327 /* Note that these are little-endian tests. Adjust for big-endian later. */
30329 if (d
->perm
[0] == high
)
30331 else if (d
->perm
[0] == 0)
30335 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30337 for (i
= 0; i
< nelt
/ 2; i
++)
30339 unsigned elt
= (i
+ high
) & mask
;
30340 if (d
->perm
[i
* 2] != elt
)
30342 elt
= (elt
+ nelt
) & mask
;
30343 if (d
->perm
[i
* 2 + 1] != elt
)
30353 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
30354 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
30355 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
30356 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
30357 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
30358 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
30359 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
30360 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
30362 gcc_unreachable ();
30367 if (BYTES_BIG_ENDIAN
)
30369 x
= in0
, in0
= in1
, in1
= x
;
30374 out1
= gen_reg_rtx (d
->vmode
);
30376 x
= out0
, out0
= out1
, out1
= x
;
30378 emit_insn (gen (out0
, in0
, in1
, out1
));
30382 /* Recognize patterns for the VREV insns. */
30385 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
30387 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
30388 rtx (*gen
)(rtx
, rtx
, rtx
);
30390 if (!d
->one_vector_p
)
30399 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
30400 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
30408 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
30409 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
30410 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
30411 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
30419 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
30420 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
30421 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
30422 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
30423 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
30424 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
30425 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
30426 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
30435 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
30436 for (j
= 0; j
<= diff
; j
+= 1)
30438 /* This is guaranteed to be true as the value of diff
30439 is 7, 3, 1 and we should have enough elements in the
30440 queue to generate this. Getting a vector mask with a
30441 value of diff other than these values implies that
30442 something is wrong by the time we get here. */
30443 gcc_assert (i
+ j
< nelt
);
30444 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
30452 /* ??? The third operand is an artifact of the builtin infrastructure
30453 and is ignored by the actual instruction. */
30454 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
30458 /* Recognize patterns for the VTRN insns. */
30461 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
30463 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30464 rtx out0
, out1
, in0
, in1
, x
;
30465 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30467 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30470 /* Note that these are little-endian tests. Adjust for big-endian later. */
30471 if (d
->perm
[0] == 0)
30473 else if (d
->perm
[0] == 1)
30477 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30479 for (i
= 0; i
< nelt
; i
+= 2)
30481 if (d
->perm
[i
] != i
+ odd
)
30483 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
30493 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
30494 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
30495 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
30496 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
30497 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
30498 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
30499 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
30500 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
30502 gcc_unreachable ();
30507 if (BYTES_BIG_ENDIAN
)
30509 x
= in0
, in0
= in1
, in1
= x
;
30514 out1
= gen_reg_rtx (d
->vmode
);
30516 x
= out0
, out0
= out1
, out1
= x
;
30518 emit_insn (gen (out0
, in0
, in1
, out1
));
30522 /* Recognize patterns for the VEXT insns. */
30525 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
30527 unsigned int i
, nelt
= d
->nelt
;
30528 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
30531 unsigned int location
;
30533 unsigned int next
= d
->perm
[0] + 1;
30535 /* TODO: Handle GCC's numbering of elements for big-endian. */
30536 if (BYTES_BIG_ENDIAN
)
30539 /* Check if the extracted indexes are increasing by one. */
30540 for (i
= 1; i
< nelt
; next
++, i
++)
30542 /* If we hit the most significant element of the 2nd vector in
30543 the previous iteration, no need to test further. */
30544 if (next
== 2 * nelt
)
30547 /* If we are operating on only one vector: it could be a
30548 rotation. If there are only two elements of size < 64, let
30549 arm_evpc_neon_vrev catch it. */
30550 if (d
->one_vector_p
&& (next
== nelt
))
30552 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
30558 if (d
->perm
[i
] != next
)
30562 location
= d
->perm
[0];
30566 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
30567 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
30568 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
30569 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
30570 case V2SImode
: gen
= gen_neon_vextv2si
; break;
30571 case V4SImode
: gen
= gen_neon_vextv4si
; break;
30572 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
30573 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
30574 case V2DImode
: gen
= gen_neon_vextv2di
; break;
30583 offset
= GEN_INT (location
);
30584 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
30588 /* The NEON VTBL instruction is a fully variable permuation that's even
30589 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30590 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30591 can do slightly better by expanding this as a constant where we don't
30592 have to apply a mask. */
30595 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
30597 rtx rperm
[MAX_VECT_LEN
], sel
;
30598 enum machine_mode vmode
= d
->vmode
;
30599 unsigned int i
, nelt
= d
->nelt
;
30601 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30602 numbering of elements for big-endian, we must reverse the order. */
30603 if (BYTES_BIG_ENDIAN
)
30609 /* Generic code will try constant permutation twice. Once with the
30610 original mode and again with the elements lowered to QImode.
30611 So wait and don't do the selector expansion ourselves. */
30612 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
30615 for (i
= 0; i
< nelt
; ++i
)
30616 rperm
[i
] = GEN_INT (d
->perm
[i
]);
30617 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
30618 sel
= force_reg (vmode
, sel
);
30620 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
30625 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
30627 /* Check if the input mask matches vext before reordering the
30630 if (arm_evpc_neon_vext (d
))
30633 /* The pattern matching functions above are written to look for a small
30634 number to begin the sequence (0, 1, N/2). If we begin with an index
30635 from the second operand, we can swap the operands. */
30636 if (d
->perm
[0] >= d
->nelt
)
30638 unsigned i
, nelt
= d
->nelt
;
30641 for (i
= 0; i
< nelt
; ++i
)
30642 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
30651 if (arm_evpc_neon_vuzp (d
))
30653 if (arm_evpc_neon_vzip (d
))
30655 if (arm_evpc_neon_vrev (d
))
30657 if (arm_evpc_neon_vtrn (d
))
30659 return arm_evpc_neon_vtbl (d
);
30664 /* Expand a vec_perm_const pattern. */
30667 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30669 struct expand_vec_perm_d d
;
30670 int i
, nelt
, which
;
30676 d
.vmode
= GET_MODE (target
);
30677 gcc_assert (VECTOR_MODE_P (d
.vmode
));
30678 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
30679 d
.testing_p
= false;
30681 for (i
= which
= 0; i
< nelt
; ++i
)
30683 rtx e
= XVECEXP (sel
, 0, i
);
30684 int ei
= INTVAL (e
) & (2 * nelt
- 1);
30685 which
|= (ei
< nelt
? 1 : 2);
30695 d
.one_vector_p
= false;
30696 if (!rtx_equal_p (op0
, op1
))
30699 /* The elements of PERM do not suggest that only the first operand
30700 is used, but both operands are identical. Allow easier matching
30701 of the permutation by folding the permutation into the single
30705 for (i
= 0; i
< nelt
; ++i
)
30706 d
.perm
[i
] &= nelt
- 1;
30708 d
.one_vector_p
= true;
30713 d
.one_vector_p
= true;
30717 return arm_expand_vec_perm_const_1 (&d
);
30720 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30723 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
30724 const unsigned char *sel
)
30726 struct expand_vec_perm_d d
;
30727 unsigned int i
, nelt
, which
;
30731 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
30732 d
.testing_p
= true;
30733 memcpy (d
.perm
, sel
, nelt
);
30735 /* Categorize the set of elements in the selector. */
30736 for (i
= which
= 0; i
< nelt
; ++i
)
30738 unsigned char e
= d
.perm
[i
];
30739 gcc_assert (e
< 2 * nelt
);
30740 which
|= (e
< nelt
? 1 : 2);
30743 /* For all elements from second vector, fold the elements to first. */
30745 for (i
= 0; i
< nelt
; ++i
)
30748 /* Check whether the mask can be applied to the vector type. */
30749 d
.one_vector_p
= (which
!= 3);
30751 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
30752 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
30753 if (!d
.one_vector_p
)
30754 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
30757 ret
= arm_expand_vec_perm_const_1 (&d
);
30764 arm_autoinc_modes_ok_p (enum machine_mode mode
, enum arm_auto_incmodes code
)
30766 /* If we are soft float and we do not have ldrd
30767 then all auto increment forms are ok. */
30768 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
30773 /* Post increment and Pre Decrement are supported for all
30774 instruction forms except for vector forms. */
30777 if (VECTOR_MODE_P (mode
))
30779 if (code
!= ARM_PRE_DEC
)
30789 /* Without LDRD and mode size greater than
30790 word size, there is no point in auto-incrementing
30791 because ldm and stm will not have these forms. */
30792 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
30795 /* Vector and floating point modes do not support
30796 these auto increment forms. */
30797 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
30810 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
30811 on ARM, since we know that shifts by negative amounts are no-ops.
30812 Additionally, the default expansion code is not available or suitable
30813 for post-reload insn splits (this can occur when the register allocator
30814 chooses not to do a shift in NEON).
30816 This function is used in both initial expand and post-reload splits, and
30817 handles all kinds of 64-bit shifts.
30819 Input requirements:
30820 - It is safe for the input and output to be the same register, but
30821 early-clobber rules apply for the shift amount and scratch registers.
30822 - Shift by register requires both scratch registers. In all other cases
30823 the scratch registers may be NULL.
30824 - Ashiftrt by a register also clobbers the CC register. */
30826 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
30827 rtx amount
, rtx scratch1
, rtx scratch2
)
30829 rtx out_high
= gen_highpart (SImode
, out
);
30830 rtx out_low
= gen_lowpart (SImode
, out
);
30831 rtx in_high
= gen_highpart (SImode
, in
);
30832 rtx in_low
= gen_lowpart (SImode
, in
);
30835 in = the register pair containing the input value.
30836 out = the destination register pair.
30837 up = the high- or low-part of each pair.
30838 down = the opposite part to "up".
30839 In a shift, we can consider bits to shift from "up"-stream to
30840 "down"-stream, so in a left-shift "up" is the low-part and "down"
30841 is the high-part of each register pair. */
30843 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
30844 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
30845 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
30846 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
30848 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
30850 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
30851 && GET_MODE (out
) == DImode
);
30853 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
30854 && GET_MODE (in
) == DImode
);
30856 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
30857 && GET_MODE (amount
) == SImode
)
30858 || CONST_INT_P (amount
)));
30859 gcc_assert (scratch1
== NULL
30860 || (GET_CODE (scratch1
) == SCRATCH
)
30861 || (GET_MODE (scratch1
) == SImode
30862 && REG_P (scratch1
)));
30863 gcc_assert (scratch2
== NULL
30864 || (GET_CODE (scratch2
) == SCRATCH
)
30865 || (GET_MODE (scratch2
) == SImode
30866 && REG_P (scratch2
)));
30867 gcc_assert (!REG_P (out
) || !REG_P (amount
)
30868 || !HARD_REGISTER_P (out
)
30869 || (REGNO (out
) != REGNO (amount
)
30870 && REGNO (out
) + 1 != REGNO (amount
)));
30872 /* Macros to make following code more readable. */
30873 #define SUB_32(DEST,SRC) \
30874 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30875 #define RSB_32(DEST,SRC) \
30876 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30877 #define SUB_S_32(DEST,SRC) \
30878 gen_addsi3_compare0 ((DEST), (SRC), \
30880 #define SET(DEST,SRC) \
30881 gen_rtx_SET (SImode, (DEST), (SRC))
30882 #define SHIFT(CODE,SRC,AMOUNT) \
30883 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30884 #define LSHIFT(CODE,SRC,AMOUNT) \
30885 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30886 SImode, (SRC), (AMOUNT))
30887 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30888 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30889 SImode, (SRC), (AMOUNT))
30891 gen_rtx_IOR (SImode, (A), (B))
30892 #define BRANCH(COND,LABEL) \
30893 gen_arm_cond_branch ((LABEL), \
30894 gen_rtx_ ## COND (CCmode, cc_reg, \
30898 /* Shifts by register and shifts by constant are handled separately. */
30899 if (CONST_INT_P (amount
))
30901 /* We have a shift-by-constant. */
30903 /* First, handle out-of-range shift amounts.
30904 In both cases we try to match the result an ARM instruction in a
30905 shift-by-register would give. This helps reduce execution
30906 differences between optimization levels, but it won't stop other
30907 parts of the compiler doing different things. This is "undefined
30908 behaviour, in any case. */
30909 if (INTVAL (amount
) <= 0)
30910 emit_insn (gen_movdi (out
, in
));
30911 else if (INTVAL (amount
) >= 64)
30913 if (code
== ASHIFTRT
)
30915 rtx const31_rtx
= GEN_INT (31);
30916 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
30917 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
30920 emit_insn (gen_movdi (out
, const0_rtx
));
30923 /* Now handle valid shifts. */
30924 else if (INTVAL (amount
) < 32)
30926 /* Shifts by a constant less than 32. */
30927 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
30929 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
30930 emit_insn (SET (out_down
,
30931 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
30933 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
30937 /* Shifts by a constant greater than 31. */
30938 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
30940 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
30941 if (code
== ASHIFTRT
)
30942 emit_insn (gen_ashrsi3 (out_up
, in_up
,
30945 emit_insn (SET (out_up
, const0_rtx
));
30950 /* We have a shift-by-register. */
30951 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
30953 /* This alternative requires the scratch registers. */
30954 gcc_assert (scratch1
&& REG_P (scratch1
));
30955 gcc_assert (scratch2
&& REG_P (scratch2
));
30957 /* We will need the values "amount-32" and "32-amount" later.
30958 Swapping them around now allows the later code to be more general. */
30962 emit_insn (SUB_32 (scratch1
, amount
));
30963 emit_insn (RSB_32 (scratch2
, amount
));
30966 emit_insn (RSB_32 (scratch1
, amount
));
30967 /* Also set CC = amount > 32. */
30968 emit_insn (SUB_S_32 (scratch2
, amount
));
30971 emit_insn (RSB_32 (scratch1
, amount
));
30972 emit_insn (SUB_32 (scratch2
, amount
));
30975 gcc_unreachable ();
30978 /* Emit code like this:
30981 out_down = in_down << amount;
30982 out_down = (in_up << (amount - 32)) | out_down;
30983 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30984 out_up = in_up << amount;
30987 out_down = in_down >> amount;
30988 out_down = (in_up << (32 - amount)) | out_down;
30990 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30991 out_up = in_up << amount;
30994 out_down = in_down >> amount;
30995 out_down = (in_up << (32 - amount)) | out_down;
30997 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30998 out_up = in_up << amount;
31000 The ARM and Thumb2 variants are the same but implemented slightly
31001 differently. If this were only called during expand we could just
31002 use the Thumb2 case and let combine do the right thing, but this
31003 can also be called from post-reload splitters. */
31005 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31007 if (!TARGET_THUMB2
)
31009 /* Emit code for ARM mode. */
31010 emit_insn (SET (out_down
,
31011 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
31012 if (code
== ASHIFTRT
)
31014 rtx done_label
= gen_label_rtx ();
31015 emit_jump_insn (BRANCH (LT
, done_label
));
31016 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
31018 emit_label (done_label
);
31021 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
31026 /* Emit code for Thumb2 mode.
31027 Thumb2 can't do shift and or in one insn. */
31028 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
31029 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
31031 if (code
== ASHIFTRT
)
31033 rtx done_label
= gen_label_rtx ();
31034 emit_jump_insn (BRANCH (LT
, done_label
));
31035 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
31036 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
31037 emit_label (done_label
);
31041 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
31042 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
31046 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31061 /* Returns true if a valid comparison operation and makes
31062 the operands in a form that is valid. */
31064 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
31066 enum rtx_code code
= GET_CODE (*comparison
);
31068 enum machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
31069 ? GET_MODE (*op2
) : GET_MODE (*op1
);
31071 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
31073 if (code
== UNEQ
|| code
== LTGT
)
31076 code_int
= (int)code
;
31077 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
31078 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
31083 if (!arm_add_operand (*op1
, mode
))
31084 *op1
= force_reg (mode
, *op1
);
31085 if (!arm_add_operand (*op2
, mode
))
31086 *op2
= force_reg (mode
, *op2
);
31090 if (!cmpdi_operand (*op1
, mode
))
31091 *op1
= force_reg (mode
, *op1
);
31092 if (!cmpdi_operand (*op2
, mode
))
31093 *op2
= force_reg (mode
, *op2
);
31098 if (!arm_float_compare_operand (*op1
, mode
))
31099 *op1
= force_reg (mode
, *op1
);
31100 if (!arm_float_compare_operand (*op2
, mode
))
31101 *op2
= force_reg (mode
, *op2
);
31111 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31113 static unsigned HOST_WIDE_INT
31114 arm_asan_shadow_offset (void)
31116 return (unsigned HOST_WIDE_INT
) 1 << 29;
31119 #include "gt-arm.h"