1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
26 #include "hash-table.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
40 #include "insn-attr.h"
46 #include "diagnostic-core.h"
53 #include "target-def.h"
55 #include "langhooks.h"
/* Forward definitions of types.  */

/* Node in the minipool (constant pool) linked list; see the minipool
   handling code later in this file.  */
typedef struct minipool_node Mnode;
/* Pending fixup referencing a minipool entry.  */
typedef struct minipool_fixup Mfix;

/* Hook a language front end may install to emit language-specific
   object attributes.  NULL (i.e. no-op) until a front end assigns it.  */
void (*arm_lang_output_object_attributes_hook)(void);
74 /* Forward function declarations. */
75 static bool arm_lra_p (void);
76 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
77 static int arm_compute_static_chain_stack_bytes (void);
78 static arm_stack_offsets
*arm_get_frame_offsets (void);
79 static void arm_add_gc_roots (void);
80 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
81 HOST_WIDE_INT
, rtx
, rtx
, int, int);
82 static unsigned bit_count (unsigned long);
83 static int arm_address_register_rtx_p (rtx
, int);
84 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
85 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
86 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
87 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
88 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
89 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
90 inline static int thumb1_index_register_rtx_p (rtx
, int);
91 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
92 static int thumb_far_jump_used_p (void);
93 static bool thumb_force_lr_save (void);
94 static unsigned arm_size_return_regs (void);
95 static bool arm_assemble_integer (rtx
, unsigned int, int);
96 static void arm_print_operand (FILE *, rtx
, int);
97 static void arm_print_operand_address (FILE *, rtx
);
98 static bool arm_print_operand_punct_valid_p (unsigned char code
);
99 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
100 static arm_cc
get_arm_condition_code (rtx
);
101 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
102 static const char *output_multi_immediate (rtx
*, const char *, const char *,
104 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
105 static struct machine_function
*arm_init_machine_status (void);
106 static void thumb_exit (FILE *, int);
107 static HOST_WIDE_INT
get_jump_table_size (rtx
);
108 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
109 static Mnode
*add_minipool_forward_ref (Mfix
*);
110 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
111 static Mnode
*add_minipool_backward_ref (Mfix
*);
112 static void assign_minipool_offsets (Mfix
*);
113 static void arm_print_value (FILE *, rtx
);
114 static void dump_minipool (rtx
);
115 static int arm_barrier_cost (rtx
);
116 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
117 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
118 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
120 static void arm_reorg (void);
121 static void note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
122 static unsigned long arm_compute_save_reg0_reg12_mask (void);
123 static unsigned long arm_compute_save_reg_mask (void);
124 static unsigned long arm_isr_value (tree
);
125 static unsigned long arm_compute_func_type (void);
126 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
127 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
128 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
130 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
132 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
133 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
134 static int arm_comp_type_attributes (const_tree
, const_tree
);
135 static void arm_set_default_type_attributes (tree
);
136 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
137 static int arm_sched_reorder (FILE *, int, rtx
*, int *, int);
138 static int optimal_immediate_sequence (enum rtx_code code
,
139 unsigned HOST_WIDE_INT val
,
140 struct four_ints
*return_sequence
);
141 static int optimal_immediate_sequence_1 (enum rtx_code code
,
142 unsigned HOST_WIDE_INT val
,
143 struct four_ints
*return_sequence
,
145 static int arm_get_strip_length (int);
146 static bool arm_function_ok_for_sibcall (tree
, tree
);
147 static enum machine_mode
arm_promote_function_mode (const_tree
,
148 enum machine_mode
, int *,
150 static bool arm_return_in_memory (const_tree
, const_tree
);
151 static rtx
arm_function_value (const_tree
, const_tree
, bool);
152 static rtx
arm_libcall_value_1 (enum machine_mode
);
153 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
154 static bool arm_function_value_regno_p (const unsigned int);
155 static void arm_internal_label (FILE *, const char *, unsigned long);
156 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
158 static bool arm_have_conditional_execution (void);
159 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
160 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
161 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
162 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
163 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
164 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
165 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
166 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
167 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
168 static int arm_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
169 static int arm_register_move_cost (enum machine_mode
, reg_class_t
, reg_class_t
);
170 static int arm_memory_move_cost (enum machine_mode
, reg_class_t
, bool);
171 static void arm_init_builtins (void);
172 static void arm_init_iwmmxt_builtins (void);
173 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
174 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
175 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
176 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
177 static tree
arm_builtin_decl (unsigned, bool);
178 static void emit_constant_insn (rtx cond
, rtx pattern
);
179 static rtx
emit_set_insn (rtx
, rtx
);
180 static rtx
emit_multi_reg_push (unsigned long);
181 static int arm_arg_partial_bytes (cumulative_args_t
, enum machine_mode
,
183 static rtx
arm_function_arg (cumulative_args_t
, enum machine_mode
,
185 static void arm_function_arg_advance (cumulative_args_t
, enum machine_mode
,
187 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
188 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
190 static rtx
aapcs_libcall_value (enum machine_mode
);
191 static int aapcs_select_return_coproc (const_tree
, const_tree
);
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
195 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
198 static void arm_encode_section_info (tree
, rtx
, int);
201 static void arm_file_end (void);
202 static void arm_file_start (void);
204 static void arm_setup_incoming_varargs (cumulative_args_t
, enum machine_mode
,
206 static bool arm_pass_by_reference (cumulative_args_t
,
207 enum machine_mode
, const_tree
, bool);
208 static bool arm_promote_prototypes (const_tree
);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree
);
212 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
213 static bool arm_return_in_memory (const_tree
, const_tree
);
215 static void arm_unwind_emit (FILE *, rtx
);
216 static bool arm_output_ttype (rtx
);
217 static void arm_asm_emit_except_personality (rtx
);
218 static void arm_asm_init_sections (void);
220 static rtx
arm_dwarf_register_span (rtx
);
222 static tree
arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree
arm_get_cookie_size (tree
);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree
);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree
arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree
, rtx
);
234 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
237 static bool arm_cannot_copy_insn_p (rtx
);
238 static bool arm_tls_symbol_p (rtx x
);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
241 static bool arm_output_addr_const_extra (FILE *, rtx
);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree
);
244 static const char *arm_invalid_parameter_type (const_tree t
);
245 static const char *arm_invalid_return_type (const_tree t
);
246 static tree
arm_promoted_type (const_tree t
);
247 static tree
arm_convert_to_type (tree type
, tree expr
);
248 static bool arm_scalar_mode_supported_p (enum machine_mode
);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx
, tree
, rtx
);
253 static rtx
arm_trampoline_adjust_address (rtx
);
254 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
255 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
256 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
257 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode
,
259 unsigned HOST_WIDE_INT
);
260 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
261 static bool arm_class_likely_spilled_p (reg_class_t
);
262 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
263 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
268 static void arm_conditional_register_usage (void);
269 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
276 const unsigned char *sel
);
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
280 int misalign ATTRIBUTE_UNUSED
);
281 static unsigned arm_add_stmt_cost (void *data
, int count
,
282 enum vect_cost_for_stmt kind
,
283 struct _stmt_vec_info
*stmt_info
,
285 enum vect_cost_model_location where
);
287 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
288 bool op0_preserve_value
);
289 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
291 /* Table of machine attributes. */
/* NOTE(review): this table looks truncated by whatever produced this copy
   of the file: the array's opening '{', several entry terminators
   ("false },"), the #ifdef that must pair with the #elif below, and the
   closing "};" are all missing.  Restore the surrounding structure from a
   pristine copy before building.  The layout of each entry is given by the
   field-name comment just below.  */
292 static const struct attribute_spec arm_attribute_table
[] =
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside of the 26 bit addressing range of a normal function
299 { "long_call", 0, 0, false, true, true, NULL
, false },
300 /* Whereas these functions are always known to reside within the 26 bit
302 { "short_call", 0, 0, false, true, true, NULL
, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
/* "interrupt" is an alias handled by the same ISR attribute handler.  */
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
314 /* ARM/PE has three new attributes:
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
323 { "dllimport", 0, 0, true, false, false, NULL
, false },
324 { "dllexport", 0, 0, true, false, false, NULL
, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
/* Sentinel entry terminating the table.  */
333 { NULL
, 0, 0, false, false, false, NULL
, false }
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
342 #undef TARGET_LEGITIMIZE_ADDRESS
343 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
346 #define TARGET_LRA_P arm_lra_p
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
398 #undef TARGET_ENCODE_SECTION_INFO
400 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
402 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
405 #undef TARGET_STRIP_NAME_ENCODING
406 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is -4088 + 1 + 4095 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
678 struct gcc_target targetm
= TARGET_INITIALIZER
;
680 /* Obstack for minipool constant handling. */
681 static struct obstack minipool_obstack
;
682 static char * minipool_startobj
;
684 /* The maximum number of insns skipped which
685 will be conditionalised if possible. */
686 static int max_insns_skipped
= 5;
688 extern FILE * asm_out_file
;
690 /* True if we are currently building a constant table. */
691 int making_const_table
;
693 /* The processor for which instructions should be scheduled. */
694 enum processor_type arm_tune
= arm_none
;
696 /* The current tuning set. */
697 const struct tune_params
*current_tune
;
699 /* Which floating point hardware to schedule for. */
702 /* Which floating popint hardware to use. */
703 const struct arm_fpu_desc
*arm_fpu_desc
;
705 /* Used for Thumb call_via trampolines. */
706 rtx thumb_call_via_label
[14];
707 static int thumb_call_reg_needed
;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)	/* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)	/* Extended multiply.  */
#define FL_MODE26     (1 << 2)	/* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)	/* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)	/* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)	/* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)	/* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)	/* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)	/* StrongARM.  */
#define FL_ARCH5E     (1 << 9)	/* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)	/* XScale.  */
/* spare	      (1 << 11) */
#define FL_ARCH6      (1 << 12)	/* Architecture rel 6.  Adds
				   media instructions.  */
#define FL_VFPV2      (1 << 13)	/* Vector Floating Point V2.  */
#define FL_WBUF	      (1 << 14)	/* Schedule for write buffer ops.
				   Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)	/* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)	/* Thumb-2.  */
#define FL_NOTM	      (1 << 17)	/* Instructions not present in the 'M'
				   profile.  */
#define FL_THUMB_DIV  (1 << 18)	/* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)	/* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)	/* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)	/* Instructions present in the ARMv7E-M
				   architecture.  */
#define FL_ARCH7      (1 << 22)	/* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)	/* Hardware divide (ARM mode).  */
#define FL_ARCH8      (1 << 24)	/* Architecture 8.  */
#define FL_CRC32      (1 << 25)	/* ARMv8 CRC32 instructions.  */

#define FL_IWMMXT     (1 << 29)	/* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_IWMMXT2    (1 << 30)	/* "Intel Wireless MMX2 technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
			 | FL_CO_PROC)

/* Cumulative feature sets: the capabilities available at each
   architecture level, each built on the previous one.  */
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM	(FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A	(FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
			 | FL_ARM_DIV | FL_NOTM)
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 static unsigned long insn_flags
= 0;
777 /* The bits in this mask specify which instruction scheduling options should
779 static unsigned long tune_flags
= 0;
781 /* The highest ARM architecture version supported by the
783 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
806 /* Nonzero if this chip supports the ARM 6K extensions. */
809 /* Nonzero if instructions present in ARMv6-M can be used. */
812 /* Nonzero if this chip supports the ARM 7 extensions. */
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm
= 0;
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
821 /* Nonzero if instructions present in ARMv8 can be used. */
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched
= 0;
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm
= 0;
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt
= 0;
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2
= 0;
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale
= 0;
839 /* Nonzero if tuning for XScale */
840 int arm_tune_xscale
= 0;
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf
= 0;
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9
= 0;
849 /* Nonzero if generating Thumb instructions. */
852 /* Nonzero if generating Thumb-1 instructions. */
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
857 XXX This is a bit of a hack, it's intended to help work around
858 problems in GLD which doesn't understand that armv5t code is
859 interworking clean. */
860 int arm_cpp_interwork
= 0;
862 /* Nonzero if chip supports Thumb 2. */
865 /* Nonzero if chip supports integer division instruction. */
866 int arm_arch_arm_hwdiv
;
867 int arm_arch_thumb_hwdiv
;
869 /* Nonzero if we should use Neon to handle 64-bits operations rather
870 than core registers. */
871 int prefer_neon_for_64bits
= 0;
873 /* Nonzero if we shouldn't use literal pools. */
874 bool arm_disable_literal_pool
= false;
876 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
877 we must report the mode of the memory reference from
878 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
879 enum machine_mode output_memory_reference_mode
;
881 /* The register number to be used for the PIC offset register. */
882 unsigned arm_pic_register
= INVALID_REGNUM
;
884 /* Set to 1 after arm_reorg has started. Reset to start at the start of
885 the next function. */
886 static int after_arm_reorg
= 0;
888 enum arm_pcs arm_pcs_default
;
890 /* For an explanation of these variables, see final_prescan_insn below. */
892 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
893 enum arm_cond_code arm_current_cc
;
896 int arm_target_label
;
897 /* The number of conditionally executed insns, including the current insn. */
898 int arm_condexec_count
= 0;
899 /* A bitmask specifying the patterns for the IT block.
900 Zero means do not output an IT block before this insn. */
901 int arm_condexec_mask
= 0;
902 /* The number of bits used in arm_condexec_mask. */
903 int arm_condexec_masklen
= 0;
905 /* Nonzero if chip supports the ARMv8 CRC instructions. */
906 int arm_arch_crc
= 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/* Mnemonic for a left shift: unified assembler syntax spells it "lsl",
   divided (legacy) syntax spells it "asl".  */
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")

/* True iff the two NUL-terminated strings compare equal.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Low registers (r0-r7) usable as Thumb-2 work registers: everything in
   0xff except the frame pointer, SP, PC and the PIC register.  */
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
928 /* Initialization code. */
932 const char *const name
;
933 enum processor_type core
;
935 enum base_architecture base_arch
;
936 const unsigned long flags
;
937 const struct tune_params
*const tune
;
/* Prefetch-tuning triple (prefetch slots, L1 cache size, L1 line size);
   -1 entries mean the value is unknown / prefetching is not beneficial.  */
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
947 /* arm generic vectorizer costs. */
949 struct cpu_vec_costs arm_default_vec_cost
= {
950 1, /* scalar_stmt_cost. */
951 1, /* scalar load_cost. */
952 1, /* scalar_store_cost. */
953 1, /* vec_stmt_cost. */
954 1, /* vec_to_scalar_cost. */
955 1, /* scalar_to_vec_cost. */
956 1, /* vec_align_load_cost. */
957 1, /* vec_unalign_load_cost. */
958 1, /* vec_unalign_store_cost. */
959 1, /* vec_store_cost. */
960 3, /* cond_taken_branch_cost. */
961 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): the cpu_cost_table initializers below (cortexa9, cortexa7,
   cortexa12, cortexa15 and v7m) are an incompletely extracted copy: the
   original listing's line numbers are fused onto the start of each line,
   and the opening/closing braces plus a number of field initializers are
   missing.  The surviving values are kept byte-identical here; restore
   these tables from the upstream file rather than editing them in place.  */
964 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
965 #include "aarch-cost-tables.h"
969 const struct cpu_cost_table cortexa9_extra_costs
=
976 COSTS_N_INSNS (1), /* Shift_reg. */
977 COSTS_N_INSNS (1), /* Arith_shift. */
978 COSTS_N_INSNS (2), /* Arith_shift_reg. */
980 COSTS_N_INSNS (1), /* Log_shift_reg. */
981 COSTS_N_INSNS (1), /* Extend. */
982 COSTS_N_INSNS (2), /* Extend_arith. */
983 COSTS_N_INSNS (1), /* Bfi. */
984 COSTS_N_INSNS (1), /* Bfx. */
987 true /* non_exec_costs_exec. */
992 COSTS_N_INSNS (3), /* Simple. */
993 COSTS_N_INSNS (3), /* Flag_setting. */
994 COSTS_N_INSNS (2), /* Extend. */
995 COSTS_N_INSNS (3), /* Add. */
996 COSTS_N_INSNS (2), /* Extend_add. */
997 COSTS_N_INSNS (30) /* Idiv. No HW div on Cortex A9. */
1001 0, /* Simple (N/A). */
1002 0, /* Flag_setting (N/A). */
1003 COSTS_N_INSNS (4), /* Extend. */
1005 COSTS_N_INSNS (4), /* Extend_add. */
1011 COSTS_N_INSNS (2), /* Load. */
1012 COSTS_N_INSNS (2), /* Load_sign_extend. */
1013 COSTS_N_INSNS (2), /* Ldrd. */
1014 COSTS_N_INSNS (2), /* Ldm_1st. */
1015 1, /* Ldm_regs_per_insn_1st. */
1016 2, /* Ldm_regs_per_insn_subsequent. */
1017 COSTS_N_INSNS (5), /* Loadf. */
1018 COSTS_N_INSNS (5), /* Loadd. */
1019 COSTS_N_INSNS (1), /* Load_unaligned. */
1020 COSTS_N_INSNS (2), /* Store. */
1021 COSTS_N_INSNS (2), /* Strd. */
1022 COSTS_N_INSNS (2), /* Stm_1st. */
1023 1, /* Stm_regs_per_insn_1st. */
1024 2, /* Stm_regs_per_insn_subsequent. */
1025 COSTS_N_INSNS (1), /* Storef. */
1026 COSTS_N_INSNS (1), /* Stored. */
1027 COSTS_N_INSNS (1) /* Store_unaligned. */
1032 COSTS_N_INSNS (14), /* Div. */
1033 COSTS_N_INSNS (4), /* Mult. */
1034 COSTS_N_INSNS (7), /* Mult_addsub. */
1035 COSTS_N_INSNS (30), /* Fma. */
1036 COSTS_N_INSNS (3), /* Addsub. */
1037 COSTS_N_INSNS (1), /* Fpconst. */
1038 COSTS_N_INSNS (1), /* Neg. */
1039 COSTS_N_INSNS (3), /* Compare. */
1040 COSTS_N_INSNS (3), /* Widen. */
1041 COSTS_N_INSNS (3), /* Narrow. */
1042 COSTS_N_INSNS (3), /* Toint. */
1043 COSTS_N_INSNS (3), /* Fromint. */
1044 COSTS_N_INSNS (3) /* Roundint. */
1048 COSTS_N_INSNS (24), /* Div. */
1049 COSTS_N_INSNS (5), /* Mult. */
1050 COSTS_N_INSNS (8), /* Mult_addsub. */
1051 COSTS_N_INSNS (30), /* Fma. */
1052 COSTS_N_INSNS (3), /* Addsub. */
1053 COSTS_N_INSNS (1), /* Fpconst. */
1054 COSTS_N_INSNS (1), /* Neg. */
1055 COSTS_N_INSNS (3), /* Compare. */
1056 COSTS_N_INSNS (3), /* Widen. */
1057 COSTS_N_INSNS (3), /* Narrow. */
1058 COSTS_N_INSNS (3), /* Toint. */
1059 COSTS_N_INSNS (3), /* Fromint. */
1060 COSTS_N_INSNS (3) /* Roundint. */
1065 COSTS_N_INSNS (1) /* Alu. */
1070 const struct cpu_cost_table cortexa7_extra_costs
=
1076 COSTS_N_INSNS (1), /* Shift. */
1077 COSTS_N_INSNS (1), /* Shift_reg. */
1078 COSTS_N_INSNS (1), /* Arith_shift. */
1079 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1080 COSTS_N_INSNS (1), /* Log_shift. */
1081 COSTS_N_INSNS (1), /* Log_shift_reg. */
1082 COSTS_N_INSNS (1), /* Extend. */
1083 COSTS_N_INSNS (1), /* Extend_arith. */
1084 COSTS_N_INSNS (1), /* Bfi. */
1085 COSTS_N_INSNS (1), /* Bfx. */
1086 COSTS_N_INSNS (1), /* Clz. */
1088 true /* non_exec_costs_exec. */
1095 COSTS_N_INSNS (1), /* Flag_setting. */
1096 COSTS_N_INSNS (1), /* Extend. */
1097 COSTS_N_INSNS (1), /* Add. */
1098 COSTS_N_INSNS (1), /* Extend_add. */
1099 COSTS_N_INSNS (7) /* Idiv. */
1103 0, /* Simple (N/A). */
1104 0, /* Flag_setting (N/A). */
1105 COSTS_N_INSNS (1), /* Extend. */
1107 COSTS_N_INSNS (2), /* Extend_add. */
1113 COSTS_N_INSNS (1), /* Load. */
1114 COSTS_N_INSNS (1), /* Load_sign_extend. */
1115 COSTS_N_INSNS (3), /* Ldrd. */
1116 COSTS_N_INSNS (1), /* Ldm_1st. */
1117 1, /* Ldm_regs_per_insn_1st. */
1118 2, /* Ldm_regs_per_insn_subsequent. */
1119 COSTS_N_INSNS (2), /* Loadf. */
1120 COSTS_N_INSNS (2), /* Loadd. */
1121 COSTS_N_INSNS (1), /* Load_unaligned. */
1122 COSTS_N_INSNS (1), /* Store. */
1123 COSTS_N_INSNS (3), /* Strd. */
1124 COSTS_N_INSNS (1), /* Stm_1st. */
1125 1, /* Stm_regs_per_insn_1st. */
1126 2, /* Stm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (2), /* Storef. */
1128 COSTS_N_INSNS (2), /* Stored. */
1129 COSTS_N_INSNS (1) /* Store_unaligned. */
1134 COSTS_N_INSNS (15), /* Div. */
1135 COSTS_N_INSNS (3), /* Mult. */
1136 COSTS_N_INSNS (7), /* Mult_addsub. */
1137 COSTS_N_INSNS (7), /* Fma. */
1138 COSTS_N_INSNS (3), /* Addsub. */
1139 COSTS_N_INSNS (3), /* Fpconst. */
1140 COSTS_N_INSNS (3), /* Neg. */
1141 COSTS_N_INSNS (3), /* Compare. */
1142 COSTS_N_INSNS (3), /* Widen. */
1143 COSTS_N_INSNS (3), /* Narrow. */
1144 COSTS_N_INSNS (3), /* Toint. */
1145 COSTS_N_INSNS (3), /* Fromint. */
1146 COSTS_N_INSNS (3) /* Roundint. */
1150 COSTS_N_INSNS (30), /* Div. */
1151 COSTS_N_INSNS (6), /* Mult. */
1152 COSTS_N_INSNS (10), /* Mult_addsub. */
1153 COSTS_N_INSNS (7), /* Fma. */
1154 COSTS_N_INSNS (3), /* Addsub. */
1155 COSTS_N_INSNS (3), /* Fpconst. */
1156 COSTS_N_INSNS (3), /* Neg. */
1157 COSTS_N_INSNS (3), /* Compare. */
1158 COSTS_N_INSNS (3), /* Widen. */
1159 COSTS_N_INSNS (3), /* Narrow. */
1160 COSTS_N_INSNS (3), /* Toint. */
1161 COSTS_N_INSNS (3), /* Fromint. */
1162 COSTS_N_INSNS (3) /* Roundint. */
1167 COSTS_N_INSNS (1) /* Alu. */
1171 const struct cpu_cost_table cortexa12_extra_costs
=
1178 COSTS_N_INSNS (1), /* Shift_reg. */
1179 COSTS_N_INSNS (1), /* Arith_shift. */
1180 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1181 COSTS_N_INSNS (1), /* Log_shift. */
1182 COSTS_N_INSNS (1), /* Log_shift_reg. */
1184 COSTS_N_INSNS (1), /* Extend_arith. */
1186 COSTS_N_INSNS (1), /* Bfx. */
1187 COSTS_N_INSNS (1), /* Clz. */
1189 true /* non_exec_costs_exec. */
1194 COSTS_N_INSNS (2), /* Simple. */
1195 COSTS_N_INSNS (3), /* Flag_setting. */
1196 COSTS_N_INSNS (2), /* Extend. */
1197 COSTS_N_INSNS (3), /* Add. */
1198 COSTS_N_INSNS (2), /* Extend_add. */
1199 COSTS_N_INSNS (18) /* Idiv. */
1203 0, /* Simple (N/A). */
1204 0, /* Flag_setting (N/A). */
1205 COSTS_N_INSNS (3), /* Extend. */
1207 COSTS_N_INSNS (3), /* Extend_add. */
1213 COSTS_N_INSNS (3), /* Load. */
1214 COSTS_N_INSNS (3), /* Load_sign_extend. */
1215 COSTS_N_INSNS (3), /* Ldrd. */
1216 COSTS_N_INSNS (3), /* Ldm_1st. */
1217 1, /* Ldm_regs_per_insn_1st. */
1218 2, /* Ldm_regs_per_insn_subsequent. */
1219 COSTS_N_INSNS (3), /* Loadf. */
1220 COSTS_N_INSNS (3), /* Loadd. */
1221 0, /* Load_unaligned. */
1225 1, /* Stm_regs_per_insn_1st. */
1226 2, /* Stm_regs_per_insn_subsequent. */
1227 COSTS_N_INSNS (2), /* Storef. */
1228 COSTS_N_INSNS (2), /* Stored. */
1229 0 /* Store_unaligned. */
1234 COSTS_N_INSNS (17), /* Div. */
1235 COSTS_N_INSNS (4), /* Mult. */
1236 COSTS_N_INSNS (8), /* Mult_addsub. */
1237 COSTS_N_INSNS (8), /* Fma. */
1238 COSTS_N_INSNS (4), /* Addsub. */
1239 COSTS_N_INSNS (2), /* Fpconst. */
1240 COSTS_N_INSNS (2), /* Neg. */
1241 COSTS_N_INSNS (2), /* Compare. */
1242 COSTS_N_INSNS (4), /* Widen. */
1243 COSTS_N_INSNS (4), /* Narrow. */
1244 COSTS_N_INSNS (4), /* Toint. */
1245 COSTS_N_INSNS (4), /* Fromint. */
1246 COSTS_N_INSNS (4) /* Roundint. */
1250 COSTS_N_INSNS (31), /* Div. */
1251 COSTS_N_INSNS (4), /* Mult. */
1252 COSTS_N_INSNS (8), /* Mult_addsub. */
1253 COSTS_N_INSNS (8), /* Fma. */
1254 COSTS_N_INSNS (4), /* Addsub. */
1255 COSTS_N_INSNS (2), /* Fpconst. */
1256 COSTS_N_INSNS (2), /* Neg. */
1257 COSTS_N_INSNS (2), /* Compare. */
1258 COSTS_N_INSNS (4), /* Widen. */
1259 COSTS_N_INSNS (4), /* Narrow. */
1260 COSTS_N_INSNS (4), /* Toint. */
1261 COSTS_N_INSNS (4), /* Fromint. */
1262 COSTS_N_INSNS (4) /* Roundint. */
1267 COSTS_N_INSNS (1) /* Alu. */
1271 const struct cpu_cost_table cortexa15_extra_costs
=
1279 COSTS_N_INSNS (1), /* Arith_shift. */
1280 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1281 COSTS_N_INSNS (1), /* Log_shift. */
1282 COSTS_N_INSNS (1), /* Log_shift_reg. */
1284 COSTS_N_INSNS (1), /* Extend_arith. */
1285 COSTS_N_INSNS (1), /* Bfi. */
1289 true /* non_exec_costs_exec. */
1294 COSTS_N_INSNS (2), /* Simple. */
1295 COSTS_N_INSNS (3), /* Flag_setting. */
1296 COSTS_N_INSNS (2), /* Extend. */
1297 COSTS_N_INSNS (2), /* Add. */
1298 COSTS_N_INSNS (2), /* Extend_add. */
1299 COSTS_N_INSNS (18) /* Idiv. */
1303 0, /* Simple (N/A). */
1304 0, /* Flag_setting (N/A). */
1305 COSTS_N_INSNS (3), /* Extend. */
1307 COSTS_N_INSNS (3), /* Extend_add. */
1313 COSTS_N_INSNS (3), /* Load. */
1314 COSTS_N_INSNS (3), /* Load_sign_extend. */
1315 COSTS_N_INSNS (3), /* Ldrd. */
1316 COSTS_N_INSNS (4), /* Ldm_1st. */
1317 1, /* Ldm_regs_per_insn_1st. */
1318 2, /* Ldm_regs_per_insn_subsequent. */
1319 COSTS_N_INSNS (4), /* Loadf. */
1320 COSTS_N_INSNS (4), /* Loadd. */
1321 0, /* Load_unaligned. */
1324 COSTS_N_INSNS (1), /* Stm_1st. */
1325 1, /* Stm_regs_per_insn_1st. */
1326 2, /* Stm_regs_per_insn_subsequent. */
1329 0 /* Store_unaligned. */
1334 COSTS_N_INSNS (17), /* Div. */
1335 COSTS_N_INSNS (4), /* Mult. */
1336 COSTS_N_INSNS (8), /* Mult_addsub. */
1337 COSTS_N_INSNS (8), /* Fma. */
1338 COSTS_N_INSNS (4), /* Addsub. */
1339 COSTS_N_INSNS (2), /* Fpconst. */
1340 COSTS_N_INSNS (2), /* Neg. */
1341 COSTS_N_INSNS (5), /* Compare. */
1342 COSTS_N_INSNS (4), /* Widen. */
1343 COSTS_N_INSNS (4), /* Narrow. */
1344 COSTS_N_INSNS (4), /* Toint. */
1345 COSTS_N_INSNS (4), /* Fromint. */
1346 COSTS_N_INSNS (4) /* Roundint. */
1350 COSTS_N_INSNS (31), /* Div. */
1351 COSTS_N_INSNS (4), /* Mult. */
1352 COSTS_N_INSNS (8), /* Mult_addsub. */
1353 COSTS_N_INSNS (8), /* Fma. */
1354 COSTS_N_INSNS (4), /* Addsub. */
1355 COSTS_N_INSNS (2), /* Fpconst. */
1356 COSTS_N_INSNS (2), /* Neg. */
1357 COSTS_N_INSNS (2), /* Compare. */
1358 COSTS_N_INSNS (4), /* Widen. */
1359 COSTS_N_INSNS (4), /* Narrow. */
1360 COSTS_N_INSNS (4), /* Toint. */
1361 COSTS_N_INSNS (4), /* Fromint. */
1362 COSTS_N_INSNS (4) /* Roundint. */
1367 COSTS_N_INSNS (1) /* Alu. */
1371 const struct cpu_cost_table v7m_extra_costs
=
1379 0, /* Arith_shift. */
1380 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1382 COSTS_N_INSNS (1), /* Log_shift_reg. */
1384 COSTS_N_INSNS (1), /* Extend_arith. */
1388 COSTS_N_INSNS (1), /* non_exec. */
1389 false /* non_exec_costs_exec. */
1394 COSTS_N_INSNS (1), /* Simple. */
1395 COSTS_N_INSNS (1), /* Flag_setting. */
1396 COSTS_N_INSNS (2), /* Extend. */
1397 COSTS_N_INSNS (1), /* Add. */
1398 COSTS_N_INSNS (3), /* Extend_add. */
1399 COSTS_N_INSNS (8) /* Idiv. */
1403 0, /* Simple (N/A). */
1404 0, /* Flag_setting (N/A). */
1405 COSTS_N_INSNS (2), /* Extend. */
1407 COSTS_N_INSNS (3), /* Extend_add. */
1413 COSTS_N_INSNS (2), /* Load. */
1414 0, /* Load_sign_extend. */
1415 COSTS_N_INSNS (3), /* Ldrd. */
1416 COSTS_N_INSNS (2), /* Ldm_1st. */
1417 1, /* Ldm_regs_per_insn_1st. */
1418 1, /* Ldm_regs_per_insn_subsequent. */
1419 COSTS_N_INSNS (2), /* Loadf. */
1420 COSTS_N_INSNS (3), /* Loadd. */
1421 COSTS_N_INSNS (1), /* Load_unaligned. */
1422 COSTS_N_INSNS (2), /* Store. */
1423 COSTS_N_INSNS (3), /* Strd. */
1424 COSTS_N_INSNS (2), /* Stm_1st. */
1425 1, /* Stm_regs_per_insn_1st. */
1426 1, /* Stm_regs_per_insn_subsequent. */
1427 COSTS_N_INSNS (2), /* Storef. */
1428 COSTS_N_INSNS (3), /* Stored. */
1429 COSTS_N_INSNS (1) /* Store_unaligned. */
1434 COSTS_N_INSNS (7), /* Div. */
1435 COSTS_N_INSNS (2), /* Mult. */
1436 COSTS_N_INSNS (5), /* Mult_addsub. */
1437 COSTS_N_INSNS (3), /* Fma. */
1438 COSTS_N_INSNS (1), /* Addsub. */
1450 COSTS_N_INSNS (15), /* Div. */
1451 COSTS_N_INSNS (5), /* Mult. */
1452 COSTS_N_INSNS (7), /* Mult_addsub. */
1453 COSTS_N_INSNS (7), /* Fma. */
1454 COSTS_N_INSNS (3), /* Addsub. */
1467 COSTS_N_INSNS (1) /* Alu. */
/* NOTE(review): the tune_params initializers below are an incompletely
   extracted copy: the original listing's line numbers are fused onto each
   line, braces are missing, and some members (e.g. the insn_extra_cost
   pointer in several entries) were dropped entirely.  The surviving
   initializer values are kept byte-identical; restore these structures
   from the upstream file rather than editing them in place.  */
1471 const struct tune_params arm_slowmul_tune
=
1473 arm_slowmul_rtx_costs
,
1475 NULL
, /* Sched adj cost. */
1476 3, /* Constant limit. */
1477 5, /* Max cond insns. */
1478 ARM_PREFETCH_NOT_BENEFICIAL
,
1479 true, /* Prefer constant pool. */
1480 arm_default_branch_cost
,
1481 false, /* Prefer LDRD/STRD. */
1482 {true, true}, /* Prefer non short circuit. */
1483 &arm_default_vec_cost
, /* Vectorizer costs. */
1484 false /* Prefer Neon for 64-bits bitops. */
1487 const struct tune_params arm_fastmul_tune
=
1489 arm_fastmul_rtx_costs
,
1491 NULL
, /* Sched adj cost. */
1492 1, /* Constant limit. */
1493 5, /* Max cond insns. */
1494 ARM_PREFETCH_NOT_BENEFICIAL
,
1495 true, /* Prefer constant pool. */
1496 arm_default_branch_cost
,
1497 false, /* Prefer LDRD/STRD. */
1498 {true, true}, /* Prefer non short circuit. */
1499 &arm_default_vec_cost
, /* Vectorizer costs. */
1500 false /* Prefer Neon for 64-bits bitops. */
1503 /* StrongARM has early execution of branches, so a sequence that is worth
1504 skipping is shorter. Set max_insns_skipped to a lower value. */
1506 const struct tune_params arm_strongarm_tune
=
1508 arm_fastmul_rtx_costs
,
1510 NULL
, /* Sched adj cost. */
1511 1, /* Constant limit. */
1512 3, /* Max cond insns. */
1513 ARM_PREFETCH_NOT_BENEFICIAL
,
1514 true, /* Prefer constant pool. */
1515 arm_default_branch_cost
,
1516 false, /* Prefer LDRD/STRD. */
1517 {true, true}, /* Prefer non short circuit. */
1518 &arm_default_vec_cost
, /* Vectorizer costs. */
1519 false /* Prefer Neon for 64-bits bitops. */
1522 const struct tune_params arm_xscale_tune
=
1524 arm_xscale_rtx_costs
,
1526 xscale_sched_adjust_cost
,
1527 2, /* Constant limit. */
1528 3, /* Max cond insns. */
1529 ARM_PREFETCH_NOT_BENEFICIAL
,
1530 true, /* Prefer constant pool. */
1531 arm_default_branch_cost
,
1532 false, /* Prefer LDRD/STRD. */
1533 {true, true}, /* Prefer non short circuit. */
1534 &arm_default_vec_cost
, /* Vectorizer costs. */
1535 false /* Prefer Neon for 64-bits bitops. */
1538 const struct tune_params arm_9e_tune
=
1542 NULL
, /* Sched adj cost. */
1543 1, /* Constant limit. */
1544 5, /* Max cond insns. */
1545 ARM_PREFETCH_NOT_BENEFICIAL
,
1546 true, /* Prefer constant pool. */
1547 arm_default_branch_cost
,
1548 false, /* Prefer LDRD/STRD. */
1549 {true, true}, /* Prefer non short circuit. */
1550 &arm_default_vec_cost
, /* Vectorizer costs. */
1551 false /* Prefer Neon for 64-bits bitops. */
1554 const struct tune_params arm_v6t2_tune
=
1558 NULL
, /* Sched adj cost. */
1559 1, /* Constant limit. */
1560 5, /* Max cond insns. */
1561 ARM_PREFETCH_NOT_BENEFICIAL
,
1562 false, /* Prefer constant pool. */
1563 arm_default_branch_cost
,
1564 false, /* Prefer LDRD/STRD. */
1565 {true, true}, /* Prefer non short circuit. */
1566 &arm_default_vec_cost
, /* Vectorizer costs. */
1567 false /* Prefer Neon for 64-bits bitops. */
1570 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1571 const struct tune_params arm_cortex_tune
=
1574 &generic_extra_costs
,
1575 NULL
, /* Sched adj cost. */
1576 1, /* Constant limit. */
1577 5, /* Max cond insns. */
1578 ARM_PREFETCH_NOT_BENEFICIAL
,
1579 false, /* Prefer constant pool. */
1580 arm_default_branch_cost
,
1581 false, /* Prefer LDRD/STRD. */
1582 {true, true}, /* Prefer non short circuit. */
1583 &arm_default_vec_cost
, /* Vectorizer costs. */
1584 false /* Prefer Neon for 64-bits bitops. */
1587 const struct tune_params arm_cortex_a7_tune
=
1590 &cortexa7_extra_costs
,
1592 1, /* Constant limit. */
1593 5, /* Max cond insns. */
1594 ARM_PREFETCH_NOT_BENEFICIAL
,
1595 false, /* Prefer constant pool. */
1596 arm_default_branch_cost
,
1597 false, /* Prefer LDRD/STRD. */
1598 {true, true}, /* Prefer non short circuit. */
1599 &arm_default_vec_cost
, /* Vectorizer costs. */
1600 false /* Prefer Neon for 64-bits bitops. */
1603 const struct tune_params arm_cortex_a15_tune
=
1606 &cortexa15_extra_costs
,
1607 NULL
, /* Sched adj cost. */
1608 1, /* Constant limit. */
1609 2, /* Max cond insns. */
1610 ARM_PREFETCH_NOT_BENEFICIAL
,
1611 false, /* Prefer constant pool. */
1612 arm_default_branch_cost
,
1613 true, /* Prefer LDRD/STRD. */
1614 {true, true}, /* Prefer non short circuit. */
1615 &arm_default_vec_cost
, /* Vectorizer costs. */
1616 false /* Prefer Neon for 64-bits bitops. */
1619 const struct tune_params arm_cortex_a53_tune
=
1622 &cortexa53_extra_costs
,
1623 NULL
, /* Scheduler cost adjustment. */
1624 1, /* Constant limit. */
1625 5, /* Max cond insns. */
1626 ARM_PREFETCH_NOT_BENEFICIAL
,
1627 false, /* Prefer constant pool. */
1628 arm_default_branch_cost
,
1629 false, /* Prefer LDRD/STRD. */
1630 {true, true}, /* Prefer non short circuit. */
1631 &arm_default_vec_cost
, /* Vectorizer costs. */
1632 false /* Prefer Neon for 64-bits bitops. */
1635 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1636 less appealing. Set max_insns_skipped to a low value. */
1638 const struct tune_params arm_cortex_a5_tune
=
1642 NULL
, /* Sched adj cost. */
1643 1, /* Constant limit. */
1644 1, /* Max cond insns. */
1645 ARM_PREFETCH_NOT_BENEFICIAL
,
1646 false, /* Prefer constant pool. */
1647 arm_cortex_a5_branch_cost
,
1648 false, /* Prefer LDRD/STRD. */
1649 {false, false}, /* Prefer non short circuit. */
1650 &arm_default_vec_cost
, /* Vectorizer costs. */
1651 false /* Prefer Neon for 64-bits bitops. */
1654 const struct tune_params arm_cortex_a9_tune
=
1657 &cortexa9_extra_costs
,
1658 cortex_a9_sched_adjust_cost
,
1659 1, /* Constant limit. */
1660 5, /* Max cond insns. */
1661 ARM_PREFETCH_BENEFICIAL(4,32,32),
1662 false, /* Prefer constant pool. */
1663 arm_default_branch_cost
,
1664 false, /* Prefer LDRD/STRD. */
1665 {true, true}, /* Prefer non short circuit. */
1666 &arm_default_vec_cost
, /* Vectorizer costs. */
1667 false /* Prefer Neon for 64-bits bitops. */
1670 const struct tune_params arm_cortex_a12_tune
=
1673 &cortexa12_extra_costs
,
1675 1, /* Constant limit. */
1676 5, /* Max cond insns. */
1677 ARM_PREFETCH_BENEFICIAL(4,32,32),
1678 false, /* Prefer constant pool. */
1679 arm_default_branch_cost
,
1680 true, /* Prefer LDRD/STRD. */
1681 {true, true}, /* Prefer non short circuit. */
1682 &arm_default_vec_cost
, /* Vectorizer costs. */
1683 false /* Prefer Neon for 64-bits bitops. */
1686 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1687 cycle to execute each. An LDR from the constant pool also takes two cycles
1688 to execute, but mildly increases pipelining opportunity (consecutive
1689 loads/stores can be pipelined together, saving one cycle), and may also
1690 improve icache utilisation. Hence we prefer the constant pool for such
1693 const struct tune_params arm_v7m_tune
=
1697 NULL
, /* Sched adj cost. */
1698 1, /* Constant limit. */
1699 2, /* Max cond insns. */
1700 ARM_PREFETCH_NOT_BENEFICIAL
,
1701 true, /* Prefer constant pool. */
1702 arm_cortex_m_branch_cost
,
1703 false, /* Prefer LDRD/STRD. */
1704 {false, false}, /* Prefer non short circuit. */
1705 &arm_default_vec_cost
, /* Vectorizer costs. */
1706 false /* Prefer Neon for 64-bits bitops. */
1709 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1710 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1711 const struct tune_params arm_v6m_tune
=
1715 NULL
, /* Sched adj cost. */
1716 1, /* Constant limit. */
1717 5, /* Max cond insns. */
1718 ARM_PREFETCH_NOT_BENEFICIAL
,
1719 false, /* Prefer constant pool. */
1720 arm_default_branch_cost
,
1721 false, /* Prefer LDRD/STRD. */
1722 {false, false}, /* Prefer non short circuit. */
1723 &arm_default_vec_cost
, /* Vectorizer costs. */
1724 false /* Prefer Neon for 64-bits bitops. */
1727 const struct tune_params arm_fa726te_tune
=
1731 fa726te_sched_adjust_cost
,
1732 1, /* Constant limit. */
1733 5, /* Max cond insns. */
1734 ARM_PREFETCH_NOT_BENEFICIAL
,
1735 true, /* Prefer constant pool. */
1736 arm_default_branch_cost
,
1737 false, /* Prefer LDRD/STRD. */
1738 {true, true}, /* Prefer non short circuit. */
1739 &arm_default_vec_cost
, /* Vectorizer costs. */
1740 false /* Prefer Neon for 64-bits bitops. */
1744 /* Not all of these give usefully different compilation alternatives,
1745 but there is no simple way of generalizing them. */
1746 static const struct processors all_cores
[] =
1749 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1750 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1751 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1752 #include "arm-cores.def"
1754 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1757 static const struct processors all_architectures
[] =
1759 /* ARM Architectures */
1760 /* We don't specify tuning costs here as it will be figured out
1763 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1764 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1765 #include "arm-arches.def"
1767 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.
   Overwritten later with the real architecture name; sized to hold the
   longest such name.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1781 /* Available values for -mfpu=. */
1783 static const struct arm_fpu_desc all_fpus
[] =
1785 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1786 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1787 #include "arm-fpus.def"
1792 /* Supported TLS relocations. */
/* NOTE(review): the enum header and all enumerators preceding TLS_DESCSEQ
   (original listing lines 1793-1799) are missing from this copy; restore
   the full "enum tls_reloc" definition from the upstream file.  */
1800 TLS_DESCSEQ
/* GNU scheme */
1803 /* The maximum number of insns to be used when loading a constant. */
1805 arm_constant_limit (bool size_p
)
1807 return size_p
? 1 : current_tune
->constant_limit
;
1810 /* Emit an insn that's a simple single-set. Both the operands must be known
1813 emit_set_insn (rtx x
, rtx y
)
1815 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
1835 enum machine_mode mode
;
1837 } arm_fixed_mode_set
;
1839 /* A small helper for setting fixed-point library libfuncs. */
1842 arm_set_fixed_optab_libfunc (optab optable
, enum machine_mode mode
,
1843 const char *funcname
, const char *modename
,
1848 if (num_suffix
== 0)
1849 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
1851 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
1853 set_optab_libfunc (optable
, mode
, buffer
);
1857 arm_set_fixed_conv_libfunc (convert_optab optable
, enum machine_mode to
,
1858 enum machine_mode from
, const char *funcname
,
1859 const char *toname
, const char *fromname
)
1862 const char *maybe_suffix_2
= "";
1864 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1865 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
1866 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
1867 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
1868 maybe_suffix_2
= "2";
1870 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
1873 set_conv_libfunc (optable
, to
, from
, buffer
);
1876 /* Set up library functions unique to ARM. */
1879 arm_init_libfuncs (void)
1881 /* For Linux, we have access to kernel support for atomic operations. */
1882 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
1883 init_sync_libfuncs (2 * UNITS_PER_WORD
);
1885 /* There are no special library functions unless we are using the
1890 /* The functions below are described in Section 4 of the "Run-Time
1891 ABI for the ARM architecture", Version 1.0. */
1893 /* Double-precision floating-point arithmetic. Table 2. */
1894 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
1895 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
1896 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
1897 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
1898 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
1900 /* Double-precision comparisons. Table 3. */
1901 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
1902 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
1903 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
1904 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
1905 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
1906 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
1907 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
1909 /* Single-precision floating-point arithmetic. Table 4. */
1910 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
1911 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
1912 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
1913 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
1914 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
1916 /* Single-precision comparisons. Table 5. */
1917 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
1918 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
1919 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
1920 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
1921 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
1922 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
1923 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
1925 /* Floating-point to integer conversions. Table 6. */
1926 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
1927 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
1928 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
1929 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
1930 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
1931 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
1932 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
1933 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
1935 /* Conversions between floating types. Table 7. */
1936 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
1937 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
1939 /* Integer to floating-point conversions. Table 8. */
1940 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
1941 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
1942 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
1943 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
1944 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
1945 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
1946 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
1947 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
1949 /* Long long. Table 9. */
1950 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
1951 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
1952 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
1953 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
1954 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
1955 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
1956 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
1957 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
1959 /* Integer (32/32->32) division. \S 4.3.1. */
1960 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
1961 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
1963 /* The divmod functions are designed so that they can be used for
1964 plain division, even though they return both the quotient and the
1965 remainder. The quotient is returned in the usual location (i.e.,
1966 r0 for SImode, {r0, r1} for DImode), just as would be expected
1967 for an ordinary division routine. Because the AAPCS calling
1968 conventions specify that all of { r0, r1, r2, r3 } are
1969 callee-saved registers, there is no need to tell the compiler
1970 explicitly that those registers are clobbered by these
1972 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
1973 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
1975 /* For SImode division the ABI provides div-without-mod routines,
1976 which are faster. */
1977 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
1978 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
1980 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1981 divmod libcalls instead. */
1982 set_optab_libfunc (smod_optab
, DImode
, NULL
);
1983 set_optab_libfunc (umod_optab
, DImode
, NULL
);
1984 set_optab_libfunc (smod_optab
, SImode
, NULL
);
1985 set_optab_libfunc (umod_optab
, SImode
, NULL
);
1987 /* Half-precision float operations. The compiler handles all operations
1988 with NULL libfuncs by converting the SFmode. */
1989 switch (arm_fp16_format
)
1991 case ARM_FP16_FORMAT_IEEE
:
1992 case ARM_FP16_FORMAT_ALTERNATIVE
:
1995 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
1996 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1998 : "__gnu_f2h_alternative"));
1999 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2000 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2002 : "__gnu_h2f_alternative"));
2005 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2006 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2007 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2008 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2009 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2012 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2013 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2014 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2015 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2016 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2017 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2018 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2025 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2027 const arm_fixed_mode_set fixed_arith_modes
[] =
2048 const arm_fixed_mode_set fixed_conv_modes
[] =
2078 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2080 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2081 "add", fixed_arith_modes
[i
].name
, 3);
2082 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2083 "ssadd", fixed_arith_modes
[i
].name
, 3);
2084 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2085 "usadd", fixed_arith_modes
[i
].name
, 3);
2086 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2087 "sub", fixed_arith_modes
[i
].name
, 3);
2088 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2089 "sssub", fixed_arith_modes
[i
].name
, 3);
2090 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2091 "ussub", fixed_arith_modes
[i
].name
, 3);
2092 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2093 "mul", fixed_arith_modes
[i
].name
, 3);
2094 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2095 "ssmul", fixed_arith_modes
[i
].name
, 3);
2096 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2097 "usmul", fixed_arith_modes
[i
].name
, 3);
2098 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2099 "div", fixed_arith_modes
[i
].name
, 3);
2100 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2101 "udiv", fixed_arith_modes
[i
].name
, 3);
2102 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2103 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2104 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2105 "usdiv", fixed_arith_modes
[i
].name
, 3);
2106 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2107 "neg", fixed_arith_modes
[i
].name
, 2);
2108 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2109 "ssneg", fixed_arith_modes
[i
].name
, 2);
2110 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2111 "usneg", fixed_arith_modes
[i
].name
, 2);
2112 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2113 "ashl", fixed_arith_modes
[i
].name
, 3);
2114 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2115 "ashr", fixed_arith_modes
[i
].name
, 3);
2116 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2117 "lshr", fixed_arith_modes
[i
].name
, 3);
2118 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2119 "ssashl", fixed_arith_modes
[i
].name
, 3);
2120 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2121 "usashl", fixed_arith_modes
[i
].name
, 3);
2122 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2123 "cmp", fixed_arith_modes
[i
].name
, 2);
2126 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2127 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2130 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2131 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2134 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2135 fixed_conv_modes
[j
].mode
, "fract",
2136 fixed_conv_modes
[i
].name
,
2137 fixed_conv_modes
[j
].name
);
2138 arm_set_fixed_conv_libfunc (satfract_optab
,
2139 fixed_conv_modes
[i
].mode
,
2140 fixed_conv_modes
[j
].mode
, "satfract",
2141 fixed_conv_modes
[i
].name
,
2142 fixed_conv_modes
[j
].name
);
2143 arm_set_fixed_conv_libfunc (fractuns_optab
,
2144 fixed_conv_modes
[i
].mode
,
2145 fixed_conv_modes
[j
].mode
, "fractuns",
2146 fixed_conv_modes
[i
].name
,
2147 fixed_conv_modes
[j
].name
);
2148 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2149 fixed_conv_modes
[i
].mode
,
2150 fixed_conv_modes
[j
].mode
, "satfractuns",
2151 fixed_conv_modes
[i
].name
,
2152 fixed_conv_modes
[j
].name
);
2156 if (TARGET_AAPCS_BASED
)
2157 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2160 /* On AAPCS systems, this is the "struct __va_list". */
2161 static GTY(()) tree va_list_type
;
2163 /* Return the type to use as __builtin_va_list. */
2165 arm_build_builtin_va_list (void)
2170 if (!TARGET_AAPCS_BASED
)
2171 return std_build_builtin_va_list ();
2173 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2181 The C Library ABI further reinforces this definition in \S
2184 We must follow this definition exactly. The structure tag
2185 name is visible in C++ mangled names, and thus forms a part
2186 of the ABI. The field name may be used by people who
2187 #include <stdarg.h>. */
2188 /* Create the type. */
2189 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2190 /* Give it the required name. */
2191 va_list_name
= build_decl (BUILTINS_LOCATION
,
2193 get_identifier ("__va_list"),
2195 DECL_ARTIFICIAL (va_list_name
) = 1;
2196 TYPE_NAME (va_list_type
) = va_list_name
;
2197 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2198 /* Create the __ap field. */
2199 ap_field
= build_decl (BUILTINS_LOCATION
,
2201 get_identifier ("__ap"),
2203 DECL_ARTIFICIAL (ap_field
) = 1;
2204 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2205 TYPE_FIELDS (va_list_type
) = ap_field
;
2206 /* Compute its layout. */
2207 layout_type (va_list_type
);
2209 return va_list_type
;
2212 /* Return an expression of type "void *" pointing to the next
2213 available argument in a variable-argument list. VALIST is the
2214 user-level va_list object, of type __builtin_va_list. */
2216 arm_extract_valist_ptr (tree valist
)
2218 if (TREE_TYPE (valist
) == error_mark_node
)
2219 return error_mark_node
;
2221 /* On an AAPCS target, the pointer is stored within "struct
2223 if (TARGET_AAPCS_BASED
)
2225 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2226 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2227 valist
, ap_field
, NULL_TREE
);
2233 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2235 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2237 valist
= arm_extract_valist_ptr (valist
);
2238 std_expand_builtin_va_start (valist
, nextarg
);
2241 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2243 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2246 valist
= arm_extract_valist_ptr (valist
);
2247 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2250 /* Fix up any incompatible options that the user has specified. */
2252 arm_option_override (void)
2254 if (global_options_set
.x_arm_arch_option
)
2255 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2257 if (global_options_set
.x_arm_cpu_option
)
2259 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2260 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2263 if (global_options_set
.x_arm_tune_option
)
2264 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2266 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2267 SUBTARGET_OVERRIDE_OPTIONS
;
2270 if (arm_selected_arch
)
2272 if (arm_selected_cpu
)
2274 /* Check for conflict between mcpu and march. */
2275 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
2277 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2278 arm_selected_cpu
->name
, arm_selected_arch
->name
);
2279 /* -march wins for code generation.
2280 -mcpu wins for default tuning. */
2281 if (!arm_selected_tune
)
2282 arm_selected_tune
= arm_selected_cpu
;
2284 arm_selected_cpu
= arm_selected_arch
;
2288 arm_selected_arch
= NULL
;
2291 /* Pick a CPU based on the architecture. */
2292 arm_selected_cpu
= arm_selected_arch
;
2295 /* If the user did not specify a processor, choose one for them. */
2296 if (!arm_selected_cpu
)
2298 const struct processors
* sel
;
2299 unsigned int sought
;
2301 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
2302 if (!arm_selected_cpu
->name
)
2304 #ifdef SUBTARGET_CPU_DEFAULT
2305 /* Use the subtarget default CPU if none was specified by
2307 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
2309 /* Default to ARM6. */
2310 if (!arm_selected_cpu
->name
)
2311 arm_selected_cpu
= &all_cores
[arm6
];
2314 sel
= arm_selected_cpu
;
2315 insn_flags
= sel
->flags
;
2317 /* Now check to see if the user has specified some command line
2318 switch that require certain abilities from the cpu. */
2321 if (TARGET_INTERWORK
|| TARGET_THUMB
)
2323 sought
|= (FL_THUMB
| FL_MODE32
);
2325 /* There are no ARM processors that support both APCS-26 and
2326 interworking. Therefore we force FL_MODE26 to be removed
2327 from insn_flags here (if it was set), so that the search
2328 below will always be able to find a compatible processor. */
2329 insn_flags
&= ~FL_MODE26
;
2332 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
2334 /* Try to locate a CPU type that supports all of the abilities
2335 of the default CPU, plus the extra abilities requested by
2337 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2338 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
2341 if (sel
->name
== NULL
)
2343 unsigned current_bit_count
= 0;
2344 const struct processors
* best_fit
= NULL
;
2346 /* Ideally we would like to issue an error message here
2347 saying that it was not possible to find a CPU compatible
2348 with the default CPU, but which also supports the command
2349 line options specified by the programmer, and so they
2350 ought to use the -mcpu=<name> command line option to
2351 override the default CPU type.
2353 If we cannot find a cpu that has both the
2354 characteristics of the default cpu and the given
2355 command line options we scan the array again looking
2356 for a best match. */
2357 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2358 if ((sel
->flags
& sought
) == sought
)
2362 count
= bit_count (sel
->flags
& insn_flags
);
2364 if (count
>= current_bit_count
)
2367 current_bit_count
= count
;
2371 gcc_assert (best_fit
);
2375 arm_selected_cpu
= sel
;
2379 gcc_assert (arm_selected_cpu
);
2380 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2381 if (!arm_selected_tune
)
2382 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
2384 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
2385 insn_flags
= arm_selected_cpu
->flags
;
2386 arm_base_arch
= arm_selected_cpu
->base_arch
;
2388 arm_tune
= arm_selected_tune
->core
;
2389 tune_flags
= arm_selected_tune
->flags
;
2390 current_tune
= arm_selected_tune
->tune
;
2392 /* Make sure that the processor choice does not conflict with any of the
2393 other command line choices. */
2394 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
2395 error ("target CPU does not support ARM mode");
2397 /* BPABI targets use linker tricks to allow interworking on cores
2398 without thumb support. */
2399 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
2401 warning (0, "target CPU does not support interworking" );
2402 target_flags
&= ~MASK_INTERWORK
;
2405 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
2407 warning (0, "target CPU does not support THUMB instructions");
2408 target_flags
&= ~MASK_THUMB
;
2411 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
2413 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2414 target_flags
&= ~MASK_APCS_FRAME
;
2417 /* Callee super interworking implies thumb interworking. Adding
2418 this to the flags here simplifies the logic elsewhere. */
2419 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
2420 target_flags
|= MASK_INTERWORK
;
2422 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2423 from here where no function is being compiled currently. */
2424 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
2425 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2427 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
2428 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2430 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
2432 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2433 target_flags
|= MASK_APCS_FRAME
;
2436 if (TARGET_POKE_FUNCTION_NAME
)
2437 target_flags
|= MASK_APCS_FRAME
;
2439 if (TARGET_APCS_REENT
&& flag_pic
)
2440 error ("-fpic and -mapcs-reent are incompatible");
2442 if (TARGET_APCS_REENT
)
2443 warning (0, "APCS reentrant code not supported. Ignored");
2445 /* If this target is normally configured to use APCS frames, warn if they
2446 are turned off and debugging is turned on. */
2448 && write_symbols
!= NO_DEBUG
2449 && !TARGET_APCS_FRAME
2450 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2451 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2453 if (TARGET_APCS_FLOAT
)
2454 warning (0, "passing floating point arguments in fp regs not yet supported");
2456 if (TARGET_LITTLE_WORDS
)
2457 warning (OPT_Wdeprecated
, "%<mwords-little-endian%> is deprecated and "
2458 "will be removed in a future release");
2460 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2461 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
2462 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
2463 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
2464 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
2465 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
2466 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
2467 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
2468 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
2469 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
2470 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
2471 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
2472 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
2473 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
2474 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
2476 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
2477 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
2478 thumb_code
= TARGET_ARM
== 0;
2479 thumb1_code
= TARGET_THUMB1
!= 0;
2480 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
2481 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
2482 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
2483 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
2484 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
2485 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
2486 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
2487 arm_arch_crc
= (insn_flags
& FL_CRC32
) != 0;
2488 if (arm_restrict_it
== 2)
2489 arm_restrict_it
= arm_arch8
&& TARGET_THUMB2
;
2492 arm_restrict_it
= 0;
2494 /* If we are not using the default (ARM mode) section anchor offset
2495 ranges, then set the correct ranges now. */
2498 /* Thumb-1 LDR instructions cannot have negative offsets.
2499 Permissible positive offset ranges are 5-bit (for byte loads),
2500 6-bit (for halfword loads), or 7-bit (for word loads).
2501 Empirical results suggest a 7-bit anchor range gives the best
2502 overall code size. */
2503 targetm
.min_anchor_offset
= 0;
2504 targetm
.max_anchor_offset
= 127;
2506 else if (TARGET_THUMB2
)
2508 /* The minimum is set such that the total size of the block
2509 for a particular anchor is 248 + 1 + 4095 bytes, which is
2510 divisible by eight, ensuring natural spacing of anchors. */
2511 targetm
.min_anchor_offset
= -248;
2512 targetm
.max_anchor_offset
= 4095;
2515 /* V5 code we generate is completely interworking capable, so we turn off
2516 TARGET_INTERWORK here to avoid many tests later on. */
2518 /* XXX However, we must pass the right pre-processor defines to CPP
2519 or GLD can get confused. This is a hack. */
2520 if (TARGET_INTERWORK
)
2521 arm_cpp_interwork
= 1;
2524 target_flags
&= ~MASK_INTERWORK
;
2526 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
2527 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2529 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
2530 error ("iwmmxt abi requires an iwmmxt capable cpu");
2532 if (!global_options_set
.x_arm_fpu_index
)
2534 const char *target_fpu_name
;
2537 #ifdef FPUTYPE_DEFAULT
2538 target_fpu_name
= FPUTYPE_DEFAULT
;
2540 target_fpu_name
= "vfp";
2543 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
2548 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
2550 switch (arm_fpu_desc
->model
)
2552 case ARM_FP_MODEL_VFP
:
2553 arm_fpu_attr
= FPU_VFP
;
2560 if (TARGET_AAPCS_BASED
)
2562 if (TARGET_CALLER_INTERWORKING
)
2563 error ("AAPCS does not support -mcaller-super-interworking");
2565 if (TARGET_CALLEE_INTERWORKING
)
2566 error ("AAPCS does not support -mcallee-super-interworking");
2569 /* iWMMXt and NEON are incompatible. */
2570 if (TARGET_IWMMXT
&& TARGET_NEON
)
2571 error ("iWMMXt and NEON are incompatible");
2573 /* iWMMXt unsupported under Thumb mode. */
2574 if (TARGET_THUMB
&& TARGET_IWMMXT
)
2575 error ("iWMMXt unsupported under Thumb mode");
2577 /* __fp16 support currently assumes the core has ldrh. */
2578 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
2579 sorry ("__fp16 and no ldrh");
2581 /* If soft-float is specified then don't use FPU. */
2582 if (TARGET_SOFT_FLOAT
)
2583 arm_fpu_attr
= FPU_NONE
;
2585 if (TARGET_AAPCS_BASED
)
2587 if (arm_abi
== ARM_ABI_IWMMXT
)
2588 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
2589 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
2590 && TARGET_HARD_FLOAT
2592 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
2594 arm_pcs_default
= ARM_PCS_AAPCS
;
2598 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
2599 sorry ("-mfloat-abi=hard and VFP");
2601 if (arm_abi
== ARM_ABI_APCS
)
2602 arm_pcs_default
= ARM_PCS_APCS
;
2604 arm_pcs_default
= ARM_PCS_ATPCS
;
2607 /* For arm2/3 there is no need to do any scheduling if we are doing
2608 software floating-point. */
2609 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
2610 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
2612 /* Use the cp15 method if it is available. */
2613 if (target_thread_pointer
== TP_AUTO
)
2615 if (arm_arch6k
&& !TARGET_THUMB1
)
2616 target_thread_pointer
= TP_CP15
;
2618 target_thread_pointer
= TP_SOFT
;
2621 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
2622 error ("can not use -mtp=cp15 with 16-bit Thumb");
2624 /* Override the default structure alignment for AAPCS ABI. */
2625 if (!global_options_set
.x_arm_structure_size_boundary
)
2627 if (TARGET_AAPCS_BASED
)
2628 arm_structure_size_boundary
= 8;
2632 if (arm_structure_size_boundary
!= 8
2633 && arm_structure_size_boundary
!= 32
2634 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
2636 if (ARM_DOUBLEWORD_ALIGN
)
2638 "structure size boundary can only be set to 8, 32 or 64");
2640 warning (0, "structure size boundary can only be set to 8 or 32");
2641 arm_structure_size_boundary
2642 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
2646 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
2648 error ("RTP PIC is incompatible with Thumb");
2652 /* If stack checking is disabled, we can use r10 as the PIC register,
2653 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2654 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
2656 if (TARGET_VXWORKS_RTP
)
2657 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2658 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
2661 if (flag_pic
&& TARGET_VXWORKS_RTP
)
2662 arm_pic_register
= 9;
2664 if (arm_pic_register_string
!= NULL
)
2666 int pic_register
= decode_reg_name (arm_pic_register_string
);
2669 warning (0, "-mpic-register= is useless without -fpic");
2671 /* Prevent the user from choosing an obviously stupid PIC register. */
2672 else if (pic_register
< 0 || call_used_regs
[pic_register
]
2673 || pic_register
== HARD_FRAME_POINTER_REGNUM
2674 || pic_register
== STACK_POINTER_REGNUM
2675 || pic_register
>= PC_REGNUM
2676 || (TARGET_VXWORKS_RTP
2677 && (unsigned int) pic_register
!= arm_pic_register
))
2678 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
2680 arm_pic_register
= pic_register
;
2683 if (TARGET_VXWORKS_RTP
2684 && !global_options_set
.x_arm_pic_data_is_text_relative
)
2685 arm_pic_data_is_text_relative
= 0;
2687 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2688 if (fix_cm3_ldrd
== 2)
2690 if (arm_selected_cpu
->core
== cortexm3
)
2696 /* Enable -munaligned-access by default for
2697 - all ARMv6 architecture-based processors
2698 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2699 - ARMv8 architecture-base processors.
2701 Disable -munaligned-access by default for
2702 - all pre-ARMv6 architecture-based processors
2703 - ARMv6-M architecture-based processors. */
2705 if (unaligned_access
== 2)
2707 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
2708 unaligned_access
= 1;
2710 unaligned_access
= 0;
2712 else if (unaligned_access
== 1
2713 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2715 warning (0, "target CPU does not support unaligned accesses");
2716 unaligned_access
= 0;
2719 if (TARGET_THUMB1
&& flag_schedule_insns
)
2721 /* Don't warn since it's on by default in -O2. */
2722 flag_schedule_insns
= 0;
2727 /* If optimizing for size, bump the number of instructions that we
2728 are prepared to conditionally execute (even on a StrongARM). */
2729 max_insns_skipped
= 6;
2732 max_insns_skipped
= current_tune
->max_insns_skipped
;
2734 /* Hot/Cold partitioning is not currently supported, since we can't
2735 handle literal pool placement in that case. */
2736 if (flag_reorder_blocks_and_partition
)
2738 inform (input_location
,
2739 "-freorder-blocks-and-partition not supported on this architecture");
2740 flag_reorder_blocks_and_partition
= 0;
2741 flag_reorder_blocks
= 1;
2745 /* Hoisting PIC address calculations more aggressively provides a small,
2746 but measurable, size reduction for PIC code. Therefore, we decrease
2747 the bar for unrestricted expression hoisting to the cost of PIC address
2748 calculation, which is 2 instructions. */
2749 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
2750 global_options
.x_param_values
,
2751 global_options_set
.x_param_values
);
2753 /* ARM EABI defaults to strict volatile bitfields. */
2754 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
2755 && abi_version_at_least(2))
2756 flag_strict_volatile_bitfields
= 1;
2758 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2759 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2760 if (flag_prefetch_loop_arrays
< 0
2763 && current_tune
->num_prefetch_slots
> 0)
2764 flag_prefetch_loop_arrays
= 1;
2766 /* Set up parameters to be used in prefetching algorithm. Do not override the
2767 defaults unless we are tuning for a core we have researched values for. */
2768 if (current_tune
->num_prefetch_slots
> 0)
2769 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
2770 current_tune
->num_prefetch_slots
,
2771 global_options
.x_param_values
,
2772 global_options_set
.x_param_values
);
2773 if (current_tune
->l1_cache_line_size
>= 0)
2774 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
2775 current_tune
->l1_cache_line_size
,
2776 global_options
.x_param_values
,
2777 global_options_set
.x_param_values
);
2778 if (current_tune
->l1_cache_size
>= 0)
2779 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
2780 current_tune
->l1_cache_size
,
2781 global_options
.x_param_values
,
2782 global_options_set
.x_param_values
);
2784 /* Use Neon to perform 64-bits operations rather than core
2786 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
2787 if (use_neon_for_64bits
== 1)
2788 prefer_neon_for_64bits
= true;
2790 /* Use the alternative scheduling-pressure algorithm by default. */
2791 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, 2,
2792 global_options
.x_param_values
,
2793 global_options_set
.x_param_values
);
2795 /* Disable shrink-wrap when optimizing function for size, since it tends to
2796 generate additional returns. */
2797 if (optimize_function_for_size_p (cfun
) && TARGET_THUMB2
)
2798 flag_shrink_wrap
= false;
2799 /* TBD: Dwarf info for apcs frame is not handled yet. */
2800 if (TARGET_APCS_FRAME
)
2801 flag_shrink_wrap
= false;
2803 /* We only support -mslow-flash-data on armv7-m targets. */
2804 if (target_slow_flash_data
2805 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2806 || (TARGET_THUMB1
|| flag_pic
|| TARGET_NEON
)))
2807 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2809 /* Currently, for slow flash data, we just disable literal pools. */
2810 if (target_slow_flash_data
)
2811 arm_disable_literal_pool
= true;
2813 /* Register global variables with the garbage collector. */
2814 arm_add_gc_roots ();
2818 arm_add_gc_roots (void)
2820 gcc_obstack_init(&minipool_obstack
);
2821 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
2824 /* A table of known ARM exception types.
2825 For use with the interrupt function attribute. */
2829 const char *const arg
;
2830 const unsigned long return_value
;
2834 static const isr_attribute_arg isr_attribute_args
[] =
2836 { "IRQ", ARM_FT_ISR
},
2837 { "irq", ARM_FT_ISR
},
2838 { "FIQ", ARM_FT_FIQ
},
2839 { "fiq", ARM_FT_FIQ
},
2840 { "ABORT", ARM_FT_ISR
},
2841 { "abort", ARM_FT_ISR
},
2842 { "ABORT", ARM_FT_ISR
},
2843 { "abort", ARM_FT_ISR
},
2844 { "UNDEF", ARM_FT_EXCEPTION
},
2845 { "undef", ARM_FT_EXCEPTION
},
2846 { "SWI", ARM_FT_EXCEPTION
},
2847 { "swi", ARM_FT_EXCEPTION
},
2848 { NULL
, ARM_FT_NORMAL
}
2851 /* Returns the (interrupt) function type of the current
2852 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2854 static unsigned long
2855 arm_isr_value (tree argument
)
2857 const isr_attribute_arg
* ptr
;
2861 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
2863 /* No argument - default to IRQ. */
2864 if (argument
== NULL_TREE
)
2867 /* Get the value of the argument. */
2868 if (TREE_VALUE (argument
) == NULL_TREE
2869 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
2870 return ARM_FT_UNKNOWN
;
2872 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
2874 /* Check it against the list of known arguments. */
2875 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
2876 if (streq (arg
, ptr
->arg
))
2877 return ptr
->return_value
;
2879 /* An unrecognized interrupt type. */
2880 return ARM_FT_UNKNOWN
;
2883 /* Computes the type of the current function. */
2885 static unsigned long
2886 arm_compute_func_type (void)
2888 unsigned long type
= ARM_FT_UNKNOWN
;
2892 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
2894 /* Decide if the current function is volatile. Such functions
2895 never return, and many memory cycles can be saved by not storing
2896 register values that will never be needed again. This optimization
2897 was added to speed up context switching in a kernel application. */
2899 && (TREE_NOTHROW (current_function_decl
)
2900 || !(flag_unwind_tables
2902 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
2903 && TREE_THIS_VOLATILE (current_function_decl
))
2904 type
|= ARM_FT_VOLATILE
;
2906 if (cfun
->static_chain_decl
!= NULL
)
2907 type
|= ARM_FT_NESTED
;
2909 attr
= DECL_ATTRIBUTES (current_function_decl
);
2911 a
= lookup_attribute ("naked", attr
);
2913 type
|= ARM_FT_NAKED
;
2915 a
= lookup_attribute ("isr", attr
);
2917 a
= lookup_attribute ("interrupt", attr
);
2920 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
2922 type
|= arm_isr_value (TREE_VALUE (a
));
2927 /* Returns the type of the current function. */
2930 arm_current_func_type (void)
2932 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
2933 cfun
->machine
->func_type
= arm_compute_func_type ();
2935 return cfun
->machine
->func_type
;
2939 arm_allocate_stack_slots_for_args (void)
2941 /* Naked functions should not allocate stack slots for arguments. */
2942 return !IS_NAKED (arm_current_func_type ());
2946 arm_warn_func_return (tree decl
)
2948 /* Naked functions are implemented entirely in assembly, including the
2949 return sequence, so suppress warnings about this. */
2950 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
2954 /* Output assembler code for a block containing the constant parts
2955 of a trampoline, leaving space for the variable parts.
2957 On the ARM, (if r8 is the static chain regnum, and remembering that
2958 referencing pc adds an offset of 8) the trampoline looks like:
2961 .word static chain value
2962 .word function's address
2963 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2966 arm_asm_trampoline_template (FILE *f
)
2970 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2971 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
2973 else if (TARGET_THUMB2
)
2975 /* The Thumb-2 trampoline is similar to the arm implementation.
2976 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2977 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
2978 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2979 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
2983 ASM_OUTPUT_ALIGN (f
, 2);
2984 fprintf (f
, "\t.code\t16\n");
2985 fprintf (f
, ".Ltrampoline_start:\n");
2986 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
2987 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2988 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
2989 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2990 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
2991 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
2993 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2994 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2997 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3000 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3002 rtx fnaddr
, mem
, a_tramp
;
3004 emit_block_move (m_tramp
, assemble_trampoline_template (),
3005 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3007 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3008 emit_move_insn (mem
, chain_value
);
3010 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3011 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3012 emit_move_insn (mem
, fnaddr
);
3014 a_tramp
= XEXP (m_tramp
, 0);
3015 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3016 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3017 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3020 /* Thumb trampolines should be entered in thumb mode, so set
3021 the bottom bit of the address. */
3024 arm_trampoline_adjust_address (rtx addr
)
3027 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3028 NULL
, 0, OPTAB_LIB_WIDEN
);
3032 /* Return 1 if it is possible to return using a single instruction.
3033 If SIBLING is non-null, this is a test for a return before a sibling
3034 call. SIBLING is the call insn, so we can examine its register usage. */
3037 use_return_insn (int iscond
, rtx sibling
)
3040 unsigned int func_type
;
3041 unsigned long saved_int_regs
;
3042 unsigned HOST_WIDE_INT stack_adjust
;
3043 arm_stack_offsets
*offsets
;
3045 /* Never use a return instruction before reload has run. */
3046 if (!reload_completed
)
3049 func_type
= arm_current_func_type ();
3051 /* Naked, volatile and stack alignment functions need special
3053 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3056 /* So do interrupt functions that use the frame pointer and Thumb
3057 interrupt functions. */
3058 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3061 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3062 && !optimize_function_for_size_p (cfun
))
3065 offsets
= arm_get_frame_offsets ();
3066 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3068 /* As do variadic functions. */
3069 if (crtl
->args
.pretend_args_size
3070 || cfun
->machine
->uses_anonymous_args
3071 /* Or if the function calls __builtin_eh_return () */
3072 || crtl
->calls_eh_return
3073 /* Or if the function calls alloca */
3074 || cfun
->calls_alloca
3075 /* Or if there is a stack adjustment. However, if the stack pointer
3076 is saved on the stack, we can use a pre-incrementing stack load. */
3077 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3078 && stack_adjust
== 4)))
3081 saved_int_regs
= offsets
->saved_regs_mask
;
3083 /* Unfortunately, the insn
3085 ldmib sp, {..., sp, ...}
3087 triggers a bug on most SA-110 based devices, such that the stack
3088 pointer won't be correctly restored if the instruction takes a
3089 page fault. We work around this problem by popping r3 along with
3090 the other registers, since that is never slower than executing
3091 another instruction.
3093 We test for !arm_arch5 here, because code for any architecture
3094 less than this could potentially be run on one of the buggy
3096 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3098 /* Validate that r3 is a call-clobbered register (always true in
3099 the default abi) ... */
3100 if (!call_used_regs
[3])
3103 /* ... that it isn't being used for a return value ... */
3104 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3107 /* ... or for a tail-call argument ... */
3110 gcc_assert (CALL_P (sibling
));
3112 if (find_regno_fusage (sibling
, USE
, 3))
3116 /* ... and that there are no call-saved registers in r0-r2
3117 (always true in the default ABI). */
3118 if (saved_int_regs
& 0x7)
3122 /* Can't be done if interworking with Thumb, and any registers have been
3124 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3127 /* On StrongARM, conditional returns are expensive if they aren't
3128 taken and multiple registers have been stacked. */
3129 if (iscond
&& arm_tune_strongarm
)
3131 /* Conditional return when just the LR is stored is a simple
3132 conditional-load instruction, that's not expensive. */
3133 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3137 && arm_pic_register
!= INVALID_REGNUM
3138 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3142 /* If there are saved registers but the LR isn't saved, then we need
3143 two instructions for the return. */
3144 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3147 /* Can't be done if any of the VFP regs are pushed,
3148 since this also requires an insn. */
3149 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3150 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3151 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3154 if (TARGET_REALLY_IWMMXT
)
3155 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3156 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3162 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3163 shrink-wrapping if possible. This is the case if we need to emit a
3164 prologue, which we can test by looking at the offsets. */
3166 use_simple_return_p (void)
3168 arm_stack_offsets
*offsets
;
3170 offsets
= arm_get_frame_offsets ();
3171 return offsets
->outgoing_args
!= 0;
3174 /* Return TRUE if int I is a valid immediate ARM constant. */
3177 const_ok_for_arm (HOST_WIDE_INT i
)
3181 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3182 be all zero, or all one. */
3183 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3184 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3185 != ((~(unsigned HOST_WIDE_INT
) 0)
3186 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3189 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3191 /* Fast return for 0 and small values. We must do this for zero, since
3192 the code below can't handle that one case. */
3193 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3196 /* Get the number of trailing zeros. */
3197 lowbit
= ffs((int) i
) - 1;
3199 /* Only even shifts are allowed in ARM mode so round down to the
3200 nearest even number. */
3204 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3209 /* Allow rotated constants in ARM mode. */
3211 && ((i
& ~0xc000003f) == 0
3212 || (i
& ~0xf000000f) == 0
3213 || (i
& ~0xfc000003) == 0))
3220 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3223 if (i
== v
|| i
== (v
| (v
<< 8)))
3226 /* Allow repeated pattern 0xXY00XY00. */
3236 /* Return true if I is a valid constant for the operation CODE. */
3238 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3240 if (const_ok_for_arm (i
))
3246 /* See if we can use movw. */
3247 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3250 /* Otherwise, try mvn. */
3251 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3254 /* See if we can use addw or subw. */
3256 && ((i
& 0xfffff000) == 0
3257 || ((-i
) & 0xfffff000) == 0))
3259 /* else fall through. */
3279 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3281 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3287 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3291 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3298 /* Return true if I is a valid di mode constant for the operation CODE. */
3300 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3302 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3303 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3304 rtx hi
= GEN_INT (hi_val
);
3305 rtx lo
= GEN_INT (lo_val
);
3315 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3316 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
3318 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3325 /* Emit a sequence of insns to handle a large constant.
3326 CODE is the code of the operation required, it can be any of SET, PLUS,
3327 IOR, AND, XOR, MINUS;
3328 MODE is the mode in which the operation is being performed;
3329 VAL is the integer to operate on;
3330 SOURCE is the other operand (a register, or a null-pointer for SET);
3331 SUBTARGETS means it is safe to create scratch registers if that will
3332 either produce a simpler sequence, or we will want to cse the values.
3333 Return value is the number of insns emitted. */
3335 /* ??? Tweak this for thumb2. */
3337 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
3338 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
3342 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3343 cond
= COND_EXEC_TEST (PATTERN (insn
));
3347 if (subtargets
|| code
== SET
3348 || (REG_P (target
) && REG_P (source
)
3349 && REGNO (target
) != REGNO (source
)))
3351 /* After arm_reorg has been called, we can't fix up expensive
3352 constants by pushing them into memory so we must synthesize
3353 them in-line, regardless of the cost. This is only likely to
3354 be more costly on chips that have load delay slots and we are
3355 compiling without running the scheduler (so no splitting
3356 occurred before the final instruction emission).
3358 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3360 if (!after_arm_reorg
3362 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
3364 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
3369 /* Currently SET is the only monadic value for CODE, all
3370 the rest are diadic. */
3371 if (TARGET_USE_MOVT
)
3372 arm_emit_movpair (target
, GEN_INT (val
));
3374 emit_set_insn (target
, GEN_INT (val
));
3380 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
3382 if (TARGET_USE_MOVT
)
3383 arm_emit_movpair (temp
, GEN_INT (val
));
3385 emit_set_insn (temp
, GEN_INT (val
));
3387 /* For MINUS, the value is subtracted from, since we never
3388 have subtraction of a constant. */
3390 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
3392 emit_set_insn (target
,
3393 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
3399 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
3403 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3404 ARM/THUMB2 immediates, and add up to VAL.
3405 Thr function return value gives the number of insns required. */
3407 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3408 struct four_ints
*return_sequence
)
3410 int best_consecutive_zeros
= 0;
3414 struct four_ints tmp_sequence
;
3416 /* If we aren't targeting ARM, the best place to start is always at
3417 the bottom, otherwise look more closely. */
3420 for (i
= 0; i
< 32; i
+= 2)
3422 int consecutive_zeros
= 0;
3424 if (!(val
& (3 << i
)))
3426 while ((i
< 32) && !(val
& (3 << i
)))
3428 consecutive_zeros
+= 2;
3431 if (consecutive_zeros
> best_consecutive_zeros
)
3433 best_consecutive_zeros
= consecutive_zeros
;
3434 best_start
= i
- consecutive_zeros
;
3441 /* So long as it won't require any more insns to do so, it's
3442 desirable to emit a small constant (in bits 0...9) in the last
3443 insn. This way there is more chance that it can be combined with
3444 a later addressing insn to form a pre-indexed load or store
3445 operation. Consider:
3447 *((volatile int *)0xe0000100) = 1;
3448 *((volatile int *)0xe0000110) = 2;
3450 We want this to wind up as:
3454 str rB, [rA, #0x100]
3456 str rB, [rA, #0x110]
3458 rather than having to synthesize both large constants from scratch.
3460 Therefore, we calculate how many insns would be required to emit
3461 the constant starting from `best_start', and also starting from
3462 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3463 yield a shorter sequence, we may as well use zero. */
3464 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
3466 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
3468 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
3469 if (insns2
<= insns1
)
3471 *return_sequence
= tmp_sequence
;
3479 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3481 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3482 struct four_ints
*return_sequence
, int i
)
3484 int remainder
= val
& 0xffffffff;
3487 /* Try and find a way of doing the job in either two or three
3490 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3491 location. We start at position I. This may be the MSB, or
3492 optimial_immediate_sequence may have positioned it at the largest block
3493 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3494 wrapping around to the top of the word when we drop off the bottom.
3495 In the worst case this code should produce no more than four insns.
3497 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3498 constants, shifted to any arbitrary location. We should always start
3503 unsigned int b1
, b2
, b3
, b4
;
3504 unsigned HOST_WIDE_INT result
;
3507 gcc_assert (insns
< 4);
3512 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3513 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3516 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3517 /* We can use addw/subw for the last 12 bits. */
3521 /* Use an 8-bit shifted/rotated immediate. */
3525 result
= remainder
& ((0x0ff << end
)
3526 | ((i
< end
) ? (0xff >> (32 - end
))
3533 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3534 arbitrary shifts. */
3535 i
-= TARGET_ARM
? 2 : 1;
3539 /* Next, see if we can do a better job with a thumb2 replicated
3542 We do it this way around to catch the cases like 0x01F001E0 where
3543 two 8-bit immediates would work, but a replicated constant would
3546 TODO: 16-bit constants that don't clear all the bits, but still win.
3547 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3550 b1
= (remainder
& 0xff000000) >> 24;
3551 b2
= (remainder
& 0x00ff0000) >> 16;
3552 b3
= (remainder
& 0x0000ff00) >> 8;
3553 b4
= remainder
& 0xff;
3557 /* The 8-bit immediate already found clears b1 (and maybe b2),
3558 but must leave b3 and b4 alone. */
3560 /* First try to find a 32-bit replicated constant that clears
3561 almost everything. We can assume that we can't do it in one,
3562 or else we wouldn't be here. */
3563 unsigned int tmp
= b1
& b2
& b3
& b4
;
3564 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3566 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3567 + (tmp
== b3
) + (tmp
== b4
);
3569 && (matching_bytes
>= 3
3570 || (matching_bytes
== 2
3571 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3573 /* At least 3 of the bytes match, and the fourth has at
3574 least as many bits set, or two of the bytes match
3575 and it will only require one more insn to finish. */
3583 /* Second, try to find a 16-bit replicated constant that can
3584 leave three of the bytes clear. If b2 or b4 is already
3585 zero, then we can. If the 8-bit from above would not
3586 clear b2 anyway, then we still win. */
3587 else if (b1
== b3
&& (!b2
|| !b4
3588 || (remainder
& 0x00ff0000 & ~result
)))
3590 result
= remainder
& 0xff00ff00;
3596 /* The 8-bit immediate already found clears b2 (and maybe b3)
3597 and we don't get here unless b1 is alredy clear, but it will
3598 leave b4 unchanged. */
3600 /* If we can clear b2 and b4 at once, then we win, since the
3601 8-bits couldn't possibly reach that far. */
3604 result
= remainder
& 0x00ff00ff;
3610 return_sequence
->i
[insns
++] = result
;
3611 remainder
&= ~result
;
3613 if (code
== SET
|| code
== MINUS
)
3621 /* Emit an instruction with the indicated PATTERN. If COND is
3622 non-NULL, conditionalize the execution of the instruction on COND
3626 emit_constant_insn (rtx cond
, rtx pattern
)
3629 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
3630 emit_insn (pattern
);
3633 /* As above, but extra parameter GENERATE which, if clear, suppresses
3637 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
3638 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
3643 int final_invert
= 0;
3645 int set_sign_bit_copies
= 0;
3646 int clear_sign_bit_copies
= 0;
3647 int clear_zero_bit_copies
= 0;
3648 int set_zero_bit_copies
= 0;
3649 int insns
= 0, neg_insns
, inv_insns
;
3650 unsigned HOST_WIDE_INT temp1
, temp2
;
3651 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3652 struct four_ints
*immediates
;
3653 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
3655 /* Find out which operations are safe for a given CODE. Also do a quick
3656 check for degenerate cases; these can occur when DImode operations
3669 if (remainder
== 0xffffffff)
3672 emit_constant_insn (cond
,
3673 gen_rtx_SET (VOIDmode
, target
,
3674 GEN_INT (ARM_SIGN_EXTEND (val
))));
3680 if (reload_completed
&& rtx_equal_p (target
, source
))
3684 emit_constant_insn (cond
,
3685 gen_rtx_SET (VOIDmode
, target
, source
));
3694 emit_constant_insn (cond
,
3695 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
3698 if (remainder
== 0xffffffff)
3700 if (reload_completed
&& rtx_equal_p (target
, source
))
3703 emit_constant_insn (cond
,
3704 gen_rtx_SET (VOIDmode
, target
, source
));
3713 if (reload_completed
&& rtx_equal_p (target
, source
))
3716 emit_constant_insn (cond
,
3717 gen_rtx_SET (VOIDmode
, target
, source
));
3721 if (remainder
== 0xffffffff)
3724 emit_constant_insn (cond
,
3725 gen_rtx_SET (VOIDmode
, target
,
3726 gen_rtx_NOT (mode
, source
)));
3733 /* We treat MINUS as (val - source), since (source - val) is always
3734 passed as (source + (-val)). */
3738 emit_constant_insn (cond
,
3739 gen_rtx_SET (VOIDmode
, target
,
3740 gen_rtx_NEG (mode
, source
)));
3743 if (const_ok_for_arm (val
))
3746 emit_constant_insn (cond
,
3747 gen_rtx_SET (VOIDmode
, target
,
3748 gen_rtx_MINUS (mode
, GEN_INT (val
),
3759 /* If we can do it in one insn get out quickly. */
3760 if (const_ok_for_op (val
, code
))
3763 emit_constant_insn (cond
,
3764 gen_rtx_SET (VOIDmode
, target
,
3766 ? gen_rtx_fmt_ee (code
, mode
, source
,
3772 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3774 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
3775 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
3779 if (mode
== SImode
&& i
== 16)
3780 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3782 emit_constant_insn (cond
,
3783 gen_zero_extendhisi2
3784 (target
, gen_lowpart (HImode
, source
)));
3786 /* Extz only supports SImode, but we can coerce the operands
3788 emit_constant_insn (cond
,
3789 gen_extzv_t2 (gen_lowpart (SImode
, target
),
3790 gen_lowpart (SImode
, source
),
3791 GEN_INT (i
), const0_rtx
));
3797 /* Calculate a few attributes that may be useful for specific
3799 /* Count number of leading zeros. */
3800 for (i
= 31; i
>= 0; i
--)
3802 if ((remainder
& (1 << i
)) == 0)
3803 clear_sign_bit_copies
++;
3808 /* Count number of leading 1's. */
3809 for (i
= 31; i
>= 0; i
--)
3811 if ((remainder
& (1 << i
)) != 0)
3812 set_sign_bit_copies
++;
3817 /* Count number of trailing zero's. */
3818 for (i
= 0; i
<= 31; i
++)
3820 if ((remainder
& (1 << i
)) == 0)
3821 clear_zero_bit_copies
++;
3826 /* Count number of trailing 1's. */
3827 for (i
= 0; i
<= 31; i
++)
3829 if ((remainder
& (1 << i
)) != 0)
3830 set_zero_bit_copies
++;
3838 /* See if we can do this by sign_extending a constant that is known
3839 to be negative. This is a good, way of doing it, since the shift
3840 may well merge into a subsequent insn. */
3841 if (set_sign_bit_copies
> 1)
3843 if (const_ok_for_arm
3844 (temp1
= ARM_SIGN_EXTEND (remainder
3845 << (set_sign_bit_copies
- 1))))
3849 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3850 emit_constant_insn (cond
,
3851 gen_rtx_SET (VOIDmode
, new_src
,
3853 emit_constant_insn (cond
,
3854 gen_ashrsi3 (target
, new_src
,
3855 GEN_INT (set_sign_bit_copies
- 1)));
3859 /* For an inverted constant, we will need to set the low bits,
3860 these will be shifted out of harm's way. */
3861 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
3862 if (const_ok_for_arm (~temp1
))
3866 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3867 emit_constant_insn (cond
,
3868 gen_rtx_SET (VOIDmode
, new_src
,
3870 emit_constant_insn (cond
,
3871 gen_ashrsi3 (target
, new_src
,
3872 GEN_INT (set_sign_bit_copies
- 1)));
3878 /* See if we can calculate the value as the difference between two
3879 valid immediates. */
3880 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
3882 int topshift
= clear_sign_bit_copies
& ~1;
3884 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
3885 & (0xff000000 >> topshift
));
3887 /* If temp1 is zero, then that means the 9 most significant
3888 bits of remainder were 1 and we've caused it to overflow.
3889 When topshift is 0 we don't need to do anything since we
3890 can borrow from 'bit 32'. */
3891 if (temp1
== 0 && topshift
!= 0)
3892 temp1
= 0x80000000 >> (topshift
- 1);
3894 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
3896 if (const_ok_for_arm (temp2
))
3900 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3901 emit_constant_insn (cond
,
3902 gen_rtx_SET (VOIDmode
, new_src
,
3904 emit_constant_insn (cond
,
3905 gen_addsi3 (target
, new_src
,
3913 /* See if we can generate this by setting the bottom (or the top)
3914 16 bits, and then shifting these into the other half of the
3915 word. We only look for the simplest cases, to do more would cost
3916 too much. Be careful, however, not to generate this when the
3917 alternative would take fewer insns. */
3918 if (val
& 0xffff0000)
3920 temp1
= remainder
& 0xffff0000;
3921 temp2
= remainder
& 0x0000ffff;
3923 /* Overlaps outside this range are best done using other methods. */
3924 for (i
= 9; i
< 24; i
++)
3926 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
3927 && !const_ok_for_arm (temp2
))
3929 rtx new_src
= (subtargets
3930 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3932 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
3933 source
, subtargets
, generate
);
3941 gen_rtx_ASHIFT (mode
, source
,
3948 /* Don't duplicate cases already considered. */
3949 for (i
= 17; i
< 24; i
++)
3951 if (((temp1
| (temp1
>> i
)) == remainder
)
3952 && !const_ok_for_arm (temp1
))
3954 rtx new_src
= (subtargets
3955 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3957 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
3958 source
, subtargets
, generate
);
3963 gen_rtx_SET (VOIDmode
, target
,
3966 gen_rtx_LSHIFTRT (mode
, source
,
3977 /* If we have IOR or XOR, and the constant can be loaded in a
3978 single instruction, and we can find a temporary to put it in,
3979 then this can be done in two instructions instead of 3-4. */
3981 /* TARGET can't be NULL if SUBTARGETS is 0 */
3982 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
3984 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
3988 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3990 emit_constant_insn (cond
,
3991 gen_rtx_SET (VOIDmode
, sub
,
3993 emit_constant_insn (cond
,
3994 gen_rtx_SET (VOIDmode
, target
,
3995 gen_rtx_fmt_ee (code
, mode
,
4006 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4007 and the remainder 0s for e.g. 0xfff00000)
4008 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4010 This can be done in 2 instructions by using shifts with mov or mvn.
4015 mvn r0, r0, lsr #12 */
4016 if (set_sign_bit_copies
> 8
4017 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
4021 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4022 rtx shift
= GEN_INT (set_sign_bit_copies
);
4026 gen_rtx_SET (VOIDmode
, sub
,
4028 gen_rtx_ASHIFT (mode
,
4033 gen_rtx_SET (VOIDmode
, target
,
4035 gen_rtx_LSHIFTRT (mode
, sub
,
4042 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4044 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4046 For eg. r0 = r0 | 0xfff
4051 if (set_zero_bit_copies
> 8
4052 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4056 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4057 rtx shift
= GEN_INT (set_zero_bit_copies
);
4061 gen_rtx_SET (VOIDmode
, sub
,
4063 gen_rtx_LSHIFTRT (mode
,
4068 gen_rtx_SET (VOIDmode
, target
,
4070 gen_rtx_ASHIFT (mode
, sub
,
4076 /* This will never be reached for Thumb2 because orn is a valid
4077 instruction. This is for Thumb1 and the ARM 32 bit cases.
4079 x = y | constant (such that ~constant is a valid constant)
4081 x = ~(~y & ~constant).
4083 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4087 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4088 emit_constant_insn (cond
,
4089 gen_rtx_SET (VOIDmode
, sub
,
4090 gen_rtx_NOT (mode
, source
)));
4093 sub
= gen_reg_rtx (mode
);
4094 emit_constant_insn (cond
,
4095 gen_rtx_SET (VOIDmode
, sub
,
4096 gen_rtx_AND (mode
, source
,
4098 emit_constant_insn (cond
,
4099 gen_rtx_SET (VOIDmode
, target
,
4100 gen_rtx_NOT (mode
, sub
)));
4107 /* See if two shifts will do 2 or more insn's worth of work. */
4108 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4110 HOST_WIDE_INT shift_mask
= ((0xffffffff
4111 << (32 - clear_sign_bit_copies
))
4114 if ((remainder
| shift_mask
) != 0xffffffff)
4118 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4119 insns
= arm_gen_constant (AND
, mode
, cond
,
4120 remainder
| shift_mask
,
4121 new_src
, source
, subtargets
, 1);
4126 rtx targ
= subtargets
? NULL_RTX
: target
;
4127 insns
= arm_gen_constant (AND
, mode
, cond
,
4128 remainder
| shift_mask
,
4129 targ
, source
, subtargets
, 0);
4135 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4136 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4138 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4139 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4145 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4147 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4149 if ((remainder
| shift_mask
) != 0xffffffff)
4153 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4155 insns
= arm_gen_constant (AND
, mode
, cond
,
4156 remainder
| shift_mask
,
4157 new_src
, source
, subtargets
, 1);
4162 rtx targ
= subtargets
? NULL_RTX
: target
;
4164 insns
= arm_gen_constant (AND
, mode
, cond
,
4165 remainder
| shift_mask
,
4166 targ
, source
, subtargets
, 0);
4172 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4173 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4175 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4176 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4188 /* Calculate what the instruction sequences would be if we generated it
4189 normally, negated, or inverted. */
4191 /* AND cannot be split into multiple insns, so invert and use BIC. */
4194 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4197 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4202 if (can_invert
|| final_invert
)
4203 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4208 immediates
= &pos_immediates
;
4210 /* Is the negated immediate sequence more efficient? */
4211 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4214 immediates
= &neg_immediates
;
4219 /* Is the inverted immediate sequence more efficient?
4220 We must allow for an extra NOT instruction for XOR operations, although
4221 there is some chance that the final 'mvn' will get optimized later. */
4222 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4225 immediates
= &inv_immediates
;
4233 /* Now output the chosen sequence as instructions. */
4236 for (i
= 0; i
< insns
; i
++)
4238 rtx new_src
, temp1_rtx
;
4240 temp1
= immediates
->i
[i
];
4242 if (code
== SET
|| code
== MINUS
)
4243 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4244 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4245 new_src
= gen_reg_rtx (mode
);
4251 else if (can_negate
)
4254 temp1
= trunc_int_for_mode (temp1
, mode
);
4255 temp1_rtx
= GEN_INT (temp1
);
4259 else if (code
== MINUS
)
4260 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4262 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4264 emit_constant_insn (cond
,
4265 gen_rtx_SET (VOIDmode
, new_src
,
4271 can_negate
= can_invert
;
4275 else if (code
== MINUS
)
4283 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
4284 gen_rtx_NOT (mode
, source
)));
4291 /* Canonicalize a comparison so that we are more likely to recognize it.
4292 This can be done for a few constant compares, where we can make the
4293 immediate value easier to load. */
4296 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4297 bool op0_preserve_value
)
4299 enum machine_mode mode
;
4300 unsigned HOST_WIDE_INT i
, maxval
;
4302 mode
= GET_MODE (*op0
);
4303 if (mode
== VOIDmode
)
4304 mode
= GET_MODE (*op1
);
4306 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4308 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4309 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4310 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4311 for GTU/LEU in Thumb mode. */
4316 if (*code
== GT
|| *code
== LE
4317 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4319 /* Missing comparison. First try to use an available
4321 if (CONST_INT_P (*op1
))
4329 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4331 *op1
= GEN_INT (i
+ 1);
4332 *code
= *code
== GT
? GE
: LT
;
4338 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4339 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4341 *op1
= GEN_INT (i
+ 1);
4342 *code
= *code
== GTU
? GEU
: LTU
;
4351 /* If that did not work, reverse the condition. */
4352 if (!op0_preserve_value
)
4357 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4363 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4364 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4365 to facilitate possible combining with a cmp into 'ands'. */
4367 && GET_CODE (*op0
) == ZERO_EXTEND
4368 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4369 && GET_MODE (XEXP (*op0
, 0)) == QImode
4370 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4371 && subreg_lowpart_p (XEXP (*op0
, 0))
4372 && *op1
== const0_rtx
)
4373 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4376 /* Comparisons smaller than DImode. Only adjust comparisons against
4377 an out-of-range constant. */
4378 if (!CONST_INT_P (*op1
)
4379 || const_ok_for_arm (INTVAL (*op1
))
4380 || const_ok_for_arm (- INTVAL (*op1
)))
4394 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4396 *op1
= GEN_INT (i
+ 1);
4397 *code
= *code
== GT
? GE
: LT
;
4405 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4407 *op1
= GEN_INT (i
- 1);
4408 *code
= *code
== GE
? GT
: LE
;
4415 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4416 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4418 *op1
= GEN_INT (i
+ 1);
4419 *code
= *code
== GTU
? GEU
: LTU
;
4427 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4429 *op1
= GEN_INT (i
- 1);
4430 *code
= *code
== GEU
? GTU
: LEU
;
4441 /* Define how to find the value returned by a function. */
4444 arm_function_value(const_tree type
, const_tree func
,
4445 bool outgoing ATTRIBUTE_UNUSED
)
4447 enum machine_mode mode
;
4448 int unsignedp ATTRIBUTE_UNUSED
;
4449 rtx r ATTRIBUTE_UNUSED
;
4451 mode
= TYPE_MODE (type
);
4453 if (TARGET_AAPCS_BASED
)
4454 return aapcs_allocate_return_reg (mode
, type
, func
);
4456 /* Promote integer types. */
4457 if (INTEGRAL_TYPE_P (type
))
4458 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
4460 /* Promotes small structs returned in a register to full-word size
4461 for big-endian AAPCS. */
4462 if (arm_return_in_msb (type
))
4464 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4465 if (size
% UNITS_PER_WORD
!= 0)
4467 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4468 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4472 return arm_libcall_value_1 (mode
);
4475 /* libcall hashtable helpers. */
4477 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
4479 typedef rtx_def value_type
;
4480 typedef rtx_def compare_type
;
4481 static inline hashval_t
hash (const value_type
*);
4482 static inline bool equal (const value_type
*, const compare_type
*);
4483 static inline void remove (value_type
*);
4487 libcall_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
4489 return rtx_equal_p (p1
, p2
);
4493 libcall_hasher::hash (const value_type
*p1
)
4495 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
4498 typedef hash_table
<libcall_hasher
> libcall_table_type
;
4501 add_libcall (libcall_table_type htab
, rtx libcall
)
4503 *htab
.find_slot (libcall
, INSERT
) = libcall
;
4507 arm_libcall_uses_aapcs_base (const_rtx libcall
)
4509 static bool init_done
= false;
4510 static libcall_table_type libcall_htab
;
4516 libcall_htab
.create (31);
4517 add_libcall (libcall_htab
,
4518 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
4519 add_libcall (libcall_htab
,
4520 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
4521 add_libcall (libcall_htab
,
4522 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
4523 add_libcall (libcall_htab
,
4524 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
4526 add_libcall (libcall_htab
,
4527 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
4528 add_libcall (libcall_htab
,
4529 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
4530 add_libcall (libcall_htab
,
4531 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
4532 add_libcall (libcall_htab
,
4533 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
4535 add_libcall (libcall_htab
,
4536 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
4537 add_libcall (libcall_htab
,
4538 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
4539 add_libcall (libcall_htab
,
4540 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
4541 add_libcall (libcall_htab
,
4542 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
4543 add_libcall (libcall_htab
,
4544 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
4545 add_libcall (libcall_htab
,
4546 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
4547 add_libcall (libcall_htab
,
4548 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
4549 add_libcall (libcall_htab
,
4550 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
4552 /* Values from double-precision helper functions are returned in core
4553 registers if the selected core only supports single-precision
4554 arithmetic, even if we are using the hard-float ABI. The same is
4555 true for single-precision helpers, but we will never be using the
4556 hard-float ABI on a CPU which doesn't support single-precision
4557 operations in hardware. */
4558 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
4559 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
4560 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
4561 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
4562 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
4563 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
4564 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
4565 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
4566 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
4567 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
4568 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
4569 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
4571 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
4575 return libcall
&& libcall_htab
.find (libcall
) != NULL
;
4579 arm_libcall_value_1 (enum machine_mode mode
)
4581 if (TARGET_AAPCS_BASED
)
4582 return aapcs_libcall_value (mode
);
4583 else if (TARGET_IWMMXT_ABI
4584 && arm_vector_mode_supported_p (mode
))
4585 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
4587 return gen_rtx_REG (mode
, ARG_REGISTER (1));
4590 /* Define how to find the value returned by a library function
4591 assuming the value has mode MODE. */
4594 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
4596 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
4597 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4599 /* The following libcalls return their result in integer registers,
4600 even though they return a floating point value. */
4601 if (arm_libcall_uses_aapcs_base (libcall
))
4602 return gen_rtx_REG (mode
, ARG_REGISTER(1));
4606 return arm_libcall_value_1 (mode
);
4609 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4612 arm_function_value_regno_p (const unsigned int regno
)
4614 if (regno
== ARG_REGISTER (1)
4616 && TARGET_AAPCS_BASED
4618 && TARGET_HARD_FLOAT
4619 && regno
== FIRST_VFP_REGNUM
)
4620 || (TARGET_IWMMXT_ABI
4621 && regno
== FIRST_IWMMXT_REGNUM
))
4627 /* Determine the amount of memory needed to store the possible return
4628 registers of an untyped call. */
4630 arm_apply_result_size (void)
4636 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
4638 if (TARGET_IWMMXT_ABI
)
4645 /* Decide whether TYPE should be returned in memory (true)
4646 or in a register (false). FNTYPE is the type of the function making
4649 arm_return_in_memory (const_tree type
, const_tree fntype
)
4653 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
4655 if (TARGET_AAPCS_BASED
)
4657 /* Simple, non-aggregate types (ie not including vectors and
4658 complex) are always returned in a register (or registers).
4659 We don't care about which register here, so we can short-cut
4660 some of the detail. */
4661 if (!AGGREGATE_TYPE_P (type
)
4662 && TREE_CODE (type
) != VECTOR_TYPE
4663 && TREE_CODE (type
) != COMPLEX_TYPE
)
4666 /* Any return value that is no larger than one word can be
4668 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
4671 /* Check any available co-processors to see if they accept the
4672 type as a register candidate (VFP, for example, can return
4673 some aggregates in consecutive registers). These aren't
4674 available if the call is variadic. */
4675 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
4678 /* Vector values should be returned using ARM registers, not
4679 memory (unless they're over 16 bytes, which will break since
4680 we only have four call-clobbered registers to play with). */
4681 if (TREE_CODE (type
) == VECTOR_TYPE
)
4682 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4684 /* The rest go in memory. */
4688 if (TREE_CODE (type
) == VECTOR_TYPE
)
4689 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4691 if (!AGGREGATE_TYPE_P (type
) &&
4692 (TREE_CODE (type
) != VECTOR_TYPE
))
4693 /* All simple types are returned in registers. */
4696 if (arm_abi
!= ARM_ABI_APCS
)
4698 /* ATPCS and later return aggregate types in memory only if they are
4699 larger than a word (or are variable size). */
4700 return (size
< 0 || size
> UNITS_PER_WORD
);
4703 /* For the arm-wince targets we choose to be compatible with Microsoft's
4704 ARM and Thumb compilers, which always return aggregates in memory. */
4706 /* All structures/unions bigger than one word are returned in memory.
4707 Also catch the case where int_size_in_bytes returns -1. In this case
4708 the aggregate is either huge or of variable size, and in either case
4709 we will want to return it via memory and not in a register. */
4710 if (size
< 0 || size
> UNITS_PER_WORD
)
4713 if (TREE_CODE (type
) == RECORD_TYPE
)
4717 /* For a struct the APCS says that we only return in a register
4718 if the type is 'integer like' and every addressable element
4719 has an offset of zero. For practical purposes this means
4720 that the structure can have at most one non bit-field element
4721 and that this element must be the first one in the structure. */
4723 /* Find the first field, ignoring non FIELD_DECL things which will
4724 have been created by C++. */
4725 for (field
= TYPE_FIELDS (type
);
4726 field
&& TREE_CODE (field
) != FIELD_DECL
;
4727 field
= DECL_CHAIN (field
))
4731 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4733 /* Check that the first field is valid for returning in a register. */
4735 /* ... Floats are not allowed */
4736 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4739 /* ... Aggregates that are not themselves valid for returning in
4740 a register are not allowed. */
4741 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4744 /* Now check the remaining fields, if any. Only bitfields are allowed,
4745 since they are not addressable. */
4746 for (field
= DECL_CHAIN (field
);
4748 field
= DECL_CHAIN (field
))
4750 if (TREE_CODE (field
) != FIELD_DECL
)
4753 if (!DECL_BIT_FIELD_TYPE (field
))
4760 if (TREE_CODE (type
) == UNION_TYPE
)
4764 /* Unions can be returned in registers if every element is
4765 integral, or can be returned in an integer register. */
4766 for (field
= TYPE_FIELDS (type
);
4768 field
= DECL_CHAIN (field
))
4770 if (TREE_CODE (field
) != FIELD_DECL
)
4773 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4776 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4782 #endif /* not ARM_WINCE */
4784 /* Return all other types in memory. */
4788 const struct pcs_attribute_arg
4792 } pcs_attribute_args
[] =
4794 {"aapcs", ARM_PCS_AAPCS
},
4795 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
4797 /* We could recognize these, but changes would be needed elsewhere
4798 * to implement them. */
4799 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
4800 {"atpcs", ARM_PCS_ATPCS
},
4801 {"apcs", ARM_PCS_APCS
},
4803 {NULL
, ARM_PCS_UNKNOWN
}
4807 arm_pcs_from_attribute (tree attr
)
4809 const struct pcs_attribute_arg
*ptr
;
4812 /* Get the value of the argument. */
4813 if (TREE_VALUE (attr
) == NULL_TREE
4814 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
4815 return ARM_PCS_UNKNOWN
;
4817 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
4819 /* Check it against the list of known arguments. */
4820 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4821 if (streq (arg
, ptr
->arg
))
4824 /* An unrecognized interrupt type. */
4825 return ARM_PCS_UNKNOWN
;
4828 /* Get the PCS variant to use for this call. TYPE is the function's type
4829 specification, DECL is the specific declartion. DECL may be null if
4830 the call could be indirect or if this is a library call. */
4832 arm_get_pcs_model (const_tree type
, const_tree decl
)
4834 bool user_convention
= false;
4835 enum arm_pcs user_pcs
= arm_pcs_default
;
4840 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
4843 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
4844 user_convention
= true;
4847 if (TARGET_AAPCS_BASED
)
4849 /* Detect varargs functions. These always use the base rules
4850 (no argument is ever a candidate for a co-processor
4852 bool base_rules
= stdarg_p (type
);
4854 if (user_convention
)
4856 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
4857 sorry ("non-AAPCS derived PCS variant");
4858 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
4859 error ("variadic functions must use the base AAPCS variant");
4863 return ARM_PCS_AAPCS
;
4864 else if (user_convention
)
4866 else if (decl
&& flag_unit_at_a_time
)
4868 /* Local functions never leak outside this compilation unit,
4869 so we are free to use whatever conventions are
4871 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4872 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
4874 return ARM_PCS_AAPCS_LOCAL
;
4877 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
4878 sorry ("PCS variant");
4880 /* For everything else we use the target's default. */
4881 return arm_pcs_default
;
4886 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4887 const_tree fntype ATTRIBUTE_UNUSED
,
4888 rtx libcall ATTRIBUTE_UNUSED
,
4889 const_tree fndecl ATTRIBUTE_UNUSED
)
4891 /* Record the unallocated VFP registers. */
4892 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
4893 pcum
->aapcs_vfp_reg_alloc
= 0;
4896 /* Walk down the type tree of TYPE counting consecutive base elements.
4897 If *MODEP is VOIDmode, then set it to the first valid floating point
4898 type. If a non-floating point type is found, or if a floating point
4899 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4900 otherwise return the count in the sub-tree. */
4902 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
4904 enum machine_mode mode
;
4907 switch (TREE_CODE (type
))
4910 mode
= TYPE_MODE (type
);
4911 if (mode
!= DFmode
&& mode
!= SFmode
)
4914 if (*modep
== VOIDmode
)
4923 mode
= TYPE_MODE (TREE_TYPE (type
));
4924 if (mode
!= DFmode
&& mode
!= SFmode
)
4927 if (*modep
== VOIDmode
)
4936 /* Use V2SImode and V4SImode as representatives of all 64-bit
4937 and 128-bit vector types, whether or not those modes are
4938 supported with the present options. */
4939 size
= int_size_in_bytes (type
);
4952 if (*modep
== VOIDmode
)
4955 /* Vector modes are considered to be opaque: two vectors are
4956 equivalent for the purposes of being homogeneous aggregates
4957 if they are the same size. */
4966 tree index
= TYPE_DOMAIN (type
);
4968 /* Can't handle incomplete types. */
4969 if (!COMPLETE_TYPE_P (type
))
4972 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
4975 || !TYPE_MAX_VALUE (index
)
4976 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
4977 || !TYPE_MIN_VALUE (index
)
4978 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
4982 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
4983 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
4985 /* There must be no padding. */
4986 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
4987 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
4988 != count
* GET_MODE_BITSIZE (*modep
)))
5000 /* Can't handle incomplete types. */
5001 if (!COMPLETE_TYPE_P (type
))
5004 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5006 if (TREE_CODE (field
) != FIELD_DECL
)
5009 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5015 /* There must be no padding. */
5016 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
5017 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
5018 != count
* GET_MODE_BITSIZE (*modep
)))
5025 case QUAL_UNION_TYPE
:
5027 /* These aren't very interesting except in a degenerate case. */
5032 /* Can't handle incomplete types. */
5033 if (!COMPLETE_TYPE_P (type
))
5036 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5038 if (TREE_CODE (field
) != FIELD_DECL
)
5041 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5044 count
= count
> sub_count
? count
: sub_count
;
5047 /* There must be no padding. */
5048 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
5049 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
5050 != count
* GET_MODE_BITSIZE (*modep
)))
5063 /* Return true if PCS_VARIANT should use VFP registers. */
5065 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5067 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5069 static bool seen_thumb1_vfp
= false;
5071 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5073 sorry ("Thumb-1 hard-float VFP ABI");
5074 /* sorry() is not immediately fatal, so only display this once. */
5075 seen_thumb1_vfp
= true;
5081 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5084 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5085 (TARGET_VFP_DOUBLE
|| !is_double
));
5088 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5089 suitable for passing or returning in VFP registers for the PCS
5090 variant selected. If it is, then *BASE_MODE is updated to contain
5091 a machine mode describing each element of the argument's type and
5092 *COUNT to hold the number of such elements. */
5094 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5095 enum machine_mode mode
, const_tree type
,
5096 enum machine_mode
*base_mode
, int *count
)
5098 enum machine_mode new_mode
= VOIDmode
;
5100 /* If we have the type information, prefer that to working things
5101 out from the mode. */
5104 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5106 if (ag_count
> 0 && ag_count
<= 4)
5111 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5112 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5113 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5118 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5121 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5127 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5130 *base_mode
= new_mode
;
5135 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5136 enum machine_mode mode
, const_tree type
)
5138 int count ATTRIBUTE_UNUSED
;
5139 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
5141 if (!use_vfp_abi (pcs_variant
, false))
5143 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5148 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5151 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5154 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5155 &pcum
->aapcs_vfp_rmode
,
5156 &pcum
->aapcs_vfp_rcount
);
5160 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5161 const_tree type ATTRIBUTE_UNUSED
)
5163 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5164 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5167 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5168 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5170 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5172 || (mode
== TImode
&& ! TARGET_NEON
)
5173 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5176 int rcount
= pcum
->aapcs_vfp_rcount
;
5178 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5182 /* Avoid using unsupported vector modes. */
5183 if (rmode
== V2SImode
)
5185 else if (rmode
== V4SImode
)
5192 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5193 for (i
= 0; i
< rcount
; i
++)
5195 rtx tmp
= gen_rtx_REG (rmode
,
5196 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5197 tmp
= gen_rtx_EXPR_LIST
5199 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5200 XVECEXP (par
, 0, i
) = tmp
;
5203 pcum
->aapcs_reg
= par
;
5206 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5213 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5214 enum machine_mode mode
,
5215 const_tree type ATTRIBUTE_UNUSED
)
5217 if (!use_vfp_abi (pcs_variant
, false))
5220 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5223 enum machine_mode ag_mode
;
5228 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5233 if (ag_mode
== V2SImode
)
5235 else if (ag_mode
== V4SImode
)
5241 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5242 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5243 for (i
= 0; i
< count
; i
++)
5245 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5246 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5247 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5248 XVECEXP (par
, 0, i
) = tmp
;
5254 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5258 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5259 enum machine_mode mode ATTRIBUTE_UNUSED
,
5260 const_tree type ATTRIBUTE_UNUSED
)
5262 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5263 pcum
->aapcs_vfp_reg_alloc
= 0;
5267 #define AAPCS_CP(X) \
5269 aapcs_ ## X ## _cum_init, \
5270 aapcs_ ## X ## _is_call_candidate, \
5271 aapcs_ ## X ## _allocate, \
5272 aapcs_ ## X ## _is_return_candidate, \
5273 aapcs_ ## X ## _allocate_return_reg, \
5274 aapcs_ ## X ## _advance \
5277 /* Table of co-processors that can be used to pass arguments in
5278 registers. Idealy no arugment should be a candidate for more than
5279 one co-processor table entry, but the table is processed in order
5280 and stops after the first match. If that entry then fails to put
5281 the argument into a co-processor register, the argument will go on
5285 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5286 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5288 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5289 BLKmode) is a candidate for this co-processor's registers; this
5290 function should ignore any position-dependent state in
5291 CUMULATIVE_ARGS and only use call-type dependent information. */
5292 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5294 /* Return true if the argument does get a co-processor register; it
5295 should set aapcs_reg to an RTX of the register allocated as is
5296 required for a return from FUNCTION_ARG. */
5297 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5299 /* Return true if a result of mode MODE (or type TYPE if MODE is
5300 BLKmode) is can be returned in this co-processor's registers. */
5301 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5303 /* Allocate and return an RTX element to hold the return type of a
5304 call, this routine must not fail and will only be called if
5305 is_return_candidate returned true with the same parameters. */
5306 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5308 /* Finish processing this argument and prepare to start processing
5310 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5311 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5319 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5324 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5325 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5332 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5334 /* We aren't passed a decl, so we can't check that a call is local.
5335 However, it isn't clear that that would be a win anyway, since it
5336 might limit some tail-calling opportunities. */
5337 enum arm_pcs pcs_variant
;
5341 const_tree fndecl
= NULL_TREE
;
5343 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5346 fntype
= TREE_TYPE (fntype
);
5349 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5352 pcs_variant
= arm_pcs_default
;
5354 if (pcs_variant
!= ARM_PCS_AAPCS
)
5358 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5359 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
5368 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
5371 /* We aren't passed a decl, so we can't check that a call is local.
5372 However, it isn't clear that that would be a win anyway, since it
5373 might limit some tail-calling opportunities. */
5374 enum arm_pcs pcs_variant
;
5375 int unsignedp ATTRIBUTE_UNUSED
;
5379 const_tree fndecl
= NULL_TREE
;
5381 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5384 fntype
= TREE_TYPE (fntype
);
5387 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5390 pcs_variant
= arm_pcs_default
;
5392 /* Promote integer types. */
5393 if (type
&& INTEGRAL_TYPE_P (type
))
5394 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5396 if (pcs_variant
!= ARM_PCS_AAPCS
)
5400 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5401 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5403 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5407 /* Promotes small structs returned in a register to full-word size
5408 for big-endian AAPCS. */
5409 if (type
&& arm_return_in_msb (type
))
5411 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5412 if (size
% UNITS_PER_WORD
!= 0)
5414 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5415 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5419 return gen_rtx_REG (mode
, R0_REGNUM
);
5423 aapcs_libcall_value (enum machine_mode mode
)
5425 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5426 && GET_MODE_SIZE (mode
) <= 4)
5429 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
5432 /* Lay out a function argument using the AAPCS rules. The rule
5433 numbers referred to here are those in the AAPCS. */
5435 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5436 const_tree type
, bool named
)
5441 /* We only need to do this once per argument. */
5442 if (pcum
->aapcs_arg_processed
)
5445 pcum
->aapcs_arg_processed
= true;
5447 /* Special case: if named is false then we are handling an incoming
5448 anonymous argument which is on the stack. */
5452 /* Is this a potential co-processor register candidate? */
5453 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5455 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
5456 pcum
->aapcs_cprc_slot
= slot
;
5458 /* We don't have to apply any of the rules from part B of the
5459 preparation phase, these are handled elsewhere in the
5464 /* A Co-processor register candidate goes either in its own
5465 class of registers or on the stack. */
5466 if (!pcum
->aapcs_cprc_failed
[slot
])
5468 /* C1.cp - Try to allocate the argument to co-processor
5470 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
5473 /* C2.cp - Put the argument on the stack and note that we
5474 can't assign any more candidates in this slot. We also
5475 need to note that we have allocated stack space, so that
5476 we won't later try to split a non-cprc candidate between
5477 core registers and the stack. */
5478 pcum
->aapcs_cprc_failed
[slot
] = true;
5479 pcum
->can_split
= false;
5482 /* We didn't get a register, so this argument goes on the
5484 gcc_assert (pcum
->can_split
== false);
5489 /* C3 - For double-word aligned arguments, round the NCRN up to the
5490 next even number. */
5491 ncrn
= pcum
->aapcs_ncrn
;
5492 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
5495 nregs
= ARM_NUM_REGS2(mode
, type
);
5497 /* Sigh, this test should really assert that nregs > 0, but a GCC
5498 extension allows empty structs and then gives them empty size; it
5499 then allows such a structure to be passed by value. For some of
5500 the code below we have to pretend that such an argument has
5501 non-zero size so that we 'locate' it correctly either in
5502 registers or on the stack. */
5503 gcc_assert (nregs
>= 0);
5505 nregs2
= nregs
? nregs
: 1;
5507 /* C4 - Argument fits entirely in core registers. */
5508 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
5510 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5511 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
5515 /* C5 - Some core registers left and there are no arguments already
5516 on the stack: split this argument between the remaining core
5517 registers and the stack. */
5518 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
5520 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5521 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5522 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
5526 /* C6 - NCRN is set to 4. */
5527 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5529 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
5533 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5534 for a call to a function whose data type is FNTYPE.
5535 For a library call, FNTYPE is NULL. */
5537 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
5539 tree fndecl ATTRIBUTE_UNUSED
)
5541 /* Long call handling. */
5543 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5545 pcum
->pcs_variant
= arm_pcs_default
;
5547 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5549 if (arm_libcall_uses_aapcs_base (libname
))
5550 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
5552 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
5553 pcum
->aapcs_reg
= NULL_RTX
;
5554 pcum
->aapcs_partial
= 0;
5555 pcum
->aapcs_arg_processed
= false;
5556 pcum
->aapcs_cprc_slot
= -1;
5557 pcum
->can_split
= true;
5559 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5563 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5565 pcum
->aapcs_cprc_failed
[i
] = false;
5566 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
5574 /* On the ARM, the offset starts at 0. */
5576 pcum
->iwmmxt_nregs
= 0;
5577 pcum
->can_split
= true;
5579 /* Varargs vectors are treated the same as long long.
5580 named_count avoids having to change the way arm handles 'named' */
5581 pcum
->named_count
= 0;
5584 if (TARGET_REALLY_IWMMXT
&& fntype
)
5588 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
5590 fn_arg
= TREE_CHAIN (fn_arg
))
5591 pcum
->named_count
+= 1;
5593 if (! pcum
->named_count
)
5594 pcum
->named_count
= INT_MAX
;
5598 /* Return true if we use LRA instead of reload pass. */
5602 return arm_lra_flag
;
5605 /* Return true if mode/type need doubleword alignment. */
5607 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
5609 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
5610 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
5614 /* Determine where to put an argument to a function.
5615 Value is zero to push the argument on the stack,
5616 or a hard register in which to store the argument.
5618 MODE is the argument's machine mode.
5619 TYPE is the data type of the argument (as a tree).
5620 This is null for libcalls where that information may
5622 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5623 the preceding args and about the function being called.
5624 NAMED is nonzero if this argument is a named parameter
5625 (otherwise it is an extra parameter matching an ellipsis).
5627 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5628 other arguments are passed on the stack. If (NAMED == 0) (which happens
5629 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5630 defined), say it is passed in the stack (function_prologue will
5631 indeed make it pass in the stack if necessary). */
5634 arm_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
5635 const_tree type
, bool named
)
5637 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5640 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5641 a call insn (op3 of a call_value insn). */
5642 if (mode
== VOIDmode
)
5645 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5647 aapcs_layout_arg (pcum
, mode
, type
, named
);
5648 return pcum
->aapcs_reg
;
5651 /* Varargs vectors are treated the same as long long.
5652 named_count avoids having to change the way arm handles 'named' */
5653 if (TARGET_IWMMXT_ABI
5654 && arm_vector_mode_supported_p (mode
)
5655 && pcum
->named_count
> pcum
->nargs
+ 1)
5657 if (pcum
->iwmmxt_nregs
<= 9)
5658 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
5661 pcum
->can_split
= false;
5666 /* Put doubleword aligned quantities in even register pairs. */
5668 && ARM_DOUBLEWORD_ALIGN
5669 && arm_needs_doubleword_align (mode
, type
))
5672 /* Only allow splitting an arg between regs and memory if all preceding
5673 args were allocated to regs. For args passed by reference we only count
5674 the reference pointer. */
5675 if (pcum
->can_split
)
5678 nregs
= ARM_NUM_REGS2 (mode
, type
);
5680 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
5683 return gen_rtx_REG (mode
, pcum
->nregs
);
5687 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
5689 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
5690 ? DOUBLEWORD_ALIGNMENT
5695 arm_arg_partial_bytes (cumulative_args_t pcum_v
, enum machine_mode mode
,
5696 tree type
, bool named
)
5698 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5699 int nregs
= pcum
->nregs
;
5701 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5703 aapcs_layout_arg (pcum
, mode
, type
, named
);
5704 return pcum
->aapcs_partial
;
5707 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
5710 if (NUM_ARG_REGS
> nregs
5711 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
5713 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
5718 /* Update the data in PCUM to advance over an argument
5719 of mode MODE and data type TYPE.
5720 (TYPE is null for libcalls where that information may not be available.) */
5723 arm_function_arg_advance (cumulative_args_t pcum_v
, enum machine_mode mode
,
5724 const_tree type
, bool named
)
5726 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5728 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5730 aapcs_layout_arg (pcum
, mode
, type
, named
);
5732 if (pcum
->aapcs_cprc_slot
>= 0)
5734 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
5736 pcum
->aapcs_cprc_slot
= -1;
5739 /* Generic stuff. */
5740 pcum
->aapcs_arg_processed
= false;
5741 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
5742 pcum
->aapcs_reg
= NULL_RTX
;
5743 pcum
->aapcs_partial
= 0;
5748 if (arm_vector_mode_supported_p (mode
)
5749 && pcum
->named_count
> pcum
->nargs
5750 && TARGET_IWMMXT_ABI
)
5751 pcum
->iwmmxt_nregs
+= 1;
5753 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
5757 /* Variable sized types are passed by reference. This is a GCC
5758 extension to the ARM ABI. */
5761 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
5762 enum machine_mode mode ATTRIBUTE_UNUSED
,
5763 const_tree type
, bool named ATTRIBUTE_UNUSED
)
5765 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,   /* No #pragma [no_]long_calls is in effect.  */
  LONG,  /* #pragma long_calls is in effect.  */
  SHORT  /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

/* State set by the pragma handlers below and consulted when default
   attributes are assigned to newly defined function types.  */
static arm_pragma_enum arm_pragma_long_calls = OFF;
5779 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5781 arm_pragma_long_calls
= LONG
;
5785 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5787 arm_pragma_long_calls
= SHORT
;
5791 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5793 arm_pragma_long_calls
= OFF
;
5796 /* Handle an attribute requiring a FUNCTION_DECL;
5797 arguments as in struct attribute_spec.handler. */
5799 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
5800 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5802 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5804 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5806 *no_add_attrs
= true;
5812 /* Handle an "interrupt" or "isr" attribute;
5813 arguments as in struct attribute_spec.handler. */
5815 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
5820 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5822 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5824 *no_add_attrs
= true;
5826 /* FIXME: the argument if any is checked for type attributes;
5827 should it be checked for decl ones? */
5831 if (TREE_CODE (*node
) == FUNCTION_TYPE
5832 || TREE_CODE (*node
) == METHOD_TYPE
)
5834 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
5836 warning (OPT_Wattributes
, "%qE attribute ignored",
5838 *no_add_attrs
= true;
5841 else if (TREE_CODE (*node
) == POINTER_TYPE
5842 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
5843 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
5844 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
5846 *node
= build_variant_type_copy (*node
);
5847 TREE_TYPE (*node
) = build_type_attribute_variant
5849 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
5850 *no_add_attrs
= true;
5854 /* Possibly pass this attribute on from the type to a decl. */
5855 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
5856 | (int) ATTR_FLAG_FUNCTION_NEXT
5857 | (int) ATTR_FLAG_ARRAY_NEXT
))
5859 *no_add_attrs
= true;
5860 return tree_cons (name
, args
, NULL_TREE
);
5864 warning (OPT_Wattributes
, "%qE attribute ignored",
5873 /* Handle a "pcs" attribute; arguments as in struct
5874 attribute_spec.handler. */
5876 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
5877 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5879 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
5881 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
5882 *no_add_attrs
= true;
5887 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5888 /* Handle the "notshared" attribute. This attribute is another way of
5889 requesting hidden visibility. ARM's compiler supports
5890 "__declspec(notshared)"; we support the same thing via an
5894 arm_handle_notshared_attribute (tree
*node
,
5895 tree name ATTRIBUTE_UNUSED
,
5896 tree args ATTRIBUTE_UNUSED
,
5897 int flags ATTRIBUTE_UNUSED
,
5900 tree decl
= TYPE_NAME (*node
);
5904 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
5905 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
5906 *no_add_attrs
= false;
5912 /* Return 0 if the attributes for two types are incompatible, 1 if they
5913 are compatible, and 2 if they are nearly compatible (which causes a
5914 warning to be generated). */
5916 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
5920 /* Check for mismatch of non-default calling convention. */
5921 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
5924 /* Check for mismatched call attributes. */
5925 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5926 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5927 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5928 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5930 /* Only bother to check if an attribute is defined. */
5931 if (l1
| l2
| s1
| s2
)
5933 /* If one type has an attribute, the other must have the same attribute. */
5934 if ((l1
!= l2
) || (s1
!= s2
))
5937 /* Disallow mixed attributes. */
5938 if ((l1
& s2
) || (l2
& s1
))
5942 /* Check for mismatched ISR attribute. */
5943 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
5945 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
5946 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
5948 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
5955 /* Assigns default attributes to newly defined type. This is used to
5956 set short_call/long_call attributes for function types of
5957 functions defined inside corresponding #pragma scopes. */
5959 arm_set_default_type_attributes (tree type
)
5961 /* Add __attribute__ ((long_call)) to all functions, when
5962 inside #pragma long_calls or __attribute__ ((short_call)),
5963 when inside #pragma no_long_calls. */
5964 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
5966 tree type_attr_list
, attr_name
;
5967 type_attr_list
= TYPE_ATTRIBUTES (type
);
5969 if (arm_pragma_long_calls
== LONG
)
5970 attr_name
= get_identifier ("long_call");
5971 else if (arm_pragma_long_calls
== SHORT
)
5972 attr_name
= get_identifier ("short_call");
5976 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
5977 TYPE_ATTRIBUTES (type
) = type_attr_list
;
5981 /* Return true if DECL is known to be linked into section SECTION. */
5984 arm_function_in_section_p (tree decl
, section
*section
)
5986 /* We can only be certain about functions defined in the same
5987 compilation unit. */
5988 if (!TREE_STATIC (decl
))
5991 /* Make sure that SYMBOL always binds to the definition in this
5992 compilation unit. */
5993 if (!targetm
.binds_local_p (decl
))
5996 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5997 if (!DECL_SECTION_NAME (decl
))
5999 /* Make sure that we will not create a unique section for DECL. */
6000 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
6004 return function_section (decl
) == section
;
6007 /* Return nonzero if a 32-bit "long_call" should be generated for
6008 a call from the current function to DECL. We generate a long_call
6011 a. has an __attribute__((long call))
6012 or b. is within the scope of a #pragma long_calls
6013 or c. the -mlong-calls command line switch has been specified
6015 However we do not generate a long call if the function:
6017 d. has an __attribute__ ((short_call))
6018 or e. is inside the scope of a #pragma no_long_calls
6019 or f. is defined in the same section as the current function. */
6022 arm_is_long_call_p (tree decl
)
6027 return TARGET_LONG_CALLS
;
6029 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6030 if (lookup_attribute ("short_call", attrs
))
6033 /* For "f", be conservative, and only cater for cases in which the
6034 whole of the current function is placed in the same section. */
6035 if (!flag_reorder_blocks_and_partition
6036 && TREE_CODE (decl
) == FUNCTION_DECL
6037 && arm_function_in_section_p (decl
, current_function_section ()))
6040 if (lookup_attribute ("long_call", attrs
))
6043 return TARGET_LONG_CALLS
;
6046 /* Return nonzero if it is ok to make a tail-call to DECL. */
6048 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6050 unsigned long func_type
;
6052 if (cfun
->machine
->sibcall_blocked
)
6055 /* Never tailcall something if we are generating code for Thumb-1. */
6059 /* The PIC register is live on entry to VxWorks PLT entries, so we
6060 must make the call before restoring the PIC register. */
6061 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6064 /* Cannot tail-call to long calls, since these are out of range of
6065 a branch instruction. */
6066 if (decl
&& arm_is_long_call_p (decl
))
6069 /* If we are interworking and the function is not declared static
6070 then we can't tail-call it unless we know that it exists in this
6071 compilation unit (since it might be a Thumb routine). */
6072 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6073 && !TREE_ASM_WRITTEN (decl
))
6076 func_type
= arm_current_func_type ();
6077 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6078 if (IS_INTERRUPT (func_type
))
6081 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6083 /* Check that the return value locations are the same. For
6084 example that we aren't returning a value from the sibling in
6085 a VFP register but then need to transfer it to a core
6089 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
6090 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6092 if (!rtx_equal_p (a
, b
))
6096 /* Never tailcall if function may be called with a misaligned SP. */
6097 if (IS_STACKALIGN (func_type
))
6100 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6101 references should become a NOP. Don't convert such calls into
6103 if (TARGET_AAPCS_BASED
6104 && arm_abi
== ARM_ABI_AAPCS
6106 && DECL_WEAK (decl
))
6109 /* Everything else is ok. */
6114 /* Addressing mode support functions. */
6116 /* Return nonzero if X is a legitimate immediate operand when compiling
6117 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6119 legitimate_pic_operand_p (rtx x
)
6121 if (GET_CODE (x
) == SYMBOL_REF
6122 || (GET_CODE (x
) == CONST
6123 && GET_CODE (XEXP (x
, 0)) == PLUS
6124 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6130 /* Record that the current function needs a PIC register. Initialize
6131 cfun->machine->pic_reg if we have not already done so. */
6134 require_pic_register (void)
6136 /* A lot of the logic here is made obscure by the fact that this
6137 routine gets called as part of the rtx cost estimation process.
6138 We don't want those calls to affect any assumptions about the real
6139 function; and further, we can't call entry_of_function() until we
6140 start the real expansion process. */
6141 if (!crtl
->uses_pic_offset_table
)
6143 gcc_assert (can_create_pseudo_p ());
6144 if (arm_pic_register
!= INVALID_REGNUM
6145 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6147 if (!cfun
->machine
->pic_reg
)
6148 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6150 /* Play games to avoid marking the function as needing pic
6151 if we are being called as part of the cost-estimation
6153 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6154 crtl
->uses_pic_offset_table
= 1;
6160 if (!cfun
->machine
->pic_reg
)
6161 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6163 /* Play games to avoid marking the function as needing pic
6164 if we are being called as part of the cost-estimation
6166 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6168 crtl
->uses_pic_offset_table
= 1;
6171 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6172 && arm_pic_register
> LAST_LO_REGNUM
)
6173 emit_move_insn (cfun
->machine
->pic_reg
,
6174 gen_rtx_REG (Pmode
, arm_pic_register
));
6176 arm_load_pic_register (0UL);
6181 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6183 INSN_LOCATION (insn
) = prologue_location
;
6185 /* We can be called during expansion of PHI nodes, where
6186 we can't yet emit instructions directly in the final
6187 insn stream. Queue the insns on the entry edge, they will
6188 be committed after everything else is expanded. */
6189 insert_insn_on_edge (seq
,
6190 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6197 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
6199 if (GET_CODE (orig
) == SYMBOL_REF
6200 || GET_CODE (orig
) == LABEL_REF
)
6206 gcc_assert (can_create_pseudo_p ());
6207 reg
= gen_reg_rtx (Pmode
);
6210 /* VxWorks does not impose a fixed gap between segments; the run-time
6211 gap can be different from the object-file gap. We therefore can't
6212 use GOTOFF unless we are absolutely sure that the symbol is in the
6213 same segment as the GOT. Unfortunately, the flexibility of linker
6214 scripts means that we can't be sure of that in general, so assume
6215 that GOTOFF is never valid on VxWorks. */
6216 if ((GET_CODE (orig
) == LABEL_REF
6217 || (GET_CODE (orig
) == SYMBOL_REF
&&
6218 SYMBOL_REF_LOCAL_P (orig
)))
6220 && arm_pic_data_is_text_relative
)
6221 insn
= arm_pic_static_addr (orig
, reg
);
6227 /* If this function doesn't have a pic register, create one now. */
6228 require_pic_register ();
6230 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6232 /* Make the MEM as close to a constant as possible. */
6233 mem
= SET_SRC (pat
);
6234 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6235 MEM_READONLY_P (mem
) = 1;
6236 MEM_NOTRAP_P (mem
) = 1;
6238 insn
= emit_insn (pat
);
6241 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6243 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6247 else if (GET_CODE (orig
) == CONST
)
6251 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6252 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6255 /* Handle the case where we have: const (UNSPEC_TLS). */
6256 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6257 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6260 /* Handle the case where we have:
6261 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6263 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6264 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6265 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6267 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6273 gcc_assert (can_create_pseudo_p ());
6274 reg
= gen_reg_rtx (Pmode
);
6277 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6279 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6280 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6281 base
== reg
? 0 : reg
);
6283 if (CONST_INT_P (offset
))
6285 /* The base register doesn't really matter, we only want to
6286 test the index for the appropriate mode. */
6287 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6289 gcc_assert (can_create_pseudo_p ());
6290 offset
= force_reg (Pmode
, offset
);
6293 if (CONST_INT_P (offset
))
6294 return plus_constant (Pmode
, base
, INTVAL (offset
));
6297 if (GET_MODE_SIZE (mode
) > 4
6298 && (GET_MODE_CLASS (mode
) == MODE_INT
6299 || TARGET_SOFT_FLOAT
))
6301 emit_insn (gen_addsi3 (reg
, base
, offset
));
6305 return gen_rtx_PLUS (Pmode
, base
, offset
);
6312 /* Find a spare register to use during the prolog of a function. */
6315 thumb_find_work_register (unsigned long pushed_regs_mask
)
6319 /* Check the argument registers first as these are call-used. The
6320 register allocation order means that sometimes r3 might be used
6321 but earlier argument registers might not, so check them all. */
6322 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6323 if (!df_regs_ever_live_p (reg
))
6326 /* Before going on to check the call-saved registers we can try a couple
6327 more ways of deducing that r3 is available. The first is when we are
6328 pushing anonymous arguments onto the stack and we have less than 4
6329 registers worth of fixed arguments(*). In this case r3 will be part of
6330 the variable argument list and so we can be sure that it will be
6331 pushed right at the start of the function. Hence it will be available
6332 for the rest of the prologue.
6333 (*): ie crtl->args.pretend_args_size is greater than 0. */
6334 if (cfun
->machine
->uses_anonymous_args
6335 && crtl
->args
.pretend_args_size
> 0)
6336 return LAST_ARG_REGNUM
;
6338 /* The other case is when we have fixed arguments but less than 4 registers
6339 worth. In this case r3 might be used in the body of the function, but
6340 it is not being used to convey an argument into the function. In theory
6341 we could just check crtl->args.size to see how many bytes are
6342 being passed in argument registers, but it seems that it is unreliable.
6343 Sometimes it will have the value 0 when in fact arguments are being
6344 passed. (See testcase execute/20021111-1.c for an example). So we also
6345 check the args_info.nregs field as well. The problem with this field is
6346 that it makes no allowances for arguments that are passed to the
6347 function but which are not used. Hence we could miss an opportunity
6348 when a function has an unused argument in r3. But it is better to be
6349 safe than to be sorry. */
6350 if (! cfun
->machine
->uses_anonymous_args
6351 && crtl
->args
.size
>= 0
6352 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6353 && (TARGET_AAPCS_BASED
6354 ? crtl
->args
.info
.aapcs_ncrn
< 4
6355 : crtl
->args
.info
.nregs
< 4))
6356 return LAST_ARG_REGNUM
;
6358 /* Otherwise look for a call-saved register that is going to be pushed. */
6359 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6360 if (pushed_regs_mask
& (1 << reg
))
6365 /* Thumb-2 can use high regs. */
6366 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6367 if (pushed_regs_mask
& (1 << reg
))
6370 /* Something went wrong - thumb_compute_save_reg_mask()
6371 should have arranged for a suitable register to be pushed. */
6375 static GTY(()) int pic_labelno
;
6377 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6381 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6383 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6385 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6388 gcc_assert (flag_pic
);
6390 pic_reg
= cfun
->machine
->pic_reg
;
6391 if (TARGET_VXWORKS_RTP
)
6393 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6394 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6395 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6397 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6399 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6400 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6404 /* We use an UNSPEC rather than a LABEL_REF because this label
6405 never appears in the code stream. */
6407 labelno
= GEN_INT (pic_labelno
++);
6408 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6409 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6411 /* On the ARM the PC register contains 'dot + 8' at the time of the
6412 addition, on the Thumb it is 'dot + 4'. */
6413 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6414 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6416 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6420 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6422 else /* TARGET_THUMB1 */
6424 if (arm_pic_register
!= INVALID_REGNUM
6425 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6427 /* We will have pushed the pic register, so we should always be
6428 able to find a work register. */
6429 pic_tmp
= gen_rtx_REG (SImode
,
6430 thumb_find_work_register (saved_regs
));
6431 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6432 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6433 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6435 else if (arm_pic_register
!= INVALID_REGNUM
6436 && arm_pic_register
> LAST_LO_REGNUM
6437 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
6439 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6440 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
6441 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
6444 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6448 /* Need to emit this whether or not we obey regdecls,
6449 since setjmp/longjmp can cause life info to screw up. */
6453 /* Generate code to load the address of a static var when flag_pic is set. */
6455 arm_pic_static_addr (rtx orig
, rtx reg
)
6457 rtx l1
, labelno
, offset_rtx
, insn
;
6459 gcc_assert (flag_pic
);
6461 /* We use an UNSPEC rather than a LABEL_REF because this label
6462 never appears in the code stream. */
6463 labelno
= GEN_INT (pic_labelno
++);
6464 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6465 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6467 /* On the ARM the PC register contains 'dot + 8' at the time of the
6468 addition, on the Thumb it is 'dot + 4'. */
6469 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6470 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
6471 UNSPEC_SYMBOL_OFFSET
);
6472 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
6474 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
6478 /* Return nonzero if X is valid as an ARM state addressing register. */
6480 arm_address_register_rtx_p (rtx x
, int strict_p
)
6490 return ARM_REGNO_OK_FOR_BASE_P (regno
);
6492 return (regno
<= LAST_ARM_REGNUM
6493 || regno
>= FIRST_PSEUDO_REGISTER
6494 || regno
== FRAME_POINTER_REGNUM
6495 || regno
== ARG_POINTER_REGNUM
);
6498 /* Return TRUE if this rtx is the difference of a symbol and a label,
6499 and will reduce to a PC-relative relocation in the object file.
6500 Expressions like this can be left alone when generating PIC, rather
6501 than forced through the GOT. */
6503 pcrel_constant_p (rtx x
)
6505 if (GET_CODE (x
) == MINUS
)
6506 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
6511 /* Return true if X will surely end up in an index register after next
6514 will_be_in_index_register (const_rtx x
)
6516 /* arm.md: calculate_pic_address will split this into a register. */
6517 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
6520 /* Return nonzero if X is a valid ARM state address operand. */
6522 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
6526 enum rtx_code code
= GET_CODE (x
);
6528 if (arm_address_register_rtx_p (x
, strict_p
))
6531 use_ldrd
= (TARGET_LDRD
6533 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6535 if (code
== POST_INC
|| code
== PRE_DEC
6536 || ((code
== PRE_INC
|| code
== POST_DEC
)
6537 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6538 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6540 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6541 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6542 && GET_CODE (XEXP (x
, 1)) == PLUS
6543 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6545 rtx addend
= XEXP (XEXP (x
, 1), 1);
6547 /* Don't allow ldrd post increment by register because it's hard
6548 to fixup invalid register choices. */
6550 && GET_CODE (x
) == POST_MODIFY
6554 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
6555 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
6558 /* After reload constants split into minipools will have addresses
6559 from a LABEL_REF. */
6560 else if (reload_completed
6561 && (code
== LABEL_REF
6563 && GET_CODE (XEXP (x
, 0)) == PLUS
6564 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6565 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6568 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6571 else if (code
== PLUS
)
6573 rtx xop0
= XEXP (x
, 0);
6574 rtx xop1
= XEXP (x
, 1);
6576 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6577 && ((CONST_INT_P (xop1
)
6578 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
6579 || (!strict_p
&& will_be_in_index_register (xop1
))))
6580 || (arm_address_register_rtx_p (xop1
, strict_p
)
6581 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
6585 /* Reload currently can't handle MINUS, so disable this for now */
6586 else if (GET_CODE (x
) == MINUS
)
6588 rtx xop0
= XEXP (x
, 0);
6589 rtx xop1
= XEXP (x
, 1);
6591 return (arm_address_register_rtx_p (xop0
, strict_p
)
6592 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
6596 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6597 && code
== SYMBOL_REF
6598 && CONSTANT_POOL_ADDRESS_P (x
)
6600 && symbol_mentioned_p (get_pool_constant (x
))
6601 && ! pcrel_constant_p (get_pool_constant (x
))))
6607 /* Return nonzero if X is a valid Thumb-2 address operand. */
6609 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6612 enum rtx_code code
= GET_CODE (x
);
6614 if (arm_address_register_rtx_p (x
, strict_p
))
6617 use_ldrd
= (TARGET_LDRD
6619 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6621 if (code
== POST_INC
|| code
== PRE_DEC
6622 || ((code
== PRE_INC
|| code
== POST_DEC
)
6623 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6624 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6626 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6627 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6628 && GET_CODE (XEXP (x
, 1)) == PLUS
6629 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6631 /* Thumb-2 only has autoincrement by constant. */
6632 rtx addend
= XEXP (XEXP (x
, 1), 1);
6633 HOST_WIDE_INT offset
;
6635 if (!CONST_INT_P (addend
))
6638 offset
= INTVAL(addend
);
6639 if (GET_MODE_SIZE (mode
) <= 4)
6640 return (offset
> -256 && offset
< 256);
6642 return (use_ldrd
&& offset
> -1024 && offset
< 1024
6643 && (offset
& 3) == 0);
6646 /* After reload constants split into minipools will have addresses
6647 from a LABEL_REF. */
6648 else if (reload_completed
6649 && (code
== LABEL_REF
6651 && GET_CODE (XEXP (x
, 0)) == PLUS
6652 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6653 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6656 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6659 else if (code
== PLUS
)
6661 rtx xop0
= XEXP (x
, 0);
6662 rtx xop1
= XEXP (x
, 1);
6664 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6665 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
6666 || (!strict_p
&& will_be_in_index_register (xop1
))))
6667 || (arm_address_register_rtx_p (xop1
, strict_p
)
6668 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
6671 /* Normally we can assign constant values to target registers without
6672 the help of constant pool. But there are cases we have to use constant
6674 1) assign a label to register.
6675 2) sign-extend a 8bit value to 32bit and then assign to register.
6677 Constant pool access in format:
6678 (set (reg r0) (mem (symbol_ref (".LC0"))))
6679 will cause the use of literal pool (later in function arm_reorg).
6680 So here we mark such format as an invalid format, then the compiler
6681 will adjust it into:
6682 (set (reg r0) (symbol_ref (".LC0")))
6683 (set (reg r0) (mem (reg r0))).
6684 No extra register is required, and (mem (reg r0)) won't cause the use
6685 of literal pools. */
6686 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
6687 && CONSTANT_POOL_ADDRESS_P (x
))
6690 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6691 && code
== SYMBOL_REF
6692 && CONSTANT_POOL_ADDRESS_P (x
)
6694 && symbol_mentioned_p (get_pool_constant (x
))
6695 && ! pcrel_constant_p (get_pool_constant (x
))))
6701 /* Return nonzero if INDEX is valid for an address index operand in
6704 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
6707 HOST_WIDE_INT range
;
6708 enum rtx_code code
= GET_CODE (index
);
6710 /* Standard coprocessor addressing modes. */
6711 if (TARGET_HARD_FLOAT
6713 && (mode
== SFmode
|| mode
== DFmode
))
6714 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6715 && INTVAL (index
) > -1024
6716 && (INTVAL (index
) & 3) == 0);
6718 /* For quad modes, we restrict the constant offset to be slightly less
6719 than what the instruction format permits. We do this because for
6720 quad mode moves, we will actually decompose them into two separate
6721 double-mode reads or writes. INDEX must therefore be a valid
6722 (double-mode) offset and so should INDEX+8. */
6723 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6724 return (code
== CONST_INT
6725 && INTVAL (index
) < 1016
6726 && INTVAL (index
) > -1024
6727 && (INTVAL (index
) & 3) == 0);
6729 /* We have no such constraint on double mode offsets, so we permit the
6730 full range of the instruction format. */
6731 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6732 return (code
== CONST_INT
6733 && INTVAL (index
) < 1024
6734 && INTVAL (index
) > -1024
6735 && (INTVAL (index
) & 3) == 0);
6737 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6738 return (code
== CONST_INT
6739 && INTVAL (index
) < 1024
6740 && INTVAL (index
) > -1024
6741 && (INTVAL (index
) & 3) == 0);
6743 if (arm_address_register_rtx_p (index
, strict_p
)
6744 && (GET_MODE_SIZE (mode
) <= 4))
6747 if (mode
== DImode
|| mode
== DFmode
)
6749 if (code
== CONST_INT
)
6751 HOST_WIDE_INT val
= INTVAL (index
);
6754 return val
> -256 && val
< 256;
6756 return val
> -4096 && val
< 4092;
6759 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
6762 if (GET_MODE_SIZE (mode
) <= 4
6766 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
6770 rtx xiop0
= XEXP (index
, 0);
6771 rtx xiop1
= XEXP (index
, 1);
6773 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6774 && power_of_two_operand (xiop1
, SImode
))
6775 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6776 && power_of_two_operand (xiop0
, SImode
)));
6778 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
6779 || code
== ASHIFT
|| code
== ROTATERT
)
6781 rtx op
= XEXP (index
, 1);
6783 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6786 && INTVAL (op
) <= 31);
6790 /* For ARM v4 we may be doing a sign-extend operation during the
6796 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
6802 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
6804 return (code
== CONST_INT
6805 && INTVAL (index
) < range
6806 && INTVAL (index
) > -range
);
6809 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6810 index operand. i.e. 1, 2, 4 or 8. */
6812 thumb2_index_mul_operand (rtx op
)
6816 if (!CONST_INT_P (op
))
6820 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
6823 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6825 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
6827 enum rtx_code code
= GET_CODE (index
);
6829 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6830 /* Standard coprocessor addressing modes. */
6831 if (TARGET_HARD_FLOAT
6833 && (mode
== SFmode
|| mode
== DFmode
))
6834 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6835 /* Thumb-2 allows only > -256 index range for it's core register
6836 load/stores. Since we allow SF/DF in core registers, we have
6837 to use the intersection between -256~4096 (core) and -1024~1024
6839 && INTVAL (index
) > -256
6840 && (INTVAL (index
) & 3) == 0);
6842 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6844 /* For DImode assume values will usually live in core regs
6845 and only allow LDRD addressing modes. */
6846 if (!TARGET_LDRD
|| mode
!= DImode
)
6847 return (code
== CONST_INT
6848 && INTVAL (index
) < 1024
6849 && INTVAL (index
) > -1024
6850 && (INTVAL (index
) & 3) == 0);
6853 /* For quad modes, we restrict the constant offset to be slightly less
6854 than what the instruction format permits. We do this because for
6855 quad mode moves, we will actually decompose them into two separate
6856 double-mode reads or writes. INDEX must therefore be a valid
6857 (double-mode) offset and so should INDEX+8. */
6858 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6859 return (code
== CONST_INT
6860 && INTVAL (index
) < 1016
6861 && INTVAL (index
) > -1024
6862 && (INTVAL (index
) & 3) == 0);
6864 /* We have no such constraint on double mode offsets, so we permit the
6865 full range of the instruction format. */
6866 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6867 return (code
== CONST_INT
6868 && INTVAL (index
) < 1024
6869 && INTVAL (index
) > -1024
6870 && (INTVAL (index
) & 3) == 0);
6872 if (arm_address_register_rtx_p (index
, strict_p
)
6873 && (GET_MODE_SIZE (mode
) <= 4))
6876 if (mode
== DImode
|| mode
== DFmode
)
6878 if (code
== CONST_INT
)
6880 HOST_WIDE_INT val
= INTVAL (index
);
6881 /* ??? Can we assume ldrd for thumb2? */
6882 /* Thumb-2 ldrd only has reg+const addressing modes. */
6883 /* ldrd supports offsets of +-1020.
6884 However the ldr fallback does not. */
6885 return val
> -256 && val
< 256 && (val
& 3) == 0;
6893 rtx xiop0
= XEXP (index
, 0);
6894 rtx xiop1
= XEXP (index
, 1);
6896 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6897 && thumb2_index_mul_operand (xiop1
))
6898 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6899 && thumb2_index_mul_operand (xiop0
)));
6901 else if (code
== ASHIFT
)
6903 rtx op
= XEXP (index
, 1);
6905 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6908 && INTVAL (op
) <= 3);
6911 return (code
== CONST_INT
6912 && INTVAL (index
) < 4096
6913 && INTVAL (index
) > -256);
6916 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6918 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
6928 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
6930 return (regno
<= LAST_LO_REGNUM
6931 || regno
> LAST_VIRTUAL_REGISTER
6932 || regno
== FRAME_POINTER_REGNUM
6933 || (GET_MODE_SIZE (mode
) >= 4
6934 && (regno
== STACK_POINTER_REGNUM
6935 || regno
>= FIRST_PSEUDO_REGISTER
6936 || x
== hard_frame_pointer_rtx
6937 || x
== arg_pointer_rtx
)));
6940 /* Return nonzero if x is a legitimate index register. This is the case
6941 for any base register that can access a QImode object. */
6943 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
6945 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
6948 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6950 The AP may be eliminated to either the SP or the FP, so we use the
6951 least common denominator, e.g. SImode, and offsets from 0 to 64.
6953 ??? Verify whether the above is the right approach.
6955 ??? Also, the FP may be eliminated to the SP, so perhaps that
6956 needs special handling also.
6958 ??? Look at how the mips16 port solves this problem. It probably uses
6959 better ways to solve some of these problems.
6961 Although it is not incorrect, we don't accept QImode and HImode
6962 addresses based on the frame pointer or arg pointer until the
6963 reload pass starts. This is so that eliminating such addresses
6964 into stack based ones won't produce impossible code. */
6966 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6968 /* ??? Not clear if this is right. Experiment. */
6969 if (GET_MODE_SIZE (mode
) < 4
6970 && !(reload_in_progress
|| reload_completed
)
6971 && (reg_mentioned_p (frame_pointer_rtx
, x
)
6972 || reg_mentioned_p (arg_pointer_rtx
, x
)
6973 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
6974 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
6975 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
6976 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
6979 /* Accept any base register. SP only in SImode or larger. */
6980 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
6983 /* This is PC relative data before arm_reorg runs. */
6984 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
6985 && GET_CODE (x
) == SYMBOL_REF
6986 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
6989 /* This is PC relative data after arm_reorg runs. */
6990 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
6992 && (GET_CODE (x
) == LABEL_REF
6993 || (GET_CODE (x
) == CONST
6994 && GET_CODE (XEXP (x
, 0)) == PLUS
6995 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6996 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6999 /* Post-inc indexing only supported for SImode and larger. */
7000 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7001 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7004 else if (GET_CODE (x
) == PLUS
)
7006 /* REG+REG address can be any two index registers. */
7007 /* We disallow FRAME+REG addressing since we know that FRAME
7008 will be replaced with STACK, and SP relative addressing only
7009 permits SP+OFFSET. */
7010 if (GET_MODE_SIZE (mode
) <= 4
7011 && XEXP (x
, 0) != frame_pointer_rtx
7012 && XEXP (x
, 1) != frame_pointer_rtx
7013 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7014 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7015 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7018 /* REG+const has 5-7 bit offset for non-SP registers. */
7019 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7020 || XEXP (x
, 0) == arg_pointer_rtx
)
7021 && CONST_INT_P (XEXP (x
, 1))
7022 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7025 /* REG+const has 10-bit offset for SP, but only SImode and
7026 larger is supported. */
7027 /* ??? Should probably check for DI/DFmode overflow here
7028 just like GO_IF_LEGITIMATE_OFFSET does. */
7029 else if (REG_P (XEXP (x
, 0))
7030 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7031 && GET_MODE_SIZE (mode
) >= 4
7032 && CONST_INT_P (XEXP (x
, 1))
7033 && INTVAL (XEXP (x
, 1)) >= 0
7034 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7035 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7038 else if (REG_P (XEXP (x
, 0))
7039 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7040 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7041 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7042 && REGNO (XEXP (x
, 0))
7043 <= LAST_VIRTUAL_POINTER_REGISTER
))
7044 && GET_MODE_SIZE (mode
) >= 4
7045 && CONST_INT_P (XEXP (x
, 1))
7046 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7050 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7051 && GET_MODE_SIZE (mode
) == 4
7052 && GET_CODE (x
) == SYMBOL_REF
7053 && CONSTANT_POOL_ADDRESS_P (x
)
7055 && symbol_mentioned_p (get_pool_constant (x
))
7056 && ! pcrel_constant_p (get_pool_constant (x
))))
7062 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7063 instruction of mode MODE. */
7065 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
7067 switch (GET_MODE_SIZE (mode
))
7070 return val
>= 0 && val
< 32;
7073 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7077 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7083 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
7086 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7087 else if (TARGET_THUMB2
)
7088 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7089 else /* if (TARGET_THUMB1) */
7090 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7093 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7095 Given an rtx X being reloaded into a reg required to be
7096 in class CLASS, return the class of reg to actually use.
7097 In general this is just CLASS, but for the Thumb core registers and
7098 immediate constants we prefer a LO_REGS class or a subset. */
7101 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7107 if (rclass
== GENERAL_REGS
)
7114 /* Build the SYMBOL_REF for __tls_get_addr. */
7116 static GTY(()) rtx tls_get_addr_libfunc
;
7119 get_tls_get_addr (void)
7121 if (!tls_get_addr_libfunc
)
7122 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7123 return tls_get_addr_libfunc
;
7127 arm_load_tp (rtx target
)
7130 target
= gen_reg_rtx (SImode
);
7134 /* Can return in any reg. */
7135 emit_insn (gen_load_tp_hard (target
));
7139 /* Always returned in r0. Immediately copy the result into a pseudo,
7140 otherwise other uses of r0 (e.g. setting up function arguments) may
7141 clobber the value. */
7145 emit_insn (gen_load_tp_soft ());
7147 tmp
= gen_rtx_REG (SImode
, 0);
7148 emit_move_insn (target
, tmp
);
7154 load_tls_operand (rtx x
, rtx reg
)
7158 if (reg
== NULL_RTX
)
7159 reg
= gen_reg_rtx (SImode
);
7161 tmp
= gen_rtx_CONST (SImode
, x
);
7163 emit_move_insn (reg
, tmp
);
7169 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7171 rtx insns
, label
, labelno
, sum
;
7173 gcc_assert (reloc
!= TLS_DESCSEQ
);
7176 labelno
= GEN_INT (pic_labelno
++);
7177 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7178 label
= gen_rtx_CONST (VOIDmode
, label
);
7180 sum
= gen_rtx_UNSPEC (Pmode
,
7181 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7182 GEN_INT (TARGET_ARM
? 8 : 4)),
7184 reg
= load_tls_operand (sum
, reg
);
7187 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7189 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7191 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7192 LCT_PURE
, /* LCT_CONST? */
7193 Pmode
, 1, reg
, Pmode
);
7195 insns
= get_insns ();
7202 arm_tls_descseq_addr (rtx x
, rtx reg
)
7204 rtx labelno
= GEN_INT (pic_labelno
++);
7205 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7206 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7207 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7208 gen_rtx_CONST (VOIDmode
, label
),
7209 GEN_INT (!TARGET_ARM
)),
7211 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
7213 emit_insn (gen_tlscall (x
, labelno
));
7215 reg
= gen_reg_rtx (SImode
);
7217 gcc_assert (REGNO (reg
) != 0);
7219 emit_move_insn (reg
, reg0
);
7225 legitimize_tls_address (rtx x
, rtx reg
)
7227 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7228 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7232 case TLS_MODEL_GLOBAL_DYNAMIC
:
7233 if (TARGET_GNU2_TLS
)
7235 reg
= arm_tls_descseq_addr (x
, reg
);
7237 tp
= arm_load_tp (NULL_RTX
);
7239 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7243 /* Original scheme */
7244 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7245 dest
= gen_reg_rtx (Pmode
);
7246 emit_libcall_block (insns
, dest
, ret
, x
);
7250 case TLS_MODEL_LOCAL_DYNAMIC
:
7251 if (TARGET_GNU2_TLS
)
7253 reg
= arm_tls_descseq_addr (x
, reg
);
7255 tp
= arm_load_tp (NULL_RTX
);
7257 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7261 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7263 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7264 share the LDM result with other LD model accesses. */
7265 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7267 dest
= gen_reg_rtx (Pmode
);
7268 emit_libcall_block (insns
, dest
, ret
, eqv
);
7270 /* Load the addend. */
7271 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7272 GEN_INT (TLS_LDO32
)),
7274 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7275 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7279 case TLS_MODEL_INITIAL_EXEC
:
7280 labelno
= GEN_INT (pic_labelno
++);
7281 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7282 label
= gen_rtx_CONST (VOIDmode
, label
);
7283 sum
= gen_rtx_UNSPEC (Pmode
,
7284 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7285 GEN_INT (TARGET_ARM
? 8 : 4)),
7287 reg
= load_tls_operand (sum
, reg
);
7290 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7291 else if (TARGET_THUMB2
)
7292 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7295 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7296 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7299 tp
= arm_load_tp (NULL_RTX
);
7301 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7303 case TLS_MODEL_LOCAL_EXEC
:
7304 tp
= arm_load_tp (NULL_RTX
);
7306 reg
= gen_rtx_UNSPEC (Pmode
,
7307 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7309 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7311 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7318 /* Try machine-dependent ways of modifying an illegitimate address
7319 to be legitimate. If we find one, return the new, valid address. */
7321 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7325 /* TODO: legitimize_address for Thumb2. */
7328 return thumb_legitimize_address (x
, orig_x
, mode
);
7331 if (arm_tls_symbol_p (x
))
7332 return legitimize_tls_address (x
, NULL_RTX
);
7334 if (GET_CODE (x
) == PLUS
)
7336 rtx xop0
= XEXP (x
, 0);
7337 rtx xop1
= XEXP (x
, 1);
7339 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7340 xop0
= force_reg (SImode
, xop0
);
7342 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7343 && !symbol_mentioned_p (xop1
))
7344 xop1
= force_reg (SImode
, xop1
);
7346 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7347 && CONST_INT_P (xop1
))
7349 HOST_WIDE_INT n
, low_n
;
7353 /* VFP addressing modes actually allow greater offsets, but for
7354 now we just stick with the lowest common denominator. */
7356 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7368 low_n
= ((mode
) == TImode
? 0
7369 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7373 base_reg
= gen_reg_rtx (SImode
);
7374 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7375 emit_move_insn (base_reg
, val
);
7376 x
= plus_constant (Pmode
, base_reg
, low_n
);
7378 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7379 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7382 /* XXX We don't allow MINUS any more -- see comment in
7383 arm_legitimate_address_outer_p (). */
7384 else if (GET_CODE (x
) == MINUS
)
7386 rtx xop0
= XEXP (x
, 0);
7387 rtx xop1
= XEXP (x
, 1);
7389 if (CONSTANT_P (xop0
))
7390 xop0
= force_reg (SImode
, xop0
);
7392 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
7393 xop1
= force_reg (SImode
, xop1
);
7395 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7396 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
7399 /* Make sure to take full advantage of the pre-indexed addressing mode
7400 with absolute addresses which often allows for the base register to
7401 be factorized for multiple adjacent memory references, and it might
7402 even allows for the mini pool to be avoided entirely. */
7403 else if (CONST_INT_P (x
) && optimize
> 0)
7406 HOST_WIDE_INT mask
, base
, index
;
7409 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7410 use a 8-bit index. So let's use a 12-bit index for SImode only and
7411 hope that arm_gen_constant will enable ldrb to use more bits. */
7412 bits
= (mode
== SImode
) ? 12 : 8;
7413 mask
= (1 << bits
) - 1;
7414 base
= INTVAL (x
) & ~mask
;
7415 index
= INTVAL (x
) & mask
;
7416 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
7418 /* It'll most probably be more efficient to generate the base
7419 with more bits set and use a negative index instead. */
7423 base_reg
= force_reg (SImode
, GEN_INT (base
));
7424 x
= plus_constant (Pmode
, base_reg
, index
);
7429 /* We need to find and carefully transform any SYMBOL and LABEL
7430 references; so go back to the original address expression. */
7431 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7433 if (new_x
!= orig_x
)
7441 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7442 to be legitimate. If we find one, return the new, valid address. */
7444 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7446 if (arm_tls_symbol_p (x
))
7447 return legitimize_tls_address (x
, NULL_RTX
);
7449 if (GET_CODE (x
) == PLUS
7450 && CONST_INT_P (XEXP (x
, 1))
7451 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
7452 || INTVAL (XEXP (x
, 1)) < 0))
7454 rtx xop0
= XEXP (x
, 0);
7455 rtx xop1
= XEXP (x
, 1);
7456 HOST_WIDE_INT offset
= INTVAL (xop1
);
7458 /* Try and fold the offset into a biasing of the base register and
7459 then offsetting that. Don't do this when optimizing for space
7460 since it can cause too many CSEs. */
7461 if (optimize_size
&& offset
>= 0
7462 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
7464 HOST_WIDE_INT delta
;
7467 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
7468 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
7469 delta
= 31 * GET_MODE_SIZE (mode
);
7471 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
7473 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
7475 x
= plus_constant (Pmode
, xop0
, delta
);
7477 else if (offset
< 0 && offset
> -256)
7478 /* Small negative offsets are best done with a subtract before the
7479 dereference, forcing these into a register normally takes two
7481 x
= force_operand (x
, NULL_RTX
);
7484 /* For the remaining cases, force the constant into a register. */
7485 xop1
= force_reg (SImode
, xop1
);
7486 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7489 else if (GET_CODE (x
) == PLUS
7490 && s_register_operand (XEXP (x
, 1), SImode
)
7491 && !s_register_operand (XEXP (x
, 0), SImode
))
7493 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
7495 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
7500 /* We need to find and carefully transform any SYMBOL and LABEL
7501 references; so go back to the original address expression. */
7502 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7504 if (new_x
!= orig_x
)
7512 arm_legitimize_reload_address (rtx
*p
,
7513 enum machine_mode mode
,
7514 int opnum
, int type
,
7515 int ind_levels ATTRIBUTE_UNUSED
)
7517 /* We must recognize output that we have already generated ourselves. */
7518 if (GET_CODE (*p
) == PLUS
7519 && GET_CODE (XEXP (*p
, 0)) == PLUS
7520 && REG_P (XEXP (XEXP (*p
, 0), 0))
7521 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
7522 && CONST_INT_P (XEXP (*p
, 1)))
7524 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7525 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7526 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7530 if (GET_CODE (*p
) == PLUS
7531 && REG_P (XEXP (*p
, 0))
7532 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
7533 /* If the base register is equivalent to a constant, let the generic
7534 code handle it. Otherwise we will run into problems if a future
7535 reload pass decides to rematerialize the constant. */
7536 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
7537 && CONST_INT_P (XEXP (*p
, 1)))
7539 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
7540 HOST_WIDE_INT low
, high
;
7542 /* Detect coprocessor load/stores. */
7543 bool coproc_p
= ((TARGET_HARD_FLOAT
7545 && (mode
== SFmode
|| mode
== DFmode
))
7546 || (TARGET_REALLY_IWMMXT
7547 && VALID_IWMMXT_REG_MODE (mode
))
7549 && (VALID_NEON_DREG_MODE (mode
)
7550 || VALID_NEON_QREG_MODE (mode
))));
7552 /* For some conditions, bail out when lower two bits are unaligned. */
7553 if ((val
& 0x3) != 0
7554 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7556 /* For DI, and DF under soft-float: */
7557 || ((mode
== DImode
|| mode
== DFmode
)
7558 /* Without ldrd, we use stm/ldm, which does not
7559 fair well with unaligned bits. */
7561 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7562 || TARGET_THUMB2
))))
7565 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7566 of which the (reg+high) gets turned into a reload add insn,
7567 we try to decompose the index into high/low values that can often
7568 also lead to better reload CSE.
7570 ldr r0, [r2, #4100] // Offset too large
7571 ldr r1, [r2, #4104] // Offset too large
7573 is best reloaded as:
7579 which post-reload CSE can simplify in most cases to eliminate the
7580 second add instruction:
7585 The idea here is that we want to split out the bits of the constant
7586 as a mask, rather than as subtracting the maximum offset that the
7587 respective type of load/store used can handle.
7589 When encountering negative offsets, we can still utilize it even if
7590 the overall offset is positive; sometimes this may lead to an immediate
7591 that can be constructed with fewer instructions.
7593 ldr r0, [r2, #0x3FFFFC]
7595 This is best reloaded as:
7596 add t1, r2, #0x400000
7599 The trick for spotting this for a load insn with N bits of offset
7600 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
7601 negative offset that is going to make bit N and all the bits below
7602 it become zero in the remainder part.
7604 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7605 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7606 used in most cases of ARM load/store instructions. */
7608 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7609 (((VAL) & ((1 << (N)) - 1)) \
7610 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7615 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
7617 /* NEON quad-word load/stores are made of two double-word accesses,
7618 so the valid index range is reduced by 8. Treat as 9-bit range if
7620 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
7621 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
7623 else if (GET_MODE_SIZE (mode
) == 8)
7626 low
= (TARGET_THUMB2
7627 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
7628 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
7630 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7631 to access doublewords. The supported load/store offsets are
7632 -8, -4, and 4, which we try to produce here. */
7633 low
= ((val
& 0xf) ^ 0x8) - 0x8;
7635 else if (GET_MODE_SIZE (mode
) < 8)
7637 /* NEON element load/stores do not have an offset. */
7638 if (TARGET_NEON_FP16
&& mode
== HFmode
)
7643 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7644 Try the wider 12-bit range first, and re-try if the result
7646 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7648 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7652 if (mode
== HImode
|| mode
== HFmode
)
7655 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7658 /* The storehi/movhi_bytes fallbacks can use only
7659 [-4094,+4094] of the full ldrb/strb index range. */
7660 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7661 if (low
== 4095 || low
== -4095)
7666 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7672 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
7673 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
7674 - (unsigned HOST_WIDE_INT
) 0x80000000);
7675 /* Check for overflow or zero */
7676 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
7679 /* Reload the high part into a base reg; leave the low part
7681 Note that replacing this gen_rtx_PLUS with plus_constant is
7682 wrong in this case because we rely on the
7683 (plus (plus reg c1) c2) structure being preserved so that
7684 XEXP (*p, 0) in push_reload below uses the correct term. */
7685 *p
= gen_rtx_PLUS (GET_MODE (*p
),
7686 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
7689 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7690 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7691 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7699 thumb_legitimize_reload_address (rtx
*x_p
,
7700 enum machine_mode mode
,
7701 int opnum
, int type
,
7702 int ind_levels ATTRIBUTE_UNUSED
)
7706 if (GET_CODE (x
) == PLUS
7707 && GET_MODE_SIZE (mode
) < 4
7708 && REG_P (XEXP (x
, 0))
7709 && XEXP (x
, 0) == stack_pointer_rtx
7710 && CONST_INT_P (XEXP (x
, 1))
7711 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7716 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7717 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7721 /* If both registers are hi-regs, then it's better to reload the
7722 entire expression rather than each register individually. That
7723 only requires one reload register rather than two. */
7724 if (GET_CODE (x
) == PLUS
7725 && REG_P (XEXP (x
, 0))
7726 && REG_P (XEXP (x
, 1))
7727 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
7728 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
7733 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7734 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7741 /* Test for various thread-local symbols. */
7743 /* Return TRUE if X is a thread-local symbol. */
7746 arm_tls_symbol_p (rtx x
)
7748 if (! TARGET_HAVE_TLS
)
7751 if (GET_CODE (x
) != SYMBOL_REF
)
7754 return SYMBOL_REF_TLS_MODEL (x
) != 0;
7757 /* Helper for arm_tls_referenced_p. */
7760 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
7762 if (GET_CODE (*x
) == SYMBOL_REF
)
7763 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
7765 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7766 TLS offsets, not real symbol references. */
7767 if (GET_CODE (*x
) == UNSPEC
7768 && XINT (*x
, 1) == UNSPEC_TLS
)
7774 /* Return TRUE if X contains any TLS symbol references. */
7777 arm_tls_referenced_p (rtx x
)
7779 if (! TARGET_HAVE_TLS
)
7782 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
7785 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7787 On the ARM, allow any integer (invalid ones are removed later by insn
7788 patterns), nice doubles and symbol_refs which refer to the function's
7791 When generating pic allow anything. */
7794 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
7796 /* At present, we have no support for Neon structure constants, so forbid
7797 them here. It might be possible to handle simple cases like 0 and -1
7799 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
7802 return flag_pic
|| !label_mentioned_p (x
);
7806 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7808 return (CONST_INT_P (x
)
7809 || CONST_DOUBLE_P (x
)
7810 || CONSTANT_ADDRESS_P (x
)
7815 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
7817 return (!arm_cannot_force_const_mem (mode
, x
)
7819 ? arm_legitimate_constant_p_1 (mode
, x
)
7820 : thumb_legitimate_constant_p (mode
, x
)));
7823 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7826 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7830 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
7832 split_const (x
, &base
, &offset
);
7833 if (GET_CODE (base
) == SYMBOL_REF
7834 && !offset_within_block_p (base
, INTVAL (offset
)))
7837 return arm_tls_referenced_p (x
);
/* True when X is a register, or a SUBREG of a register.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* The underlying register of X, looking through one SUBREG.  */
#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
7848 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7850 enum machine_mode mode
= GET_MODE (x
);
7859 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7866 return COSTS_N_INSNS (1);
7869 if (CONST_INT_P (XEXP (x
, 1)))
7872 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7879 return COSTS_N_INSNS (2) + cycles
;
7881 return COSTS_N_INSNS (1) + 16;
7884 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7886 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
7887 return (COSTS_N_INSNS (words
)
7888 + 4 * ((MEM_P (SET_SRC (x
)))
7889 + MEM_P (SET_DEST (x
))));
7894 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7896 if (thumb_shiftable_const (INTVAL (x
)))
7897 return COSTS_N_INSNS (2);
7898 return COSTS_N_INSNS (3);
7900 else if ((outer
== PLUS
|| outer
== COMPARE
)
7901 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7903 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7904 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7905 return COSTS_N_INSNS (1);
7906 else if (outer
== AND
)
7909 /* This duplicates the tests in the andsi3 expander. */
7910 for (i
= 9; i
<= 31; i
++)
7911 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7912 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7913 return COSTS_N_INSNS (2);
7915 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7916 || outer
== LSHIFTRT
)
7918 return COSTS_N_INSNS (2);
7924 return COSTS_N_INSNS (3);
7942 /* XXX another guess. */
7943 /* Memory costs quite a lot for the first word, but subsequent words
7944 load at the equivalent of a single insn each. */
7945 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7946 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7951 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7957 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
7958 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
7964 return total
+ COSTS_N_INSNS (1);
7966 /* Assume a two-shift sequence. Increase the cost slightly so
7967 we prefer actual shifts over an extend operation. */
7968 return total
+ 1 + COSTS_N_INSNS (2);
7976 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
7978 enum machine_mode mode
= GET_MODE (x
);
7979 enum rtx_code subcode
;
7981 enum rtx_code code
= GET_CODE (x
);
7987 /* Memory costs quite a lot for the first word, but subsequent words
7988 load at the equivalent of a single insn each. */
7989 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
7996 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
7997 *total
= COSTS_N_INSNS (2);
7998 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
7999 *total
= COSTS_N_INSNS (4);
8001 *total
= COSTS_N_INSNS (20);
8005 if (REG_P (XEXP (x
, 1)))
8006 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8007 else if (!CONST_INT_P (XEXP (x
, 1)))
8008 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8014 *total
+= COSTS_N_INSNS (4);
8019 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8020 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8023 *total
+= COSTS_N_INSNS (3);
8027 *total
+= COSTS_N_INSNS (1);
8028 /* Increase the cost of complex shifts because they aren't any faster,
8029 and reduce dual issue opportunities. */
8030 if (arm_tune_cortex_a9
8031 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8039 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8040 if (CONST_INT_P (XEXP (x
, 0))
8041 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8043 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8047 if (CONST_INT_P (XEXP (x
, 1))
8048 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8050 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8057 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8059 if (TARGET_HARD_FLOAT
8061 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8063 *total
= COSTS_N_INSNS (1);
8064 if (CONST_DOUBLE_P (XEXP (x
, 0))
8065 && arm_const_double_rtx (XEXP (x
, 0)))
8067 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8071 if (CONST_DOUBLE_P (XEXP (x
, 1))
8072 && arm_const_double_rtx (XEXP (x
, 1)))
8074 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8080 *total
= COSTS_N_INSNS (20);
8084 *total
= COSTS_N_INSNS (1);
8085 if (CONST_INT_P (XEXP (x
, 0))
8086 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8088 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8092 subcode
= GET_CODE (XEXP (x
, 1));
8093 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8094 || subcode
== LSHIFTRT
8095 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8097 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8098 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8102 /* A shift as a part of RSB costs no more than RSB itself. */
8103 if (GET_CODE (XEXP (x
, 0)) == MULT
8104 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8106 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8107 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8112 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8114 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8115 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8119 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8120 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8122 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8123 if (REG_P (XEXP (XEXP (x
, 1), 0))
8124 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8125 *total
+= COSTS_N_INSNS (1);
8133 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8134 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8135 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8137 *total
= COSTS_N_INSNS (1);
8138 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8140 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8144 /* MLA: All arguments must be registers. We filter out
8145 multiplication by a power of two, so that we fall down into
8147 if (GET_CODE (XEXP (x
, 0)) == MULT
8148 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8150 /* The cost comes from the cost of the multiply. */
8154 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8156 if (TARGET_HARD_FLOAT
8158 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8160 *total
= COSTS_N_INSNS (1);
8161 if (CONST_DOUBLE_P (XEXP (x
, 1))
8162 && arm_const_double_rtx (XEXP (x
, 1)))
8164 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8171 *total
= COSTS_N_INSNS (20);
8175 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8176 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8178 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8179 if (REG_P (XEXP (XEXP (x
, 0), 0))
8180 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8181 *total
+= COSTS_N_INSNS (1);
8187 case AND
: case XOR
: case IOR
:
8189 /* Normally the frame registers will be spilt into reg+const during
8190 reload, so it is a bad idea to combine them with other instructions,
8191 since then they might not be moved outside of loops. As a compromise
8192 we allow integration with ops that have a constant as their second
8194 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8195 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8196 && !CONST_INT_P (XEXP (x
, 1)))
8197 *total
= COSTS_N_INSNS (1);
8201 *total
+= COSTS_N_INSNS (2);
8202 if (CONST_INT_P (XEXP (x
, 1))
8203 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8205 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8212 *total
+= COSTS_N_INSNS (1);
8213 if (CONST_INT_P (XEXP (x
, 1))
8214 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8216 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8219 subcode
= GET_CODE (XEXP (x
, 0));
8220 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8221 || subcode
== LSHIFTRT
8222 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8224 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8225 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8230 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8232 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8233 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8237 if (subcode
== UMIN
|| subcode
== UMAX
8238 || subcode
== SMIN
|| subcode
== SMAX
)
8240 *total
= COSTS_N_INSNS (3);
8247 /* This should have been handled by the CPU specific routines. */
8251 if (arm_arch3m
&& mode
== SImode
8252 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8253 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8254 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8255 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8256 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8257 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8259 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8262 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8266 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8268 if (TARGET_HARD_FLOAT
8270 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8272 *total
= COSTS_N_INSNS (1);
8275 *total
= COSTS_N_INSNS (2);
8281 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8282 if (mode
== SImode
&& code
== NOT
)
8284 subcode
= GET_CODE (XEXP (x
, 0));
8285 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8286 || subcode
== LSHIFTRT
8287 || subcode
== ROTATE
|| subcode
== ROTATERT
8289 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8291 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8292 /* Register shifts cost an extra cycle. */
8293 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8294 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8303 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8305 *total
= COSTS_N_INSNS (4);
8309 operand
= XEXP (x
, 0);
8311 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8312 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8313 && REG_P (XEXP (operand
, 0))
8314 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8315 *total
+= COSTS_N_INSNS (1);
8316 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8317 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8321 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8323 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8329 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8330 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8332 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8338 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8339 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8341 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8361 /* SCC insns. In the case where the comparison has already been
8362 performed, then they cost 2 instructions. Otherwise they need
8363 an additional comparison before them. */
8364 *total
= COSTS_N_INSNS (2);
8365 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8372 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8378 *total
+= COSTS_N_INSNS (1);
8379 if (CONST_INT_P (XEXP (x
, 1))
8380 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8382 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8386 subcode
= GET_CODE (XEXP (x
, 0));
8387 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8388 || subcode
== LSHIFTRT
8389 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8391 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8392 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8397 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8399 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8400 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8410 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8411 if (!CONST_INT_P (XEXP (x
, 1))
8412 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8413 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8417 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8419 if (TARGET_HARD_FLOAT
8421 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8423 *total
= COSTS_N_INSNS (1);
8426 *total
= COSTS_N_INSNS (20);
8429 *total
= COSTS_N_INSNS (1);
8431 *total
+= COSTS_N_INSNS (3);
8437 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8439 rtx op
= XEXP (x
, 0);
8440 enum machine_mode opmode
= GET_MODE (op
);
8443 *total
+= COSTS_N_INSNS (1);
8445 if (opmode
!= SImode
)
8449 /* If !arm_arch4, we use one of the extendhisi2_mem
8450 or movhi_bytes patterns for HImode. For a QImode
8451 sign extension, we first zero-extend from memory
8452 and then perform a shift sequence. */
8453 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8454 *total
+= COSTS_N_INSNS (2);
8457 *total
+= COSTS_N_INSNS (1);
8459 /* We don't have the necessary insn, so we need to perform some
8461 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8462 /* An and with constant 255. */
8463 *total
+= COSTS_N_INSNS (1);
8465 /* A shift sequence. Increase costs slightly to avoid
8466 combining two shifts into an extend operation. */
8467 *total
+= COSTS_N_INSNS (2) + 1;
8473 switch (GET_MODE (XEXP (x
, 0)))
8480 *total
= COSTS_N_INSNS (1);
8490 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8494 if (const_ok_for_arm (INTVAL (x
))
8495 || const_ok_for_arm (~INTVAL (x
)))
8496 *total
= COSTS_N_INSNS (1);
8498 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8499 INTVAL (x
), NULL_RTX
,
8506 *total
= COSTS_N_INSNS (3);
8510 *total
= COSTS_N_INSNS (1);
8514 *total
= COSTS_N_INSNS (1);
8515 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8519 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8520 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8521 *total
= COSTS_N_INSNS (1);
8523 *total
= COSTS_N_INSNS (4);
8527 /* The vec_extract patterns accept memory operands that require an
8528 address reload. Account for the cost of that reload to give the
8529 auto-inc-dec pass an incentive to try to replace them. */
8530 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8531 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8533 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8534 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8535 *total
+= COSTS_N_INSNS (1);
8538 /* Likewise for the vec_set patterns. */
8539 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8540 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8541 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8543 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8544 *total
= rtx_cost (mem
, code
, 0, speed
);
8545 if (!neon_vector_mem_operand (mem
, 2, true))
8546 *total
+= COSTS_N_INSNS (1);
8552 /* We cost this as high as our memory costs to allow this to
8553 be hoisted from loops. */
8554 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8556 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8562 && TARGET_HARD_FLOAT
8564 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8565 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8566 *total
= COSTS_N_INSNS (1);
8568 *total
= COSTS_N_INSNS (4);
8572 *total
= COSTS_N_INSNS (4);
8577 /* Estimates the size cost of thumb1 instructions.
8578 For now most of the code is copied from thumb1_rtx_costs. We need more
8579 fine grain tuning when we have more related test cases. */
8581 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8583 enum machine_mode mode
= GET_MODE (x
);
8592 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8596 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8597 defined by RTL expansion, especially for the expansion of
8599 if ((GET_CODE (XEXP (x
, 0)) == MULT
8600 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8601 || (GET_CODE (XEXP (x
, 1)) == MULT
8602 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8603 return COSTS_N_INSNS (2);
8604 /* On purpose fall through for normal RTX. */
8608 return COSTS_N_INSNS (1);
8611 if (CONST_INT_P (XEXP (x
, 1)))
8613 /* Thumb1 mul instruction can't operate on const. We must Load it
8614 into a register first. */
8615 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8616 return COSTS_N_INSNS (1) + const_size
;
8618 return COSTS_N_INSNS (1);
8621 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8623 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8624 return (COSTS_N_INSNS (words
)
8625 + 4 * ((MEM_P (SET_SRC (x
)))
8626 + MEM_P (SET_DEST (x
))));
8631 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8632 return COSTS_N_INSNS (1);
8633 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8634 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8635 return COSTS_N_INSNS (2);
8636 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8637 if (thumb_shiftable_const (INTVAL (x
)))
8638 return COSTS_N_INSNS (2);
8639 return COSTS_N_INSNS (3);
8641 else if ((outer
== PLUS
|| outer
== COMPARE
)
8642 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8644 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8645 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8646 return COSTS_N_INSNS (1);
8647 else if (outer
== AND
)
8650 /* This duplicates the tests in the andsi3 expander. */
8651 for (i
= 9; i
<= 31; i
++)
8652 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8653 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8654 return COSTS_N_INSNS (2);
8656 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8657 || outer
== LSHIFTRT
)
8659 return COSTS_N_INSNS (2);
8665 return COSTS_N_INSNS (3);
8683 /* XXX another guess. */
8684 /* Memory costs quite a lot for the first word, but subsequent words
8685 load at the equivalent of a single insn each. */
8686 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8687 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8692 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8697 /* XXX still guessing. */
8698 switch (GET_MODE (XEXP (x
, 0)))
8701 return (1 + (mode
== DImode
? 4 : 0)
8702 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8705 return (4 + (mode
== DImode
? 4 : 0)
8706 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8709 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8720 /* RTX costs when optimizing for size. */
8722 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8725 enum machine_mode mode
= GET_MODE (x
);
8728 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
8732 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8736 /* A memory access costs 1 insn if the mode is small, or the address is
8737 a single register, otherwise it costs one insn per word. */
8738 if (REG_P (XEXP (x
, 0)))
8739 *total
= COSTS_N_INSNS (1);
8741 && GET_CODE (XEXP (x
, 0)) == PLUS
8742 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
8743 /* This will be split into two instructions.
8744 See arm.md:calculate_pic_address. */
8745 *total
= COSTS_N_INSNS (2);
8747 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8754 /* Needs a libcall, so it costs about this. */
8755 *total
= COSTS_N_INSNS (2);
8759 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
8761 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8769 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
8771 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8774 else if (mode
== SImode
)
8776 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8777 /* Slightly disparage register shifts, but not by much. */
8778 if (!CONST_INT_P (XEXP (x
, 1)))
8779 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
8783 /* Needs a libcall. */
8784 *total
= COSTS_N_INSNS (2);
8788 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8789 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8791 *total
= COSTS_N_INSNS (1);
8797 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
8798 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
8800 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
8801 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
8802 || subcode1
== ROTATE
|| subcode1
== ROTATERT
8803 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
8804 || subcode1
== ASHIFTRT
)
8806 /* It's just the cost of the two operands. */
8811 *total
= COSTS_N_INSNS (1);
8815 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8819 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8820 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8822 *total
= COSTS_N_INSNS (1);
8826 /* A shift as a part of ADD costs nothing. */
8827 if (GET_CODE (XEXP (x
, 0)) == MULT
8828 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8830 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
8831 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
8832 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
8837 case AND
: case XOR
: case IOR
:
8840 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
8842 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
8843 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
8844 || (code
== AND
&& subcode
== NOT
))
8846 /* It's just the cost of the two operands. */
8852 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8856 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8860 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8861 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8863 *total
= COSTS_N_INSNS (1);
8869 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8878 if (cc_register (XEXP (x
, 0), VOIDmode
))
8881 *total
= COSTS_N_INSNS (1);
8885 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8886 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8887 *total
= COSTS_N_INSNS (1);
8889 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
8894 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
8897 if (const_ok_for_arm (INTVAL (x
)))
8898 /* A multiplication by a constant requires another instruction
8899 to load the constant to a register. */
8900 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
8902 else if (const_ok_for_arm (~INTVAL (x
)))
8903 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
8904 else if (const_ok_for_arm (-INTVAL (x
)))
8906 if (outer_code
== COMPARE
|| outer_code
== PLUS
8907 || outer_code
== MINUS
)
8910 *total
= COSTS_N_INSNS (1);
8913 *total
= COSTS_N_INSNS (2);
8919 *total
= COSTS_N_INSNS (2);
8923 *total
= COSTS_N_INSNS (4);
8928 && TARGET_HARD_FLOAT
8929 && outer_code
== SET
8930 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8931 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8932 *total
= COSTS_N_INSNS (1);
8934 *total
= COSTS_N_INSNS (4);
8939 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8940 cost of these slightly. */
8941 *total
= COSTS_N_INSNS (1) + 1;
8948 if (mode
!= VOIDmode
)
8949 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8951 *total
= COSTS_N_INSNS (4); /* How knows? */
8956 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8957 operand, then return the operand that is being shifted. If the shift
8958 is not by a constant, then set SHIFT_REG to point to the operand.
8959 Return NULL if OP is not a shifter operand. */
8961 shifter_op_p (rtx op
, rtx
*shift_reg
)
8963 enum rtx_code code
= GET_CODE (op
);
8965 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
8966 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
8967 return XEXP (op
, 0);
8968 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
8969 return XEXP (op
, 0);
8970 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
8971 || code
== ASHIFTRT
)
8973 if (!CONST_INT_P (XEXP (op
, 1)))
8974 *shift_reg
= XEXP (op
, 1);
8975 return XEXP (op
, 0);
8982 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
8984 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
8985 gcc_assert (GET_CODE (x
) == UNSPEC
);
8987 switch (XINT (x
, 1))
8989 case UNSPEC_UNALIGNED_LOAD
:
8990 /* We can only do unaligned loads into the integer unit, and we can't
8992 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
8994 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
8995 + extra_cost
->ldst
.load_unaligned
);
8998 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
8999 ADDR_SPACE_GENERIC
, speed_p
);
9003 case UNSPEC_UNALIGNED_STORE
:
9004 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9006 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9007 + extra_cost
->ldst
.store_unaligned
);
9009 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
9011 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9012 ADDR_SPACE_GENERIC
, speed_p
);
9022 *cost
= COSTS_N_INSNS (1);
9024 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9028 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.
   The N argument is parenthesized so that expression arguments
   (e.g. "a ? 1 : 2") expand with the intended precedence.  */
#define LIBCALL_COST(N) COSTS_N_INSNS ((N) + (speed_p ? 18 : 2))
/* Helper macro for arm_new_rtx_costs: cost a narrow-mode PLUS/MINUS whose
   operand IDX is a shift-left, emitting the shifted-arith cost and
   returning true from the enclosing function when the pattern matches.
   Relies on x, cost, speed_p, extra_cost, shift_op and shift_reg being in
   scope at the expansion site.
   NOTE(review): reconstructed from extraction-damaged text — verify
   against the upstream GCC 4.9 sources.  */
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);	\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)	\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
9062 /* RTX costs. Make an estimate of the cost of executing the operation
9063 X, which is contained with an operation with code OUTER_CODE.
9064 SPEED_P indicates whether the cost desired is the performance cost,
9065 or the size cost. The estimate is stored in COST and the return
9066 value is TRUE if the cost calculation is final, or FALSE if the
9067 caller should recurse through the operands of X to add additional
9070 We currently make no attempt to model the size savings of Thumb-2
9071 16-bit instructions. At the normal points in compilation where
9072 this code is called we have no measure of whether the condition
9073 flags are live or not, and thus no realistic way to determine what
9074 the size will eventually be. */
9076 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9077 const struct cpu_cost_table
*extra_cost
,
9078 int *cost
, bool speed_p
)
9080 enum machine_mode mode
= GET_MODE (x
);
9085 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9087 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9095 /* SET RTXs don't have a mode so we get it from the destination. */
9096 mode
= GET_MODE (SET_DEST (x
));
9098 if (REG_P (SET_SRC (x
))
9099 && REG_P (SET_DEST (x
)))
9101 /* Assume that most copies can be done with a single insn,
9102 unless we don't have HW FP, in which case everything
9103 larger than word mode will require two insns. */
9104 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9105 && GET_MODE_SIZE (mode
) > 4)
9108 /* Conditional register moves can be encoded
9109 in 16 bits in Thumb mode. */
9110 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9116 if (CONST_INT_P (SET_SRC (x
)))
9118 /* Handle CONST_INT here, since the value doesn't have a mode
9119 and we would otherwise be unable to work out the true cost. */
9120 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9122 /* Slightly lower the cost of setting a core reg to a constant.
9123 This helps break up chains and allows for better scheduling. */
9124 if (REG_P (SET_DEST (x
))
9125 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9128 /* Immediate moves with an immediate in the range [0, 255] can be
9129 encoded in 16 bits in Thumb mode. */
9130 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9131 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9133 goto const_int_cost
;
9139 /* A memory access costs 1 insn if the mode is small, or the address is
9140 a single register, otherwise it costs one insn per word. */
9141 if (REG_P (XEXP (x
, 0)))
9142 *cost
= COSTS_N_INSNS (1);
9144 && GET_CODE (XEXP (x
, 0)) == PLUS
9145 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9146 /* This will be split into two instructions.
9147 See arm.md:calculate_pic_address. */
9148 *cost
= COSTS_N_INSNS (2);
9150 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9152 /* For speed optimizations, add the costs of the address and
9153 accessing memory. */
9156 *cost
+= (extra_cost
->ldst
.load
9157 + arm_address_cost (XEXP (x
, 0), mode
,
9158 ADDR_SPACE_GENERIC
, speed_p
));
9160 *cost
+= extra_cost
->ldst
.load
;
9166 /* Calculations of LDM costs are complex. We assume an initial cost
9167 (ldm_1st) which will load the number of registers mentioned in
9168 ldm_regs_per_insn_1st registers; then each additional
9169 ldm_regs_per_insn_subsequent registers cost one more insn. The
9170 formula for N regs is thus:
9172 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9173 + ldm_regs_per_insn_subsequent - 1)
9174 / ldm_regs_per_insn_subsequent).
9176 Additional costs may also be added for addressing. A similar
9177 formula is used for STM. */
9179 bool is_ldm
= load_multiple_operation (x
, SImode
);
9180 bool is_stm
= store_multiple_operation (x
, SImode
);
9182 *cost
= COSTS_N_INSNS (1);
9184 if (is_ldm
|| is_stm
)
9188 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9189 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9190 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9191 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9192 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9193 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9194 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9196 *cost
+= regs_per_insn_1st
9197 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9198 + regs_per_insn_sub
- 1)
9199 / regs_per_insn_sub
);
9208 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9209 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9210 *cost
= COSTS_N_INSNS (speed_p
9211 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9212 else if (mode
== SImode
&& TARGET_IDIV
)
9213 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9215 *cost
= LIBCALL_COST (2);
9216 return false; /* All arguments must be in registers. */
9220 *cost
= LIBCALL_COST (2);
9221 return false; /* All arguments must be in registers. */
9224 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9226 *cost
= (COSTS_N_INSNS (2)
9227 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9229 *cost
+= extra_cost
->alu
.shift_reg
;
9237 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9239 *cost
= (COSTS_N_INSNS (3)
9240 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9242 *cost
+= 2 * extra_cost
->alu
.shift
;
9245 else if (mode
== SImode
)
9247 *cost
= (COSTS_N_INSNS (1)
9248 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9249 /* Slightly disparage register shifts at -Os, but not by much. */
9250 if (!CONST_INT_P (XEXP (x
, 1)))
9251 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9252 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9255 else if (GET_MODE_CLASS (mode
) == MODE_INT
9256 && GET_MODE_SIZE (mode
) < 4)
9260 *cost
= (COSTS_N_INSNS (1)
9261 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9262 /* Slightly disparage register shifts at -Os, but not by
9264 if (!CONST_INT_P (XEXP (x
, 1)))
9265 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9266 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9268 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9270 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9272 /* Can use SBFX/UBFX. */
9273 *cost
= COSTS_N_INSNS (1);
9275 *cost
+= extra_cost
->alu
.bfx
;
9276 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9280 *cost
= COSTS_N_INSNS (2);
9281 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9284 if (CONST_INT_P (XEXP (x
, 1)))
9285 *cost
+= 2 * extra_cost
->alu
.shift
;
9287 *cost
+= (extra_cost
->alu
.shift
9288 + extra_cost
->alu
.shift_reg
);
9291 /* Slightly disparage register shifts. */
9292 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9297 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9298 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9301 if (CONST_INT_P (XEXP (x
, 1)))
9302 *cost
+= (2 * extra_cost
->alu
.shift
9303 + extra_cost
->alu
.log_shift
);
9305 *cost
+= (extra_cost
->alu
.shift
9306 + extra_cost
->alu
.shift_reg
9307 + extra_cost
->alu
.log_shift_reg
);
9313 *cost
= LIBCALL_COST (2);
9317 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9318 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9320 *cost
= COSTS_N_INSNS (1);
9321 if (GET_CODE (XEXP (x
, 0)) == MULT
9322 || GET_CODE (XEXP (x
, 1)) == MULT
)
9324 rtx mul_op0
, mul_op1
, sub_op
;
9327 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9329 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9331 mul_op0
= XEXP (XEXP (x
, 0), 0);
9332 mul_op1
= XEXP (XEXP (x
, 0), 1);
9333 sub_op
= XEXP (x
, 1);
9337 mul_op0
= XEXP (XEXP (x
, 1), 0);
9338 mul_op1
= XEXP (XEXP (x
, 1), 1);
9339 sub_op
= XEXP (x
, 0);
9342 /* The first operand of the multiply may be optionally
9344 if (GET_CODE (mul_op0
) == NEG
)
9345 mul_op0
= XEXP (mul_op0
, 0);
9347 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9348 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9349 + rtx_cost (sub_op
, code
, 0, speed_p
));
9355 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9361 rtx shift_by_reg
= NULL
;
9365 *cost
= COSTS_N_INSNS (1);
9367 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9368 if (shift_op
== NULL
)
9370 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9371 non_shift_op
= XEXP (x
, 0);
9374 non_shift_op
= XEXP (x
, 1);
9376 if (shift_op
!= NULL
)
9378 if (shift_by_reg
!= NULL
)
9381 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9382 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9385 *cost
+= extra_cost
->alu
.arith_shift
;
9387 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9388 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9393 && GET_CODE (XEXP (x
, 1)) == MULT
)
9397 *cost
+= extra_cost
->mult
[0].add
;
9398 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9399 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9400 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9404 if (CONST_INT_P (XEXP (x
, 0)))
9406 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9407 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9409 *cost
= COSTS_N_INSNS (insns
);
9411 *cost
+= insns
* extra_cost
->alu
.arith
;
9412 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9419 if (GET_MODE_CLASS (mode
) == MODE_INT
9420 && GET_MODE_SIZE (mode
) < 4)
9422 rtx shift_op
, shift_reg
;
9425 /* We check both sides of the MINUS for shifter operands since,
9426 unlike PLUS, it's not commutative. */
9428 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9429 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9431 /* Slightly disparage, as we might need to widen the result. */
9432 *cost
= 1 + COSTS_N_INSNS (1);
9434 *cost
+= extra_cost
->alu
.arith
;
9436 if (CONST_INT_P (XEXP (x
, 0)))
9438 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9447 *cost
= COSTS_N_INSNS (2);
9449 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9451 rtx op1
= XEXP (x
, 1);
9454 *cost
+= 2 * extra_cost
->alu
.arith
;
9456 if (GET_CODE (op1
) == ZERO_EXTEND
)
9457 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9459 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9460 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9464 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9467 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9468 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9470 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9473 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9474 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9477 *cost
+= (extra_cost
->alu
.arith
9478 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9479 ? extra_cost
->alu
.arith
9480 : extra_cost
->alu
.arith_shift
));
9481 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9482 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9483 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9488 *cost
+= 2 * extra_cost
->alu
.arith
;
9494 *cost
= LIBCALL_COST (2);
9498 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9499 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9501 *cost
= COSTS_N_INSNS (1);
9502 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9504 rtx mul_op0
, mul_op1
, add_op
;
9507 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9509 mul_op0
= XEXP (XEXP (x
, 0), 0);
9510 mul_op1
= XEXP (XEXP (x
, 0), 1);
9511 add_op
= XEXP (x
, 1);
9513 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9514 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9515 + rtx_cost (add_op
, code
, 0, speed_p
));
9521 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9524 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9526 *cost
= LIBCALL_COST (2);
9530 /* Narrow modes can be synthesized in SImode, but the range
9531 of useful sub-operations is limited. Check for shift operations
9532 on one of the operands. Only left shifts can be used in the
9534 if (GET_MODE_CLASS (mode
) == MODE_INT
9535 && GET_MODE_SIZE (mode
) < 4)
9537 rtx shift_op
, shift_reg
;
9540 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9542 if (CONST_INT_P (XEXP (x
, 1)))
9544 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9545 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9547 *cost
= COSTS_N_INSNS (insns
);
9549 *cost
+= insns
* extra_cost
->alu
.arith
;
9550 /* Slightly penalize a narrow operation as the result may
9552 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9556 /* Slightly penalize a narrow operation as the result may
9558 *cost
= 1 + COSTS_N_INSNS (1);
9560 *cost
+= extra_cost
->alu
.arith
;
9567 rtx shift_op
, shift_reg
;
9569 *cost
= COSTS_N_INSNS (1);
9571 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9572 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9574 /* UXTA[BH] or SXTA[BH]. */
9576 *cost
+= extra_cost
->alu
.extnd_arith
;
9577 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9579 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9584 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9585 if (shift_op
!= NULL
)
9590 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9591 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9594 *cost
+= extra_cost
->alu
.arith_shift
;
9596 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9597 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9600 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9602 rtx mul_op
= XEXP (x
, 0);
9604 *cost
= COSTS_N_INSNS (1);
9606 if (TARGET_DSP_MULTIPLY
9607 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9608 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9609 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9610 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9611 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9612 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9613 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9614 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9615 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9616 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9617 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9618 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9623 *cost
+= extra_cost
->mult
[0].extend_add
;
9624 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9625 SIGN_EXTEND
, 0, speed_p
)
9626 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9627 SIGN_EXTEND
, 0, speed_p
)
9628 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9633 *cost
+= extra_cost
->mult
[0].add
;
9634 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
9635 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
9636 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9639 if (CONST_INT_P (XEXP (x
, 1)))
9641 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9642 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9644 *cost
= COSTS_N_INSNS (insns
);
9646 *cost
+= insns
* extra_cost
->alu
.arith
;
9647 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9656 && GET_CODE (XEXP (x
, 0)) == MULT
9657 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9658 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9659 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9660 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9662 *cost
= COSTS_N_INSNS (1);
9664 *cost
+= extra_cost
->mult
[1].extend_add
;
9665 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
9666 ZERO_EXTEND
, 0, speed_p
)
9667 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
9668 ZERO_EXTEND
, 0, speed_p
)
9669 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9673 *cost
= COSTS_N_INSNS (2);
9675 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9676 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9679 *cost
+= (extra_cost
->alu
.arith
9680 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9681 ? extra_cost
->alu
.arith
9682 : extra_cost
->alu
.arith_shift
));
9684 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9686 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9691 *cost
+= 2 * extra_cost
->alu
.arith
;
9696 *cost
= LIBCALL_COST (2);
9699 case AND
: case XOR
: case IOR
:
9702 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9703 rtx op0
= XEXP (x
, 0);
9704 rtx shift_op
, shift_reg
;
9706 *cost
= COSTS_N_INSNS (1);
9710 || (code
== IOR
&& TARGET_THUMB2
)))
9711 op0
= XEXP (op0
, 0);
9714 shift_op
= shifter_op_p (op0
, &shift_reg
);
9715 if (shift_op
!= NULL
)
9720 *cost
+= extra_cost
->alu
.log_shift_reg
;
9721 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9724 *cost
+= extra_cost
->alu
.log_shift
;
9726 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9727 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9731 if (CONST_INT_P (XEXP (x
, 1)))
9733 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9734 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9737 *cost
= COSTS_N_INSNS (insns
);
9739 *cost
+= insns
* extra_cost
->alu
.logical
;
9740 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
9745 *cost
+= extra_cost
->alu
.logical
;
9746 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
9747 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9753 rtx op0
= XEXP (x
, 0);
9754 enum rtx_code subcode
= GET_CODE (op0
);
9756 *cost
= COSTS_N_INSNS (2);
9760 || (code
== IOR
&& TARGET_THUMB2
)))
9761 op0
= XEXP (op0
, 0);
9763 if (GET_CODE (op0
) == ZERO_EXTEND
)
9766 *cost
+= 2 * extra_cost
->alu
.logical
;
9768 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
9769 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9772 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9775 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9777 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
9778 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9783 *cost
+= 2 * extra_cost
->alu
.logical
;
9789 *cost
= LIBCALL_COST (2);
9793 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9794 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9796 rtx op0
= XEXP (x
, 0);
9798 *cost
= COSTS_N_INSNS (1);
9800 if (GET_CODE (op0
) == NEG
)
9801 op0
= XEXP (op0
, 0);
9804 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9806 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
9807 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
9810 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9812 *cost
= LIBCALL_COST (2);
9818 *cost
= COSTS_N_INSNS (1);
9819 if (TARGET_DSP_MULTIPLY
9820 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9821 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9822 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9823 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9824 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9825 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
9826 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
9827 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
9828 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9829 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9830 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9831 && (INTVAL (XEXP (XEXP (x
, 1), 1))
9836 *cost
+= extra_cost
->mult
[0].extend
;
9837 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
9838 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
9842 *cost
+= extra_cost
->mult
[0].simple
;
9849 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9850 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
9851 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9852 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
9854 *cost
= COSTS_N_INSNS (1);
9856 *cost
+= extra_cost
->mult
[1].extend
;
9857 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
9858 ZERO_EXTEND
, 0, speed_p
)
9859 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9860 ZERO_EXTEND
, 0, speed_p
));
9864 *cost
= LIBCALL_COST (2);
9869 *cost
= LIBCALL_COST (2);
9873 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9874 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9876 *cost
= COSTS_N_INSNS (1);
9878 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9882 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9884 *cost
= LIBCALL_COST (1);
9890 if (GET_CODE (XEXP (x
, 0)) == ABS
)
9892 *cost
= COSTS_N_INSNS (2);
9893 /* Assume the non-flag-changing variant. */
9895 *cost
+= (extra_cost
->alu
.log_shift
9896 + extra_cost
->alu
.arith_shift
);
9897 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
9901 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
9902 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
9904 *cost
= COSTS_N_INSNS (2);
9905 /* No extra cost for MOV imm and MVN imm. */
9906 /* If the comparison op is using the flags, there's no further
9907 cost, otherwise we need to add the cost of the comparison. */
9908 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
9909 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
9910 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
9912 *cost
+= (COSTS_N_INSNS (1)
9913 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
9915 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
9918 *cost
+= extra_cost
->alu
.arith
;
9922 *cost
= COSTS_N_INSNS (1);
9924 *cost
+= extra_cost
->alu
.arith
;
9928 if (GET_MODE_CLASS (mode
) == MODE_INT
9929 && GET_MODE_SIZE (mode
) < 4)
9931 /* Slightly disparage, as we might need an extend operation. */
9932 *cost
= 1 + COSTS_N_INSNS (1);
9934 *cost
+= extra_cost
->alu
.arith
;
9940 *cost
= COSTS_N_INSNS (2);
9942 *cost
+= 2 * extra_cost
->alu
.arith
;
9947 *cost
= LIBCALL_COST (1);
9954 rtx shift_reg
= NULL
;
9956 *cost
= COSTS_N_INSNS (1);
9957 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9961 if (shift_reg
!= NULL
)
9964 *cost
+= extra_cost
->alu
.log_shift_reg
;
9965 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9968 *cost
+= extra_cost
->alu
.log_shift
;
9969 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
9974 *cost
+= extra_cost
->alu
.logical
;
9979 *cost
= COSTS_N_INSNS (2);
9985 *cost
+= LIBCALL_COST (1);
9990 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9992 *cost
= COSTS_N_INSNS (4);
9995 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
9996 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
9998 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
9999 /* Assume that if one arm of the if_then_else is a register,
10000 that it will be tied with the result and eliminate the
10001 conditional insn. */
10002 if (REG_P (XEXP (x
, 1)))
10004 else if (REG_P (XEXP (x
, 2)))
10010 if (extra_cost
->alu
.non_exec_costs_exec
)
10011 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10013 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10016 *cost
+= op1cost
+ op2cost
;
10022 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10026 enum machine_mode op0mode
;
10027 /* We'll mostly assume that the cost of a compare is the cost of the
10028 LHS. However, there are some notable exceptions. */
10030 /* Floating point compares are never done as side-effects. */
10031 op0mode
= GET_MODE (XEXP (x
, 0));
10032 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10033 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10035 *cost
= COSTS_N_INSNS (1);
10037 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10039 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10041 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10047 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10049 *cost
= LIBCALL_COST (2);
10053 /* DImode compares normally take two insns. */
10054 if (op0mode
== DImode
)
10056 *cost
= COSTS_N_INSNS (2);
10058 *cost
+= 2 * extra_cost
->alu
.arith
;
10062 if (op0mode
== SImode
)
10067 if (XEXP (x
, 1) == const0_rtx
10068 && !(REG_P (XEXP (x
, 0))
10069 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10070 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10072 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10074 /* Multiply operations that set the flags are often
10075 significantly more expensive. */
10077 && GET_CODE (XEXP (x
, 0)) == MULT
10078 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10079 *cost
+= extra_cost
->mult
[0].flag_setting
;
10082 && GET_CODE (XEXP (x
, 0)) == PLUS
10083 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10084 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10086 *cost
+= extra_cost
->mult
[0].flag_setting
;
10091 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10092 if (shift_op
!= NULL
)
10094 *cost
= COSTS_N_INSNS (1);
10095 if (shift_reg
!= NULL
)
10097 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10099 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10102 *cost
+= extra_cost
->alu
.arith_shift
;
10103 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10104 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10108 *cost
= COSTS_N_INSNS (1);
10110 *cost
+= extra_cost
->alu
.arith
;
10111 if (CONST_INT_P (XEXP (x
, 1))
10112 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10114 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10122 *cost
= LIBCALL_COST (2);
10145 if (outer_code
== SET
)
10147 /* Is it a store-flag operation? */
10148 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10149 && XEXP (x
, 1) == const0_rtx
)
10151 /* Thumb also needs an IT insn. */
10152 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10155 if (XEXP (x
, 1) == const0_rtx
)
10160 /* LSR Rd, Rn, #31. */
10161 *cost
= COSTS_N_INSNS (1);
10163 *cost
+= extra_cost
->alu
.shift
;
10173 *cost
= COSTS_N_INSNS (2);
10177 /* RSBS T1, Rn, Rn, LSR #31
10179 *cost
= COSTS_N_INSNS (2);
10181 *cost
+= extra_cost
->alu
.arith_shift
;
10185 /* RSB Rd, Rn, Rn, ASR #1
10186 LSR Rd, Rd, #31. */
10187 *cost
= COSTS_N_INSNS (2);
10189 *cost
+= (extra_cost
->alu
.arith_shift
10190 + extra_cost
->alu
.shift
);
10196 *cost
= COSTS_N_INSNS (2);
10198 *cost
+= extra_cost
->alu
.shift
;
10202 /* Remaining cases are either meaningless or would take
10203 three insns anyway. */
10204 *cost
= COSTS_N_INSNS (3);
10207 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10212 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10213 if (CONST_INT_P (XEXP (x
, 1))
10214 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10216 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10223 /* Not directly inside a set. If it involves the condition code
10224 register it must be the condition for a branch, cond_exec or
10225 I_T_E operation. Since the comparison is performed elsewhere
10226 this is just the control part which has no additional
10228 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10229 && XEXP (x
, 1) == const0_rtx
)
10237 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10238 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10240 *cost
= COSTS_N_INSNS (1);
10242 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10246 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10248 *cost
= LIBCALL_COST (1);
10252 if (mode
== SImode
)
10254 *cost
= COSTS_N_INSNS (1);
10256 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10260 *cost
= LIBCALL_COST (1);
10264 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10265 && MEM_P (XEXP (x
, 0)))
10267 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10269 if (mode
== DImode
)
10270 *cost
+= COSTS_N_INSNS (1);
10275 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10276 *cost
+= extra_cost
->ldst
.load
;
10278 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10280 if (mode
== DImode
)
10281 *cost
+= extra_cost
->alu
.shift
;
10286 /* Widening from less than 32-bits requires an extend operation. */
10287 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10289 /* We have SXTB/SXTH. */
10290 *cost
= COSTS_N_INSNS (1);
10291 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10293 *cost
+= extra_cost
->alu
.extnd
;
10295 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10297 /* Needs two shifts. */
10298 *cost
= COSTS_N_INSNS (2);
10299 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10301 *cost
+= 2 * extra_cost
->alu
.shift
;
10304 /* Widening beyond 32-bits requires one more insn. */
10305 if (mode
== DImode
)
10307 *cost
+= COSTS_N_INSNS (1);
10309 *cost
+= extra_cost
->alu
.shift
;
10316 || GET_MODE (XEXP (x
, 0)) == SImode
10317 || GET_MODE (XEXP (x
, 0)) == QImode
)
10318 && MEM_P (XEXP (x
, 0)))
10320 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10322 if (mode
== DImode
)
10323 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10328 /* Widening from less than 32-bits requires an extend operation. */
10329 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10331 /* UXTB can be a shorter instruction in Thumb2, but it might
10332 be slower than the AND Rd, Rn, #255 alternative. When
10333 optimizing for speed it should never be slower to use
10334 AND, and we don't really model 16-bit vs 32-bit insns
10336 *cost
= COSTS_N_INSNS (1);
10338 *cost
+= extra_cost
->alu
.logical
;
10340 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10342 /* We have UXTB/UXTH. */
10343 *cost
= COSTS_N_INSNS (1);
10344 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10346 *cost
+= extra_cost
->alu
.extnd
;
10348 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10350 /* Needs two shifts. It's marginally preferable to use
10351 shifts rather than two BIC instructions as the second
10352 shift may merge with a subsequent insn as a shifter
10354 *cost
= COSTS_N_INSNS (2);
10355 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10357 *cost
+= 2 * extra_cost
->alu
.shift
;
10359 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10360 *cost
= COSTS_N_INSNS (1);
10362 /* Widening beyond 32-bits requires one more insn. */
10363 if (mode
== DImode
)
10365 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10372 /* CONST_INT has no mode, so we cannot tell for sure how many
10373 insns are really going to be needed. The best we can do is
10374 look at the value passed. If it fits in SImode, then assume
10375 that's the mode it will be used for. Otherwise assume it
10376 will be used in DImode. */
10377 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10382 /* Avoid blowing up in arm_gen_constant (). */
10383 if (!(outer_code
== PLUS
10384 || outer_code
== AND
10385 || outer_code
== IOR
10386 || outer_code
== XOR
10387 || outer_code
== MINUS
))
10391 if (mode
== SImode
)
10394 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10395 INTVAL (x
), NULL
, NULL
,
10401 *cost
+= COSTS_N_INSNS (arm_gen_constant
10402 (outer_code
, SImode
, NULL
,
10403 trunc_int_for_mode (INTVAL (x
), SImode
),
10405 + arm_gen_constant (outer_code
, SImode
, NULL
,
10406 INTVAL (x
) >> 32, NULL
,
10418 if (arm_arch_thumb2
&& !flag_pic
)
10419 *cost
= COSTS_N_INSNS (2);
10421 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10424 *cost
= COSTS_N_INSNS (2);
10428 *cost
+= COSTS_N_INSNS (1);
10430 *cost
+= extra_cost
->alu
.arith
;
10436 *cost
= COSTS_N_INSNS (4);
10441 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10442 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10444 if (vfp3_const_double_rtx (x
))
10446 *cost
= COSTS_N_INSNS (1);
10448 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10454 *cost
= COSTS_N_INSNS (1);
10455 if (mode
== DFmode
)
10456 *cost
+= extra_cost
->ldst
.loadd
;
10458 *cost
+= extra_cost
->ldst
.loadf
;
10461 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10465 *cost
= COSTS_N_INSNS (4);
10471 && TARGET_HARD_FLOAT
10472 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10473 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10474 *cost
= COSTS_N_INSNS (1);
10476 *cost
= COSTS_N_INSNS (4);
10481 *cost
= COSTS_N_INSNS (1);
10482 /* When optimizing for size, we prefer constant pool entries to
10483 MOVW/MOVT pairs, so bump the cost of these slightly. */
10489 *cost
= COSTS_N_INSNS (1);
10491 *cost
+= extra_cost
->alu
.clz
;
10495 if (XEXP (x
, 1) == const0_rtx
)
10497 *cost
= COSTS_N_INSNS (1);
10499 *cost
+= extra_cost
->alu
.log_shift
;
10500 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10503 /* Fall through. */
10507 *cost
= COSTS_N_INSNS (2);
10511 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10512 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10513 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10514 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10515 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10516 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10517 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10518 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10521 *cost
= COSTS_N_INSNS (1);
10523 *cost
+= extra_cost
->mult
[1].extend
;
10524 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10526 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10530 *cost
= LIBCALL_COST (1);
10534 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10537 /* Reading the PC is like reading any other register. Writing it
10538 is more expensive, but we take that into account elsewhere. */
10543 /* TODO: Simple zero_extract of bottom bits using AND. */
10544 /* Fall through. */
10548 && CONST_INT_P (XEXP (x
, 1))
10549 && CONST_INT_P (XEXP (x
, 2)))
10551 *cost
= COSTS_N_INSNS (1);
10553 *cost
+= extra_cost
->alu
.bfx
;
10554 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10557 /* Without UBFX/SBFX, need to resort to shift operations. */
10558 *cost
= COSTS_N_INSNS (2);
10560 *cost
+= 2 * extra_cost
->alu
.shift
;
10561 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10565 if (TARGET_HARD_FLOAT
)
10567 *cost
= COSTS_N_INSNS (1);
10569 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10570 if (!TARGET_FPU_ARMV8
10571 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10573 /* Pre v8, widening HF->DF is a two-step process, first
10574 widening to SFmode. */
10575 *cost
+= COSTS_N_INSNS (1);
10577 *cost
+= extra_cost
->fp
[0].widen
;
10579 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10583 *cost
= LIBCALL_COST (1);
10586 case FLOAT_TRUNCATE
:
10587 if (TARGET_HARD_FLOAT
)
10589 *cost
= COSTS_N_INSNS (1);
10591 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10592 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10594 /* Vector modes? */
10596 *cost
= LIBCALL_COST (1);
10601 if (TARGET_HARD_FLOAT
)
10603 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10605 *cost
= COSTS_N_INSNS (1);
10607 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
10608 /* Strip of the 'cost' of rounding towards zero. */
10609 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10610 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
10612 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10613 /* ??? Increase the cost to deal with transferring from
10614 FP -> CORE registers? */
10617 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10618 && TARGET_FPU_ARMV8
)
10620 *cost
= COSTS_N_INSNS (1);
10622 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10625 /* Vector costs? */
10627 *cost
= LIBCALL_COST (1);
10631 case UNSIGNED_FLOAT
:
10632 if (TARGET_HARD_FLOAT
)
10634 /* ??? Increase the cost to deal with transferring from CORE
10635 -> FP registers? */
10636 *cost
= COSTS_N_INSNS (1);
10638 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10641 *cost
= LIBCALL_COST (1);
10645 *cost
= COSTS_N_INSNS (1);
10649 /* Just a guess. Cost one insn per input. */
10650 *cost
= COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x
));
10654 if (mode
!= VOIDmode
)
10655 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10657 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10662 #undef HANDLE_NARROW_SHIFT_ARITH
/* NOTE(review): lossy extraction -- tokens are split across lines and
   several original source lines (braces, returns, trailing arguments)
   are missing, so this fragment is not compilable as-is.  Comments
   below describe intent only; confirm against the pristine file.  */
/* Top-level TARGET_RTX_COSTS dispatcher: routes to the deprecated
   size/tuning-table cost functions or to arm_new_rtx_costs with the
   per-CPU (or generic) extra-cost table.  The original comment line
   ("when optimizing for size") appears to predate the dispatch logic.  */
10664 /* RTX costs when optimizing for size. */
10666 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
10667 int *total
, bool speed
)
/* Legacy path: old-style cost hooks selected by -m flags / tuning.  */
10671 if (TARGET_OLD_RTX_COSTS
10672 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
10674 /* Old way. (Deprecated.) */
10676 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
10677 (enum rtx_code
) outer_code
, total
);
10679 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
10680 (enum rtx_code
) outer_code
, total
,
/* New path: table-driven costs.  Uses the tuning's own extra-cost
   table when present, otherwise the generic table.  */
10686 if (current_tune
->insn_extra_cost
)
10687 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10688 (enum rtx_code
) outer_code
,
10689 current_tune
->insn_extra_cost
,
10691 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10692 && current_tune->insn_extra_cost != NULL */
10694 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10695 (enum rtx_code
) outer_code
,
10696 &generic_extra_costs
, total
, speed
);
/* Debug dump of the computed cost when -fdump-*-details is active.  */
10699 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10701 print_rtl_single (dump_file
, x
);
10702 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10703 *total
, result
? "final" : "partial");
/* NOTE(review): lossy extraction -- case labels, braces, returns and the
   loop body (the cost increment) are missing; not compilable as-is.  */
10708 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10709 supported on any "slowmul" cores, so it can be ignored. */
10712 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10713 int *total
, bool speed
)
10715 enum machine_mode mode
= GET_MODE (x
);
/* Thumb-1 uses its own simplified cost model.  */
10719 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
/* Float multiplies (presumably soft-float here) cost a library call;
   TODO confirm -- the guarding condition's tail is missing.  */
10726 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10729 *total
= COSTS_N_INSNS (20);
/* Multiply by constant: estimate cycles by walking the constant in
   2-bit Booth steps (booth_unit_size == 2 on slow-mul cores).
   The per-step increment is missing from this extraction.  */
10733 if (CONST_INT_P (XEXP (x
, 1)))
10735 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
10736 & (unsigned HOST_WIDE_INT
) 0xffffffff);
10737 int cost
, const_ok
= const_ok_for_arm (i
);
10738 int j
, booth_unit_size
;
10740 /* Tune as appropriate. */
10741 cost
= const_ok
? 4 : 8;
10742 booth_unit_size
= 2;
10743 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
10745 i
>>= booth_unit_size
;
10749 *total
= COSTS_N_INSNS (cost
);
10750 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
10754 *total
= COSTS_N_INSNS (20);
/* Everything else falls back to the common ARM cost helper.  */
10758 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
/* NOTE(review): lossy extraction -- case labels, braces, returns and some
   condition heads are missing; not compilable as-is.  */
10763 /* RTX cost for cores with a fast multiply unit (M variants). */
10766 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10767 int *total
, bool speed
)
10769 enum machine_mode mode
= GET_MODE (x
);
/* Thumb-1 keeps its own cost model.  */
10773 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10777 /* ??? should thumb2 use different costs? */
/* Widening multiply (both operands same sign/zero extend) is cheap
   when the core has the long-multiply instructions; the leading
   condition of this test is missing from the extraction.  */
10781 /* There is no point basing this on the tuning, since it is always the
10782 fast variant if it exists at all. */
10784 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10785 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10786 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10788 *total
= COSTS_N_INSNS(2);
10793 if (mode
== DImode
)
10795 *total
= COSTS_N_INSNS (5);
/* Multiply by constant: 8-bit Booth steps on fast-mul cores; the
   per-step cost increment is missing from this extraction.  */
10799 if (CONST_INT_P (XEXP (x
, 1)))
10801 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
10802 & (unsigned HOST_WIDE_INT
) 0xffffffff);
10803 int cost
, const_ok
= const_ok_for_arm (i
);
10804 int j
, booth_unit_size
;
10806 /* Tune as appropriate. */
10807 cost
= const_ok
? 4 : 8;
10808 booth_unit_size
= 8;
10809 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
10811 i
>>= booth_unit_size
;
10815 *total
= COSTS_N_INSNS(cost
);
10819 if (mode
== SImode
)
10821 *total
= COSTS_N_INSNS (4);
/* Hard-float multiplies are single-insn; otherwise a libcall (20).  */
10825 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10827 if (TARGET_HARD_FLOAT
10829 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
10831 *total
= COSTS_N_INSNS (1);
10836 /* Requires a lib call */
10837 *total
= COSTS_N_INSNS (20);
10841 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* NOTE(review): lossy extraction -- case labels, braces, returns and the
   cost arithmetic between the masked_const tests are missing.  */
10846 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10847 so it can be ignored. */
10850 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10851 int *total
, bool speed
)
10853 enum machine_mode mode
= GET_MODE (x
);
10857 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
/* COMPARE of a non-MULT defers to the generic helper ...  */
10864 if (GET_CODE (XEXP (x
, 0)) != MULT
)
10865 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10867 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10868 will stall until the multiplication is complete. */
10869 *total
= COSTS_N_INSNS (3);
/* Widening multiply is cheap; the leading condition of this test
   is missing from the extraction.  */
10873 /* There is no point basing this on the tuning, since it is always the
10874 fast variant if it exists at all. */
10876 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10877 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10878 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10880 *total
= COSTS_N_INSNS (2);
10885 if (mode
== DImode
)
10887 *total
= COSTS_N_INSNS (5);
/* Constant multiplier: model the 15-bits-then-12-bits retirement of
   the XScale multiplier.  The incremental cost updates between the
   masked_const tests are missing from this extraction.  */
10891 if (CONST_INT_P (XEXP (x
, 1)))
10893 /* If operand 1 is a constant we can more accurately
10894 calculate the cost of the multiply. The multiplier can
10895 retire 15 bits on the first cycle and a further 12 on the
10896 second. We do, of course, have to load the constant into
10897 a register first. */
10898 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
10899 /* There's a general overhead of one cycle. */
10901 unsigned HOST_WIDE_INT masked_const
;
/* Negative constants are costed via their complement -- TODO confirm,
   the transformation line is missing.  */
10903 if (i
& 0x80000000)
10906 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
10908 masked_const
= i
& 0xffff8000;
10909 if (masked_const
!= 0)
10912 masked_const
= i
& 0xf8000000;
10913 if (masked_const
!= 0)
10916 *total
= COSTS_N_INSNS (cost
);
10920 if (mode
== SImode
)
10922 *total
= COSTS_N_INSNS (3);
10926 /* Requires a lib call */
10927 *total
= COSTS_N_INSNS (20);
10931 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* NOTE(review): lossy extraction -- case labels, braces and returns are
   missing; not compilable as-is.  Structure parallels the other
   per-tuning cost functions above.  */
10936 /* RTX costs for 9e (and later) cores. */
10939 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10940 int *total
, bool speed
)
10942 enum machine_mode mode
= GET_MODE (x
);
/* Thumb-1 special case (the guarding condition is missing here):
   some code, presumably MULT, costs 3 insns, otherwise fall back to
   thumb1_rtx_costs -- TODO confirm against the pristine file.  */
10949 *total
= COSTS_N_INSNS (3);
10953 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
/* Widening multiply; the leading condition is missing.  */
10961 /* There is no point basing this on the tuning, since it is always the
10962 fast variant if it exists at all. */
10964 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10965 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10966 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10968 *total
= COSTS_N_INSNS (2);
10973 if (mode
== DImode
)
10975 *total
= COSTS_N_INSNS (5);
10979 if (mode
== SImode
)
10981 *total
= COSTS_N_INSNS (2);
/* Hard-float: single insn; otherwise a 20-insn libcall estimate.  */
10985 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10987 if (TARGET_HARD_FLOAT
10989 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
10991 *total
= COSTS_N_INSNS (1);
10996 *total
= COSTS_N_INSNS (20);
11000 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* NOTE(review): lossy extraction -- every `return <value>;` line of this
   function is missing, as are the PLUS guard and final fallback.  Only
   the classification tests survive.  */
11003 /* All address computations that can be done are free, but rtx cost returns
11004 the same for practically all of them. So we weight the different types
11005 of address here in the order (most pref first):
11006 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11008 arm_arm_address_cost (rtx x
)
11010 enum rtx_code c
= GET_CODE (x
);
/* Auto-inc/dec addressing is the cheapest form (return value lost).  */
11012 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
/* Bare MEM/LABEL_REF/SYMBOL_REF are the most expensive (value lost).  */
11014 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
/* Sums: reg+const vs. sums involving further arithmetic are weighted
   differently per the comment above (values lost in extraction).  */
11019 if (CONST_INT_P (XEXP (x
, 1)))
11022 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
/* Thumb variant of the address-cost hook.  NOTE(review): the return
   statements and the leading REG / PLUS tests are missing from this
   extraction; only the reg+const-offset test survives.  */
11032 arm_thumb_address_cost (rtx x
)
11034 enum rtx_code c
= GET_CODE (x
);
/* Tail of a PLUS test: base register plus constant offset.  */
11039 && REG_P (XEXP (x
, 0))
11040 && CONST_INT_P (XEXP (x
, 1)))
/* TARGET_ADDRESS_COST hook: dispatch to the ARM-state or Thumb-state
   weighting function.  Mode, address space and speed are ignored.  */
11047 arm_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11048 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11050 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
/* NOTE(review): lossy extraction -- braces, the opno declaration, the
   cost-bump statement and the returns are missing; not compilable
   as-is.  This is a TARGET_SCHED_ADJUST_COST helper.  */
11053 /* Adjust cost hook for XScale. */
11055 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11057 /* Some true dependencies can have a higher cost depending
11058 on precisely how certain input operands are used. */
/* REG_NOTE_KIND == 0 is a true dependency; both insns must be
   recognizable to query their attributes.  */
11059 if (REG_NOTE_KIND(link
) == 0
11060 && recog_memoized (insn
) >= 0
11061 && recog_memoized (dep
) >= 0)
11063 int shift_opnum
= get_attr_shift (insn
);
11064 enum attr_type attr_type
= get_attr_type (dep
);
11066 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11067 operand for INSN. If we have a shifted input operand and the
11068 instruction we depend on is another ALU instruction, then we may
11069 have to account for an additional stall. */
11070 if (shift_opnum
!= 0
11071 && (attr_type
== TYPE_ALU_SHIFT_IMM
11072 || attr_type
== TYPE_ALUS_SHIFT_IMM
11073 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11074 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11075 || attr_type
== TYPE_ALU_SHIFT_REG
11076 || attr_type
== TYPE_ALUS_SHIFT_REG
11077 || attr_type
== TYPE_LOGIC_SHIFT_REG
11078 || attr_type
== TYPE_LOGICS_SHIFT_REG
11079 || attr_type
== TYPE_MOV_SHIFT
11080 || attr_type
== TYPE_MVN_SHIFT
11081 || attr_type
== TYPE_MOV_SHIFT_REG
11082 || attr_type
== TYPE_MVN_SHIFT_REG
))
11084 rtx shifted_operand
;
11087 /* Get the shifted operand. */
11088 extract_insn (insn
);
11089 shifted_operand
= recog_data
.operand
[shift_opnum
];
11091 /* Iterate over all the operands in DEP. If we write an operand
11092 that overlaps with SHIFTED_OPERAND, then we have increase the
11093 cost of this dependency. */
11094 extract_insn (dep
);
11095 preprocess_constraints ();
11096 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11098 /* We can ignore strict inputs. */
11099 if (recog_data
.operand_type
[opno
] == OP_IN
)
/* On overlap with a written operand, the dependency cost is bumped;
   the actual adjustment statement is missing from this extraction.  */
11102 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
/* NOTE(review): lossy extraction -- switch case labels other than
   REG_DEP_OUTPUT, braces, `else` keywords and returns are missing;
   not compilable as-is.  TARGET_SCHED_ADJUST_COST helper for
   Cortex-A9.  */
11114 /* Adjust cost hook for Cortex A9. */
11116 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11118 switch (REG_NOTE_KIND (link
))
11125 case REG_DEP_OUTPUT
:
11126 if (recog_memoized (insn
) >= 0
11127 && recog_memoized (dep
) >= 0)
/* Only single-SET FP-mode producer/consumer pairs are special-cased;
   the mode-class test heads are missing from the extraction.  */
11129 if (GET_CODE (PATTERN (insn
)) == SET
)
11132 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11134 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11136 enum attr_type attr_type_insn
= get_attr_type (insn
);
11137 enum attr_type attr_type_dep
= get_attr_type (dep
);
11139 /* By default all dependencies of the form
11142 have an extra latency of 1 cycle because
11143 of the input and output dependency in this
11144 case. However this gets modeled as an true
11145 dependency and hence all these checks. */
11146 if (REG_P (SET_DEST (PATTERN (insn
)))
11147 && REG_P (SET_DEST (PATTERN (dep
)))
11148 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11149 SET_DEST (PATTERN (dep
))))
11151 /* FMACS is a special case where the dependent
11152 instruction can be issued 3 cycles before
11153 the normal latency in case of an output
11155 if ((attr_type_insn
== TYPE_FMACS
11156 || attr_type_insn
== TYPE_FMACD
)
11157 && (attr_type_dep
== TYPE_FMACS
11158 || attr_type_dep
== TYPE_FMACD
))
/* FMAC output dependency: 3 cycles cheaper than default latency.  */
11160 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11161 *cost
= insn_default_latency (dep
) - 3;
11163 *cost
= insn_default_latency (dep
);
/* Other overlapping FP output dependencies pay one extra cycle.  */
11168 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11169 *cost
= insn_default_latency (dep
) + 1;
11171 *cost
= insn_default_latency (dep
);
/* Unhandled REG_NOTE kinds cannot occur here.  */
11181 gcc_unreachable ();
11187 /* Adjust cost hook for FA726TE. */
11189 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11191 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11192 have penalty of 3. */
11193 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11194 && recog_memoized (insn
) >= 0
11195 && recog_memoized (dep
) >= 0
11196 && get_attr_conds (dep
) == CONDS_SET
)
11198 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11199 if (get_attr_conds (insn
) == CONDS_USE
11200 && get_attr_type (insn
) != TYPE_BRANCH
)
11206 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11207 || get_attr_conds (insn
) == CONDS_USE
)
11217 /* Implement TARGET_REGISTER_MOVE_COST.
11219 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11220 it is typically more expensive than a single memory access. We set
11221 the cost to less than two memory accesses so that floating
11222 point to integer conversion does not go through memory. */
11225 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
11226 reg_class_t from
, reg_class_t to
)
11230 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11231 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11233 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11234 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11236 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11243 if (from
== HI_REGS
|| to
== HI_REGS
)
11250 /* Implement TARGET_MEMORY_MOVE_COST. */
11253 arm_memory_move_cost (enum machine_mode mode
, reg_class_t rclass
,
11254 bool in ATTRIBUTE_UNUSED
)
11260 if (GET_MODE_SIZE (mode
) < 4)
11263 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11267 /* Vectorizer cost model implementation. */
11269 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11271 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11273 int misalign ATTRIBUTE_UNUSED
)
11277 switch (type_of_cost
)
11280 return current_tune
->vec_costs
->scalar_stmt_cost
;
11283 return current_tune
->vec_costs
->scalar_load_cost
;
11286 return current_tune
->vec_costs
->scalar_store_cost
;
11289 return current_tune
->vec_costs
->vec_stmt_cost
;
11292 return current_tune
->vec_costs
->vec_align_load_cost
;
11295 return current_tune
->vec_costs
->vec_store_cost
;
11297 case vec_to_scalar
:
11298 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11300 case scalar_to_vec
:
11301 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11303 case unaligned_load
:
11304 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11306 case unaligned_store
:
11307 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11309 case cond_branch_taken
:
11310 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11312 case cond_branch_not_taken
:
11313 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11316 case vec_promote_demote
:
11317 return current_tune
->vec_costs
->vec_stmt_cost
;
11319 case vec_construct
:
11320 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11321 return elements
/ 2 + 1;
11324 gcc_unreachable ();
11328 /* Implement targetm.vectorize.add_stmt_cost. */
11331 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11332 struct _stmt_vec_info
*stmt_info
, int misalign
,
11333 enum vect_cost_model_location where
)
11335 unsigned *cost
= (unsigned *) data
;
11336 unsigned retval
= 0;
11338 if (flag_vect_cost_model
)
11340 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11341 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11343 /* Statements in an inner loop relative to the loop being
11344 vectorized are weighted more heavily. The value here is
11345 arbitrary and could potentially be improved with analysis. */
11346 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11347 count
*= 50; /* FIXME. */
11349 retval
= (unsigned) (count
* stmt_cost
);
11350 cost
[where
] += retval
;
11356 /* Return true if and only if this insn can dual-issue only as older. */
11358 cortexa7_older_only (rtx insn
)
11360 if (recog_memoized (insn
) < 0)
11363 switch (get_attr_type (insn
))
11366 case TYPE_ALUS_REG
:
11367 case TYPE_LOGIC_REG
:
11368 case TYPE_LOGICS_REG
:
11370 case TYPE_ADCS_REG
:
11375 case TYPE_SHIFT_IMM
:
11376 case TYPE_SHIFT_REG
:
11377 case TYPE_LOAD_BYTE
:
11380 case TYPE_FFARITHS
:
11382 case TYPE_FFARITHD
:
11400 case TYPE_F_STORES
:
11407 /* Return true if and only if this insn can dual-issue as younger. */
11409 cortexa7_younger (FILE *file
, int verbose
, rtx insn
)
11411 if (recog_memoized (insn
) < 0)
11414 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11418 switch (get_attr_type (insn
))
11421 case TYPE_ALUS_IMM
:
11422 case TYPE_LOGIC_IMM
:
11423 case TYPE_LOGICS_IMM
:
11428 case TYPE_MOV_SHIFT
:
11429 case TYPE_MOV_SHIFT_REG
:
11439 /* Look for an instruction that can dual issue only as an older
11440 instruction, and move it in front of any instructions that can
11441 dual-issue as younger, while preserving the relative order of all
11442 other instructions in the ready list. This is a hueuristic to help
11443 dual-issue in later cycles, by postponing issue of more flexible
11444 instructions. This heuristic may affect dual issue opportunities
11445 in the current cycle. */
11447 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11451 int first_older_only
= -1, first_younger
= -1;
11455 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11459 /* Traverse the ready list from the head (the instruction to issue
11460 first), and looking for the first instruction that can issue as
11461 younger and the first instruction that can dual-issue only as
11463 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11465 rtx insn
= ready
[i
];
11466 if (cortexa7_older_only (insn
))
11468 first_older_only
= i
;
11470 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11473 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11477 /* Nothing to reorder because either no younger insn found or insn
11478 that can dual-issue only as older appears before any insn that
11479 can dual-issue as younger. */
11480 if (first_younger
== -1)
11483 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11487 /* Nothing to reorder because no older-only insn in the ready list. */
11488 if (first_older_only
== -1)
11491 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11495 /* Move first_older_only insn before first_younger. */
11497 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11498 INSN_UID(ready
[first_older_only
]),
11499 INSN_UID(ready
[first_younger
]));
11500 rtx first_older_only_insn
= ready
[first_older_only
];
11501 for (i
= first_older_only
; i
< first_younger
; i
++)
11503 ready
[i
] = ready
[i
+1];
11506 ready
[i
] = first_older_only_insn
;
11510 /* Implement TARGET_SCHED_REORDER. */
11512 arm_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11518 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11521 /* Do nothing for other cores. */
11525 return arm_issue_rate ();
11528 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11529 It corrects the value of COST based on the relationship between
11530 INSN and DEP through the dependence LINK. It returns the new
11531 value. There is a per-core adjust_cost hook to adjust scheduler costs
11532 and the per-core hook can choose to completely override the generic
11533 adjust_cost function. Only put bits of code into arm_adjust_cost that
11534 are common across all cores. */
11536 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
11540 /* When generating Thumb-1 code, we want to place flag-setting operations
11541 close to a conditional branch which depends on them, so that we can
11542 omit the comparison. */
11544 && REG_NOTE_KIND (link
) == 0
11545 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11546 && recog_memoized (dep
) >= 0
11547 && get_attr_conds (dep
) == CONDS_SET
)
11550 if (current_tune
->sched_adjust_cost
!= NULL
)
11552 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
11556 /* XXX Is this strictly true? */
11557 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
11558 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11561 /* Call insns don't incur a stall, even if they follow a load. */
11562 if (REG_NOTE_KIND (link
) == 0
11566 if ((i_pat
= single_set (insn
)) != NULL
11567 && MEM_P (SET_SRC (i_pat
))
11568 && (d_pat
= single_set (dep
)) != NULL
11569 && MEM_P (SET_DEST (d_pat
)))
11571 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11572 /* This is a load after a store, there is no conflict if the load reads
11573 from a cached area. Assume that loads from the stack, and from the
11574 constant pool are cached, and that others will miss. This is a
11577 if ((GET_CODE (src_mem
) == SYMBOL_REF
11578 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11579 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11580 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11581 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11589 arm_max_conditional_execute (void)
11591 return max_insns_skipped
;
11595 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11598 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11600 return (optimize
> 0) ? 2 : 0;
11604 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11606 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11609 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11610 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11611 sequences of non-executed instructions in IT blocks probably take the same
11612 amount of time as executed instructions (and the IT instruction itself takes
11613 space in icache). This function was experimentally determined to give good
11614 results on a popular embedded benchmark. */
11617 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11619 return (TARGET_32BIT
&& speed_p
) ? 1
11620 : arm_default_branch_cost (speed_p
, predictable_p
);
11623 static bool fp_consts_inited
= false;
11625 static REAL_VALUE_TYPE value_fp0
;
11628 init_fp_table (void)
11632 r
= REAL_VALUE_ATOF ("0", DFmode
);
11634 fp_consts_inited
= true;
11637 /* Return TRUE if rtx X is a valid immediate FP constant. */
11639 arm_const_double_rtx (rtx x
)
11643 if (!fp_consts_inited
)
11646 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11647 if (REAL_VALUE_MINUS_ZERO (r
))
11650 if (REAL_VALUES_EQUAL (r
, value_fp0
))
11656 /* VFPv3 has a fairly wide range of representable immediates, formed from
11657 "quarter-precision" floating-point values. These can be evaluated using this
11658 formula (with ^ for exponentiation):
11662 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11663 16 <= n <= 31 and 0 <= r <= 7.
11665 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11667 - A (most-significant) is the sign bit.
11668 - BCD are the exponent (encoded as r XOR 3).
11669 - EFGH are the mantissa (encoded as n - 16).
11672 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11673 fconst[sd] instruction, or -1 if X isn't suitable. */
11675 vfp3_const_double_index (rtx x
)
11677 REAL_VALUE_TYPE r
, m
;
11678 int sign
, exponent
;
11679 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11680 unsigned HOST_WIDE_INT mask
;
11681 HOST_WIDE_INT m1
, m2
;
11682 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11684 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11687 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11689 /* We can't represent these things, so detect them first. */
11690 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11693 /* Extract sign, exponent and mantissa. */
11694 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11695 r
= real_value_abs (&r
);
11696 exponent
= REAL_EXP (&r
);
11697 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11698 highest (sign) bit, with a fixed binary point at bit point_pos.
11699 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11700 bits for the mantissa, this may fail (low bits would be lost). */
11701 real_ldexp (&m
, &r
, point_pos
- exponent
);
11702 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
11706 /* If there are bits set in the low part of the mantissa, we can't
11707 represent this value. */
11711 /* Now make it so that mantissa contains the most-significant bits, and move
11712 the point_pos to indicate that the least-significant bits have been
11714 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11715 mantissa
= mant_hi
;
11717 /* We can permit four significant bits of mantissa only, plus a high bit
11718 which is always 1. */
11719 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
11720 if ((mantissa
& mask
) != 0)
11723 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11724 mantissa
>>= point_pos
- 5;
11726 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11727 floating-point immediate zero with Neon using an integer-zero load, but
11728 that case is handled elsewhere.) */
11732 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11734 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11735 normalized significands are in the range [1, 2). (Our mantissa is shifted
11736 left 4 places at this point relative to normalized IEEE754 values). GCC
11737 internally uses [0.5, 1) (see real.c), so the exponent returned from
11738 REAL_EXP must be altered. */
11739 exponent
= 5 - exponent
;
11741 if (exponent
< 0 || exponent
> 7)
11744 /* Sign, mantissa and exponent are now in the correct form to plug into the
11745 formula described in the comment above. */
11746 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11749 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11751 vfp3_const_double_rtx (rtx x
)
11756 return vfp3_const_double_index (x
) != -1;
11759 /* Recognize immediates which can be used in various Neon instructions. Legal
11760 immediates are described by the following table (for VMVN variants, the
11761 bitwise inverse of the constant shown is recognized. In either case, VMOV
11762 is output and the correct instruction to use for a given constant is chosen
11763 by the assembler). The constant shown is replicated across all elements of
11764 the destination vector.
11766 insn elems variant constant (binary)
11767 ---- ----- ------- -----------------
11768 vmov i32 0 00000000 00000000 00000000 abcdefgh
11769 vmov i32 1 00000000 00000000 abcdefgh 00000000
11770 vmov i32 2 00000000 abcdefgh 00000000 00000000
11771 vmov i32 3 abcdefgh 00000000 00000000 00000000
11772 vmov i16 4 00000000 abcdefgh
11773 vmov i16 5 abcdefgh 00000000
11774 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11775 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11776 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11777 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11778 vmvn i16 10 00000000 abcdefgh
11779 vmvn i16 11 abcdefgh 00000000
11780 vmov i32 12 00000000 00000000 abcdefgh 11111111
11781 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11782 vmov i32 14 00000000 abcdefgh 11111111 11111111
11783 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11784 vmov i8 16 abcdefgh
11785 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11786 eeeeeeee ffffffff gggggggg hhhhhhhh
11787 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11788 vmov f32 19 00000000 00000000 00000000 00000000
11790 For case 18, B = !b. Representable values are exactly those accepted by
11791 vfp3_const_double_index, but are output as floating-point numbers rather
11794 For case 19, we will change it to vmov.i32 when assembling.
11796 Variants 0-5 (inclusive) may also be used as immediates for the second
11797 operand of VORR/VBIC instructions.
11799 The INVERSE argument causes the bitwise inverse of the given operand to be
11800 recognized instead (used for recognizing legal immediates for the VAND/VORN
11801 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11802 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11803 output, rather than the real insns vbic/vorr).
11805 INVERSE makes no difference to the recognition of float vectors.
11807 The return value is the variant of immediate as shown in the above table, or
11808 -1 if the given value doesn't match any of the listed patterns.
11811 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
11812 rtx
*modconst
, int *elementwidth
)
11814 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11816 for (i = 0; i < idx; i += (STRIDE)) \
11821 immtype = (CLASS); \
11822 elsize = (ELSIZE); \
11826 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11827 unsigned int innersize
;
11828 unsigned char bytes
[16];
11829 int immtype
= -1, matches
;
11830 unsigned int invmask
= inverse
? 0xff : 0;
11831 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11835 n_elts
= CONST_VECTOR_NUNITS (op
);
11836 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
11841 if (mode
== VOIDmode
)
11843 innersize
= GET_MODE_SIZE (mode
);
11846 /* Vectors of float constants. */
11847 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11849 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11850 REAL_VALUE_TYPE r0
;
11852 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11855 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
11857 for (i
= 1; i
< n_elts
; i
++)
11859 rtx elt
= CONST_VECTOR_ELT (op
, i
);
11860 REAL_VALUE_TYPE re
;
11862 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
11864 if (!REAL_VALUES_EQUAL (r0
, re
))
11869 *modconst
= CONST_VECTOR_ELT (op
, 0);
11874 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11880 /* Splat vector constant out into a byte vector. */
11881 for (i
= 0; i
< n_elts
; i
++)
11883 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11884 unsigned HOST_WIDE_INT elpart
;
11885 unsigned int part
, parts
;
11887 if (CONST_INT_P (el
))
11889 elpart
= INTVAL (el
);
11892 else if (CONST_DOUBLE_P (el
))
11894 elpart
= CONST_DOUBLE_LOW (el
);
11898 gcc_unreachable ();
11900 for (part
= 0; part
< parts
; part
++)
11903 for (byte
= 0; byte
< innersize
; byte
++)
11905 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11906 elpart
>>= BITS_PER_UNIT
;
11908 if (CONST_DOUBLE_P (el
))
11909 elpart
= CONST_DOUBLE_HIGH (el
);
11913 /* Sanity check. */
11914 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11918 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11919 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11921 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11922 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11924 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11925 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11927 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11928 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11930 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11932 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11934 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11935 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11937 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11938 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11940 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11941 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11943 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11944 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11946 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11948 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11950 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11951 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11953 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11954 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11956 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11957 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11959 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11960 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11962 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11964 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11965 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11973 *elementwidth
= elsize
;
11977 unsigned HOST_WIDE_INT imm
= 0;
11979 /* Un-invert bytes of recognized vector, if necessary. */
11981 for (i
= 0; i
< idx
; i
++)
11982 bytes
[i
] ^= invmask
;
11986 /* FIXME: Broken on 32-bit H_W_I hosts. */
11987 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11989 for (i
= 0; i
< 8; i
++)
11990 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11991 << (i
* BITS_PER_UNIT
);
11993 *modconst
= GEN_INT (imm
);
11997 unsigned HOST_WIDE_INT imm
= 0;
11999 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12000 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12002 *modconst
= GEN_INT (imm
);
12010 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12011 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12012 float elements), and a modified constant (whatever should be output for a
12013 VMOV) in *MODCONST. */
12016 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
12017 rtx
*modconst
, int *elementwidth
)
12021 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12027 *modconst
= tmpconst
;
12030 *elementwidth
= tmpwidth
;
12035 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12036 the immediate is valid, write a constant suitable for using as an operand
12037 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12038 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12041 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
12042 rtx
*modconst
, int *elementwidth
)
12046 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12048 if (retval
< 0 || retval
> 5)
12052 *modconst
= tmpconst
;
12055 *elementwidth
= tmpwidth
;
12060 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12061 the immediate is valid, write a constant suitable for using as an operand
12062 to VSHR/VSHL to *MODCONST and the corresponding element width to
12063 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12064 because they have different limitations. */
12067 neon_immediate_valid_for_shift (rtx op
, enum machine_mode mode
,
12068 rtx
*modconst
, int *elementwidth
,
12071 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12072 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12073 unsigned HOST_WIDE_INT last_elt
= 0;
12074 unsigned HOST_WIDE_INT maxshift
;
12076 /* Split vector constant out into a byte vector. */
12077 for (i
= 0; i
< n_elts
; i
++)
12079 rtx el
= CONST_VECTOR_ELT (op
, i
);
12080 unsigned HOST_WIDE_INT elpart
;
12082 if (CONST_INT_P (el
))
12083 elpart
= INTVAL (el
);
12084 else if (CONST_DOUBLE_P (el
))
12087 gcc_unreachable ();
12089 if (i
!= 0 && elpart
!= last_elt
)
12095 /* Shift less than element size. */
12096 maxshift
= innersize
* 8;
12100 /* Left shift immediate value can be from 0 to <size>-1. */
12101 if (last_elt
>= maxshift
)
12106 /* Right shift immediate value can be from 1 to <size>. */
12107 if (last_elt
== 0 || last_elt
> maxshift
)
12112 *elementwidth
= innersize
* 8;
12115 *modconst
= CONST_VECTOR_ELT (op
, 0);
12120 /* Return a string suitable for output of Neon immediate logic operation
12124 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
12125 int inverse
, int quad
)
12127 int width
, is_valid
;
12128 static char templ
[40];
12130 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12132 gcc_assert (is_valid
!= 0);
12135 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12137 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12142 /* Return a string suitable for output of Neon immediate shift operation
12143 (VSHR or VSHL) MNEM. */
12146 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12147 enum machine_mode mode
, int quad
,
12150 int width
, is_valid
;
12151 static char templ
[40];
12153 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12154 gcc_assert (is_valid
!= 0);
12157 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12159 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12164 /* Output a sequence of pairwise operations to implement a reduction.
12165 NOTE: We do "too much work" here, because pairwise operations work on two
12166 registers-worth of operands in one go. Unfortunately we can't exploit those
12167 extra calculations to do the full operation in fewer steps, I don't think.
12168 Although all vector elements of the result but the first are ignored, we
12169 actually calculate the same result in each of the elements. An alternative
12170 such as initially loading a vector with zero to use as each of the second
12171 operands would use up an additional register and take an extra instruction,
12172 for no particular gain. */
12175 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
12176 rtx (*reduc
) (rtx
, rtx
, rtx
))
12178 enum machine_mode inner
= GET_MODE_INNER (mode
);
12179 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
12182 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12184 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12185 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12190 /* If VALS is a vector constant that can be loaded into a register
12191 using VDUP, generate instructions to do so and return an RTX to
12192 assign to the register. Otherwise return NULL_RTX. */
12195 neon_vdup_constant (rtx vals
)
12197 enum machine_mode mode
= GET_MODE (vals
);
12198 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12199 int n_elts
= GET_MODE_NUNITS (mode
);
12200 bool all_same
= true;
12204 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12207 for (i
= 0; i
< n_elts
; ++i
)
12209 x
= XVECEXP (vals
, 0, i
);
12210 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12215 /* The elements are not all the same. We could handle repeating
12216 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12217 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12221 /* We can load this constant by using VDUP and a constant in a
12222 single ARM register. This will be cheaper than a vector
12225 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12226 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12229 /* Generate code to load VALS, which is a PARALLEL containing only
12230 constants (for vec_init) or CONST_VECTOR, efficiently into a
12231 register. Returns an RTX to copy into the register, or NULL_RTX
12232 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12235 neon_make_constant (rtx vals
)
12237 enum machine_mode mode
= GET_MODE (vals
);
12239 rtx const_vec
= NULL_RTX
;
12240 int n_elts
= GET_MODE_NUNITS (mode
);
12244 if (GET_CODE (vals
) == CONST_VECTOR
)
12246 else if (GET_CODE (vals
) == PARALLEL
)
12248 /* A CONST_VECTOR must contain only CONST_INTs and
12249 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12250 Only store valid constants in a CONST_VECTOR. */
12251 for (i
= 0; i
< n_elts
; ++i
)
12253 rtx x
= XVECEXP (vals
, 0, i
);
12254 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12257 if (n_const
== n_elts
)
12258 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12261 gcc_unreachable ();
12263 if (const_vec
!= NULL
12264 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12265 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12267 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12268 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12269 pipeline cycle; creating the constant takes one or two ARM
12270 pipeline cycles. */
12272 else if (const_vec
!= NULL_RTX
)
12273 /* Load from constant pool. On Cortex-A8 this takes two cycles
12274 (for either double or quad vectors). We can not take advantage
12275 of single-cycle VLD1 because we need a PC-relative addressing
12279 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12280 We can not construct an initializer. */
12284 /* Initialize vector TARGET to VALS. */
12287 neon_expand_vector_init (rtx target
, rtx vals
)
12289 enum machine_mode mode
= GET_MODE (target
);
12290 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12291 int n_elts
= GET_MODE_NUNITS (mode
);
12292 int n_var
= 0, one_var
= -1;
12293 bool all_same
= true;
12297 for (i
= 0; i
< n_elts
; ++i
)
12299 x
= XVECEXP (vals
, 0, i
);
12300 if (!CONSTANT_P (x
))
12301 ++n_var
, one_var
= i
;
12303 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12309 rtx constant
= neon_make_constant (vals
);
12310 if (constant
!= NULL_RTX
)
12312 emit_move_insn (target
, constant
);
12317 /* Splat a single non-constant element if we can. */
12318 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12320 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12321 emit_insn (gen_rtx_SET (VOIDmode
, target
,
12322 gen_rtx_VEC_DUPLICATE (mode
, x
)));
12326 /* One field is non-constant. Load constant then overwrite varying
12327 field. This is more efficient than using the stack. */
12330 rtx copy
= copy_rtx (vals
);
12331 rtx index
= GEN_INT (one_var
);
12333 /* Load constant part of vector, substitute neighboring value for
12334 varying element. */
12335 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12336 neon_expand_vector_init (target
, copy
);
12338 /* Insert variable. */
12339 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12343 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12346 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12349 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12352 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12355 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12358 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12361 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12364 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12367 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12370 gcc_unreachable ();
12375 /* Construct the vector in memory one field at a time
12376 and load the whole vector. */
12377 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12378 for (i
= 0; i
< n_elts
; i
++)
12379 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12380 i
* GET_MODE_SIZE (inner_mode
)),
12381 XVECEXP (vals
, 0, i
));
12382 emit_move_insn (target
, mem
);
12385 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12386 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12387 reported source locations are bogus. */
12390 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12393 HOST_WIDE_INT lane
;
12395 gcc_assert (CONST_INT_P (operand
));
12397 lane
= INTVAL (operand
);
12399 if (lane
< low
|| lane
>= high
)
12403 /* Bounds-check lanes. */
12406 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12408 bounds_check (operand
, low
, high
, "lane out of range");
12411 /* Bounds-check constants. */
12414 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12416 bounds_check (operand
, low
, high
, "constant out of range");
12420 neon_element_bits (enum machine_mode mode
)
12422 if (mode
== DImode
)
12423 return GET_MODE_BITSIZE (mode
);
12425 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
12429 /* Predicates for `match_operand' and `match_operator'. */
12431 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12432 WB is true if full writeback address modes are allowed and is false
12433 if limited writeback address modes (POST_INC and PRE_DEC) are
12437 arm_coproc_mem_operand (rtx op
, bool wb
)
12441 /* Reject eliminable registers. */
12442 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12443 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12444 || reg_mentioned_p (arg_pointer_rtx
, op
)
12445 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12446 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12447 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12448 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12451 /* Constants are converted into offsets from labels. */
12455 ind
= XEXP (op
, 0);
12457 if (reload_completed
12458 && (GET_CODE (ind
) == LABEL_REF
12459 || (GET_CODE (ind
) == CONST
12460 && GET_CODE (XEXP (ind
, 0)) == PLUS
12461 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12462 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12465 /* Match: (mem (reg)). */
12467 return arm_address_register_rtx_p (ind
, 0);
12469 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12470 acceptable in any case (subject to verification by
12471 arm_address_register_rtx_p). We need WB to be true to accept
12472 PRE_INC and POST_DEC. */
12473 if (GET_CODE (ind
) == POST_INC
12474 || GET_CODE (ind
) == PRE_DEC
12476 && (GET_CODE (ind
) == PRE_INC
12477 || GET_CODE (ind
) == POST_DEC
)))
12478 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12481 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12482 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12483 && GET_CODE (XEXP (ind
, 1)) == PLUS
12484 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12485 ind
= XEXP (ind
, 1);
12490 if (GET_CODE (ind
) == PLUS
12491 && REG_P (XEXP (ind
, 0))
12492 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12493 && CONST_INT_P (XEXP (ind
, 1))
12494 && INTVAL (XEXP (ind
, 1)) > -1024
12495 && INTVAL (XEXP (ind
, 1)) < 1024
12496 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12502 /* Return TRUE if OP is a memory operand which we can load or store a vector
12503 to/from. TYPE is one of the following values:
12504 0 - Vector load/stor (vldr)
12505 1 - Core registers (ldm)
12506 2 - Element/structure loads (vld1)
12509 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12513 /* Reject eliminable registers. */
12514 if (! (reload_in_progress
|| reload_completed
)
12515 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12516 || reg_mentioned_p (arg_pointer_rtx
, op
)
12517 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12518 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12519 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12520 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12523 /* Constants are converted into offsets from labels. */
12527 ind
= XEXP (op
, 0);
12529 if (reload_completed
12530 && (GET_CODE (ind
) == LABEL_REF
12531 || (GET_CODE (ind
) == CONST
12532 && GET_CODE (XEXP (ind
, 0)) == PLUS
12533 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12534 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12537 /* Match: (mem (reg)). */
12539 return arm_address_register_rtx_p (ind
, 0);
12541 /* Allow post-increment with Neon registers. */
12542 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12543 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12544 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12546 /* FIXME: vld1 allows register post-modify. */
12552 && GET_CODE (ind
) == PLUS
12553 && REG_P (XEXP (ind
, 0))
12554 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12555 && CONST_INT_P (XEXP (ind
, 1))
12556 && INTVAL (XEXP (ind
, 1)) > -1024
12557 /* For quad modes, we restrict the constant offset to be slightly less
12558 than what the instruction format permits. We have no such constraint
12559 on double mode offsets. (This must match arm_legitimate_index_p.) */
12560 && (INTVAL (XEXP (ind
, 1))
12561 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12562 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12568 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12571 neon_struct_mem_operand (rtx op
)
12575 /* Reject eliminable registers. */
12576 if (! (reload_in_progress
|| reload_completed
)
12577 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12578 || reg_mentioned_p (arg_pointer_rtx
, op
)
12579 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12580 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12581 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12582 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12585 /* Constants are converted into offsets from labels. */
12589 ind
= XEXP (op
, 0);
12591 if (reload_completed
12592 && (GET_CODE (ind
) == LABEL_REF
12593 || (GET_CODE (ind
) == CONST
12594 && GET_CODE (XEXP (ind
, 0)) == PLUS
12595 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12596 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12599 /* Match: (mem (reg)). */
12601 return arm_address_register_rtx_p (ind
, 0);
12603 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12604 if (GET_CODE (ind
) == POST_INC
12605 || GET_CODE (ind
) == PRE_DEC
)
12606 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12611 /* Return true if X is a register that will be eliminated later on. */
12613 arm_eliminable_register (rtx x
)
12615 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12616 || REGNO (x
) == ARG_POINTER_REGNUM
12617 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12618 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12621 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12622 coprocessor registers. Otherwise return NO_REGS. */
12625 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
12627 if (mode
== HFmode
)
12629 if (!TARGET_NEON_FP16
)
12630 return GENERAL_REGS
;
12631 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12633 return GENERAL_REGS
;
12636 /* The neon move patterns handle all legitimate vector and struct
12639 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12640 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12641 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12642 || VALID_NEON_STRUCT_MODE (mode
)))
12645 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12648 return GENERAL_REGS
;
12651 /* Values which must be returned in the most-significant end of the return
12655 arm_return_in_msb (const_tree valtype
)
12657 return (TARGET_AAPCS_BASED
12658 && BYTES_BIG_ENDIAN
12659 && (AGGREGATE_TYPE_P (valtype
)
12660 || TREE_CODE (valtype
) == COMPLEX_TYPE
12661 || FIXED_POINT_TYPE_P (valtype
)));
12664 /* Return TRUE if X references a SYMBOL_REF. */
12666 symbol_mentioned_p (rtx x
)
12671 if (GET_CODE (x
) == SYMBOL_REF
)
12674 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12675 are constant offsets, not symbols. */
12676 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12679 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12681 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12687 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12688 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12691 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12698 /* Return TRUE if X references a LABEL_REF. */
12700 label_mentioned_p (rtx x
)
12705 if (GET_CODE (x
) == LABEL_REF
)
12708 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12709 instruction, but they are constant offsets, not symbols. */
12710 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12713 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12714 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12720 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12721 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12724 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12732 tls_mentioned_p (rtx x
)
12734 switch (GET_CODE (x
))
12737 return tls_mentioned_p (XEXP (x
, 0));
12740 if (XINT (x
, 1) == UNSPEC_TLS
)
12748 /* Must not copy any rtx that uses a pc-relative address. */
12751 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
12753 if (GET_CODE (*x
) == UNSPEC
12754 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
12755 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
12761 arm_cannot_copy_insn_p (rtx insn
)
12763 /* The tls call insn cannot be copied, as it is paired with a data
12765 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12768 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
12772 minmax_code (rtx x
)
12774 enum rtx_code code
= GET_CODE (x
);
12787 gcc_unreachable ();
12791 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12794 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12795 int *mask
, bool *signed_sat
)
12797 /* The high bound must be a power of two minus one. */
12798 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12802 /* The low bound is either zero (for usat) or one less than the
12803 negation of the high bound (for ssat). */
12804 if (INTVAL (lo_bound
) == 0)
12809 *signed_sat
= false;
12814 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12819 *signed_sat
= true;
12827 /* Return 1 if memory locations are adjacent. */
12829 adjacent_mem_locations (rtx a
, rtx b
)
12831 /* We don't guarantee to preserve the order of these memory refs. */
12832 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12835 if ((REG_P (XEXP (a
, 0))
12836 || (GET_CODE (XEXP (a
, 0)) == PLUS
12837 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12838 && (REG_P (XEXP (b
, 0))
12839 || (GET_CODE (XEXP (b
, 0)) == PLUS
12840 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12842 HOST_WIDE_INT val0
= 0, val1
= 0;
12846 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12848 reg0
= XEXP (XEXP (a
, 0), 0);
12849 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12852 reg0
= XEXP (a
, 0);
12854 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12856 reg1
= XEXP (XEXP (b
, 0), 0);
12857 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12860 reg1
= XEXP (b
, 0);
12862 /* Don't accept any offset that will require multiple
12863 instructions to handle, since this would cause the
12864 arith_adjacentmem pattern to output an overlong sequence. */
12865 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12868 /* Don't allow an eliminable register: register elimination can make
12869 the offset too large. */
12870 if (arm_eliminable_register (reg0
))
12873 val_diff
= val1
- val0
;
12877 /* If the target has load delay slots, then there's no benefit
12878 to using an ldm instruction unless the offset is zero and
12879 we are optimizing for size. */
12880 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12881 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12882 && (val_diff
== 4 || val_diff
== -4));
12885 return ((REGNO (reg0
) == REGNO (reg1
))
12886 && (val_diff
== 4 || val_diff
== -4));
12892 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12893 for load operations, false for store operations. CONSECUTIVE is true
12894 if the register numbers in the operation must be consecutive in the register
12895 bank. RETURN_PC is true if value is to be loaded in PC.
12896 The pattern we are trying to match for load is:
12897 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12898 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12901 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12904 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12905 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12906 3. If consecutive is TRUE, then for kth register being loaded,
12907 REGNO (R_dk) = REGNO (R_d0) + k.
12908 The pattern for store is similar. */
12910 ldm_stm_operation_p (rtx op
, bool load
, enum machine_mode mode
,
12911 bool consecutive
, bool return_pc
)
12913 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12914 rtx reg
, mem
, addr
;
12916 unsigned first_regno
;
12917 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12919 bool addr_reg_in_reglist
= false;
12920 bool update
= false;
12925 /* If not in SImode, then registers must be consecutive
12926 (e.g., VLDM instructions for DFmode). */
12927 gcc_assert ((mode
== SImode
) || consecutive
);
12928 /* Setting return_pc for stores is illegal. */
12929 gcc_assert (!return_pc
|| load
);
12931 /* Set up the increments and the regs per val based on the mode. */
12932 reg_increment
= GET_MODE_SIZE (mode
);
12933 regs_per_val
= reg_increment
/ 4;
12934 offset_adj
= return_pc
? 1 : 0;
12937 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12938 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12941 /* Check if this is a write-back. */
12942 elt
= XVECEXP (op
, 0, offset_adj
);
12943 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12949 /* The offset adjustment must be the number of registers being
12950 popped times the size of a single register. */
12951 if (!REG_P (SET_DEST (elt
))
12952 || !REG_P (XEXP (SET_SRC (elt
), 0))
12953 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12954 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12955 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12956 ((count
- 1 - offset_adj
) * reg_increment
))
12960 i
= i
+ offset_adj
;
12961 base
= base
+ offset_adj
;
12962 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12963 success depends on the type: VLDM can do just one reg,
12964 LDM must do at least two. */
12965 if ((count
<= i
) && (mode
== SImode
))
12968 elt
= XVECEXP (op
, 0, i
- 1);
12969 if (GET_CODE (elt
) != SET
)
12974 reg
= SET_DEST (elt
);
12975 mem
= SET_SRC (elt
);
12979 reg
= SET_SRC (elt
);
12980 mem
= SET_DEST (elt
);
12983 if (!REG_P (reg
) || !MEM_P (mem
))
12986 regno
= REGNO (reg
);
12987 first_regno
= regno
;
12988 addr
= XEXP (mem
, 0);
12989 if (GET_CODE (addr
) == PLUS
)
12991 if (!CONST_INT_P (XEXP (addr
, 1)))
12994 offset
= INTVAL (XEXP (addr
, 1));
12995 addr
= XEXP (addr
, 0);
13001 /* Don't allow SP to be loaded unless it is also the base register. It
13002 guarantees that SP is reset correctly when an LDM instruction
13003 is interrupted. Otherwise, we might end up with a corrupt stack. */
13004 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13007 for (; i
< count
; i
++)
13009 elt
= XVECEXP (op
, 0, i
);
13010 if (GET_CODE (elt
) != SET
)
13015 reg
= SET_DEST (elt
);
13016 mem
= SET_SRC (elt
);
13020 reg
= SET_SRC (elt
);
13021 mem
= SET_DEST (elt
);
13025 || GET_MODE (reg
) != mode
13026 || REGNO (reg
) <= regno
13029 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13030 /* Don't allow SP to be loaded unless it is also the base register. It
13031 guarantees that SP is reset correctly when an LDM instruction
13032 is interrupted. Otherwise, we might end up with a corrupt stack. */
13033 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13035 || GET_MODE (mem
) != mode
13036 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13037 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13038 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13039 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13040 offset
+ (i
- base
) * reg_increment
))
13041 && (!REG_P (XEXP (mem
, 0))
13042 || offset
+ (i
- base
) * reg_increment
!= 0)))
13045 regno
= REGNO (reg
);
13046 if (regno
== REGNO (addr
))
13047 addr_reg_in_reglist
= true;
13052 if (update
&& addr_reg_in_reglist
)
13055 /* For Thumb-1, address register is always modified - either by write-back
13056 or by explicit load. If the pattern does not describe an update,
13057 then the address register must be in the list of loaded registers. */
13059 return update
|| addr_reg_in_reglist
;
13065 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13066 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13067 instruction. ADD_OFFSET is nonzero if the base address register needs
13068 to be modified with an add instruction before we can use it. */
13071 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13072 int nops
, HOST_WIDE_INT add_offset
)
13074 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13075 if the offset isn't small enough. The reason 2 ldrs are faster
13076 is because these ARMs are able to do more than one cache access
13077 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13078 whilst the ARM8 has a double bandwidth cache. This means that
13079 these cores can do both an instruction fetch and a data fetch in
13080 a single cycle, so the trick of calculating the address into a
13081 scratch register (one of the result regs) and then doing a load
13082 multiple actually becomes slower (and no smaller in code size).
13083 That is the transformation
13085 ldr rd1, [rbase + offset]
13086 ldr rd2, [rbase + offset + 4]
13090 add rd1, rbase, offset
13091 ldmia rd1, {rd1, rd2}
13093 produces worse code -- '3 cycles + any stalls on rd2' instead of
13094 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13095 access per cycle, the first sequence could never complete in less
13096 than 6 cycles, whereas the ldm sequence would only take 5 and
13097 would make better use of sequential accesses if not hitting the
13100 We cheat here and test 'arm_ld_sched' which we currently know to
13101 only be true for the ARM8, ARM9 and StrongARM. If this ever
13102 changes, then the test below needs to be reworked. */
13103 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13106 /* XScale has load-store double instructions, but they have stricter
13107 alignment requirements than load-store multiple, so we cannot
13110 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13111 the pipeline until completion.
13119 An ldr instruction takes 1-3 cycles, but does not block the
13128 Best case ldr will always win. However, the more ldr instructions
13129 we issue, the less likely we are to be able to schedule them well.
13130 Using ldr instructions also increases code size.
13132 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13133 for counts of 3 or 4 regs. */
13134 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13139 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13140 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13141 an array ORDER which describes the sequence to use when accessing the
13142 offsets that produces an ascending order. In this sequence, each
13143 offset must be larger by exactly 4 than the previous one. ORDER[0]
13144 must have been filled in with the lowest offset by the caller.
13145 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13146 we use to verify that ORDER produces an ascending order of registers.
13147 Return true if it was possible to construct such an order, false if
13151 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13152 int *unsorted_regs
)
13155 for (i
= 1; i
< nops
; i
++)
13159 order
[i
] = order
[i
- 1];
13160 for (j
= 0; j
< nops
; j
++)
13161 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13163 /* We must find exactly one offset that is higher than the
13164 previous one by 4. */
13165 if (order
[i
] != order
[i
- 1])
13169 if (order
[i
] == order
[i
- 1])
13171 /* The register numbers must be ascending. */
13172 if (unsorted_regs
!= NULL
13173 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13179 /* Used to determine in a peephole whether a sequence of load
13180 instructions can be changed into a load-multiple instruction.
13181 NOPS is the number of separate load instructions we are examining. The
13182 first NOPS entries in OPERANDS are the destination registers, the
13183 next NOPS entries are memory operands. If this function is
13184 successful, *BASE is set to the common base register of the memory
13185 accesses; *LOAD_OFFSET is set to the first memory location's offset
13186 from that base register.
13187 REGS is an array filled in with the destination register numbers.
13188 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13189 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13190 the sequence of registers in REGS matches the loads from ascending memory
13191 locations, and the function verifies that the register numbers are
13192 themselves ascending. If CHECK_REGS is false, the register numbers
13193 are stored in the order they are found in the operands. */
13195 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13196 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13198 int unsorted_regs
[MAX_LDM_STM_OPS
];
13199 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13200 int order
[MAX_LDM_STM_OPS
];
13201 rtx base_reg_rtx
= NULL
;
13205 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13206 easily extended if required. */
13207 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13209 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13211 /* Loop over the operands and check that the memory references are
13212 suitable (i.e. immediate offsets from the same base register). At
13213 the same time, extract the target register, and the memory
13215 for (i
= 0; i
< nops
; i
++)
13220 /* Convert a subreg of a mem into the mem itself. */
13221 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13222 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13224 gcc_assert (MEM_P (operands
[nops
+ i
]));
13226 /* Don't reorder volatile memory references; it doesn't seem worth
13227 looking for the case where the order is ok anyway. */
13228 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13231 offset
= const0_rtx
;
13233 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13234 || (GET_CODE (reg
) == SUBREG
13235 && REG_P (reg
= SUBREG_REG (reg
))))
13236 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13237 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13238 || (GET_CODE (reg
) == SUBREG
13239 && REG_P (reg
= SUBREG_REG (reg
))))
13240 && (CONST_INT_P (offset
13241 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13245 base_reg
= REGNO (reg
);
13246 base_reg_rtx
= reg
;
13247 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13250 else if (base_reg
!= (int) REGNO (reg
))
13251 /* Not addressed from the same base register. */
13254 unsorted_regs
[i
] = (REG_P (operands
[i
])
13255 ? REGNO (operands
[i
])
13256 : REGNO (SUBREG_REG (operands
[i
])));
13258 /* If it isn't an integer register, or if it overwrites the
13259 base register but isn't the last insn in the list, then
13260 we can't do this. */
13261 if (unsorted_regs
[i
] < 0
13262 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13263 || unsorted_regs
[i
] > 14
13264 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13267 /* Don't allow SP to be loaded unless it is also the base
13268 register. It guarantees that SP is reset correctly when
13269 an LDM instruction is interrupted. Otherwise, we might
13270 end up with a corrupt stack. */
13271 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13274 unsorted_offsets
[i
] = INTVAL (offset
);
13275 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13279 /* Not a suitable memory address. */
13283 /* All the useful information has now been extracted from the
13284 operands into unsorted_regs and unsorted_offsets; additionally,
13285 order[0] has been set to the lowest offset in the list. Sort
13286 the offsets into order, verifying that they are adjacent, and
13287 check that the register numbers are ascending. */
13288 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13289 check_regs
? unsorted_regs
: NULL
))
13293 memcpy (saved_order
, order
, sizeof order
);
13299 for (i
= 0; i
< nops
; i
++)
13300 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13302 *load_offset
= unsorted_offsets
[order
[0]];
13306 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13309 if (unsorted_offsets
[order
[0]] == 0)
13310 ldm_case
= 1; /* ldmia */
13311 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13312 ldm_case
= 2; /* ldmib */
13313 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13314 ldm_case
= 3; /* ldmda */
13315 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13316 ldm_case
= 4; /* ldmdb */
13317 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13318 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13323 if (!multiple_operation_profitable_p (false, nops
,
13325 ? unsorted_offsets
[order
[0]] : 0))
13331 /* Used to determine in a peephole whether a sequence of store instructions can
13332 be changed into a store-multiple instruction.
13333 NOPS is the number of separate store instructions we are examining.
13334 NOPS_TOTAL is the total number of instructions recognized by the peephole
13336 The first NOPS entries in OPERANDS are the source registers, the next
13337 NOPS entries are memory operands. If this function is successful, *BASE is
13338 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13339 to the first memory location's offset from that base register. REGS is an
13340 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13341 likewise filled with the corresponding rtx's.
13342 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13343 numbers to an ascending order of stores.
13344 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13345 from ascending memory locations, and the function verifies that the register
13346 numbers are themselves ascending. If CHECK_REGS is false, the register
13347 numbers are stored in the order they are found in the operands. */
13349 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13350 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13351 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13353 int unsorted_regs
[MAX_LDM_STM_OPS
];
13354 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13355 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13356 int order
[MAX_LDM_STM_OPS
];
13358 rtx base_reg_rtx
= NULL
;
13361 /* Write back of base register is currently only supported for Thumb 1. */
13362 int base_writeback
= TARGET_THUMB1
;
13364 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13365 easily extended if required. */
13366 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13368 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13370 /* Loop over the operands and check that the memory references are
13371 suitable (i.e. immediate offsets from the same base register). At
13372 the same time, extract the target register, and the memory
13374 for (i
= 0; i
< nops
; i
++)
13379 /* Convert a subreg of a mem into the mem itself. */
13380 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13381 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13383 gcc_assert (MEM_P (operands
[nops
+ i
]));
13385 /* Don't reorder volatile memory references; it doesn't seem worth
13386 looking for the case where the order is ok anyway. */
13387 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13390 offset
= const0_rtx
;
13392 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13393 || (GET_CODE (reg
) == SUBREG
13394 && REG_P (reg
= SUBREG_REG (reg
))))
13395 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13396 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13397 || (GET_CODE (reg
) == SUBREG
13398 && REG_P (reg
= SUBREG_REG (reg
))))
13399 && (CONST_INT_P (offset
13400 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13402 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13403 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13404 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13408 base_reg
= REGNO (reg
);
13409 base_reg_rtx
= reg
;
13410 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13413 else if (base_reg
!= (int) REGNO (reg
))
13414 /* Not addressed from the same base register. */
13417 /* If it isn't an integer register, then we can't do this. */
13418 if (unsorted_regs
[i
] < 0
13419 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13420 /* The effects are unpredictable if the base register is
13421 both updated and stored. */
13422 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13423 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13424 || unsorted_regs
[i
] > 14)
13427 unsorted_offsets
[i
] = INTVAL (offset
);
13428 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13432 /* Not a suitable memory address. */
13436 /* All the useful information has now been extracted from the
13437 operands into unsorted_regs and unsorted_offsets; additionally,
13438 order[0] has been set to the lowest offset in the list. Sort
13439 the offsets into order, verifying that they are adjacent, and
13440 check that the register numbers are ascending. */
13441 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13442 check_regs
? unsorted_regs
: NULL
))
13446 memcpy (saved_order
, order
, sizeof order
);
13452 for (i
= 0; i
< nops
; i
++)
13454 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13456 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13459 *load_offset
= unsorted_offsets
[order
[0]];
13463 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13466 if (unsorted_offsets
[order
[0]] == 0)
13467 stm_case
= 1; /* stmia */
13468 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13469 stm_case
= 2; /* stmib */
13470 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13471 stm_case
= 3; /* stmda */
13472 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13473 stm_case
= 4; /* stmdb */
13477 if (!multiple_operation_profitable_p (false, nops
, 0))
13483 /* Routines for use in generating RTL. */
13485 /* Generate a load-multiple instruction. COUNT is the number of loads in
13486 the instruction; REGS and MEMS are arrays containing the operands.
13487 BASEREG is the base register to be used in addressing the memory operands.
13488 WBACK_OFFSET is nonzero if the instruction should update the base
13492 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13493 HOST_WIDE_INT wback_offset
)
13498 if (!multiple_operation_profitable_p (false, count
, 0))
13504 for (i
= 0; i
< count
; i
++)
13505 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13507 if (wback_offset
!= 0)
13508 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13510 seq
= get_insns ();
13516 result
= gen_rtx_PARALLEL (VOIDmode
,
13517 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13518 if (wback_offset
!= 0)
13520 XVECEXP (result
, 0, 0)
13521 = gen_rtx_SET (VOIDmode
, basereg
,
13522 plus_constant (Pmode
, basereg
, wback_offset
));
13527 for (j
= 0; i
< count
; i
++, j
++)
13528 XVECEXP (result
, 0, i
)
13529 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13534 /* Generate a store-multiple instruction. COUNT is the number of stores in
13535 the instruction; REGS and MEMS are arrays containing the operands.
13536 BASEREG is the base register to be used in addressing the memory operands.
13537 WBACK_OFFSET is nonzero if the instruction should update the base
13541 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13542 HOST_WIDE_INT wback_offset
)
13547 if (GET_CODE (basereg
) == PLUS
)
13548 basereg
= XEXP (basereg
, 0);
13550 if (!multiple_operation_profitable_p (false, count
, 0))
13556 for (i
= 0; i
< count
; i
++)
13557 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13559 if (wback_offset
!= 0)
13560 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13562 seq
= get_insns ();
13568 result
= gen_rtx_PARALLEL (VOIDmode
,
13569 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13570 if (wback_offset
!= 0)
13572 XVECEXP (result
, 0, 0)
13573 = gen_rtx_SET (VOIDmode
, basereg
,
13574 plus_constant (Pmode
, basereg
, wback_offset
));
13579 for (j
= 0; i
< count
; i
++, j
++)
13580 XVECEXP (result
, 0, i
)
13581 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13586 /* Generate either a load-multiple or a store-multiple instruction. This
13587 function can be used in situations where we can start with a single MEM
13588 rtx and adjust its address upwards.
13589 COUNT is the number of operations in the instruction, not counting a
13590 possible update of the base register. REGS is an array containing the
13592 BASEREG is the base register to be used in addressing the memory operands,
13593 which are constructed from BASEMEM.
13594 WRITE_BACK specifies whether the generated instruction should include an
13595 update of the base register.
13596 OFFSETP is used to pass an offset to and from this function; this offset
13597 is not used when constructing the address (instead BASEMEM should have an
13598 appropriate offset in its address), it is used only for setting
13599 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13602 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13603 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13605 rtx mems
[MAX_LDM_STM_OPS
];
13606 HOST_WIDE_INT offset
= *offsetp
;
13609 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13611 if (GET_CODE (basereg
) == PLUS
)
13612 basereg
= XEXP (basereg
, 0);
13614 for (i
= 0; i
< count
; i
++)
13616 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13617 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13625 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13626 write_back
? 4 * count
: 0);
13628 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13629 write_back
? 4 * count
: 0);
13633 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13634 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13636 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13641 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13642 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13644 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13648 /* Called from a peephole2 expander to turn a sequence of loads into an
13649 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13650 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13651 is true if we can reorder the registers because they are used commutatively
13653 Returns true iff we could generate a new instruction. */
13656 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13658 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13659 rtx mems
[MAX_LDM_STM_OPS
];
13660 int i
, j
, base_reg
;
13662 HOST_WIDE_INT offset
;
13663 int write_back
= FALSE
;
13667 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13668 &base_reg
, &offset
, !sort_regs
);
13674 for (i
= 0; i
< nops
- 1; i
++)
13675 for (j
= i
+ 1; j
< nops
; j
++)
13676 if (regs
[i
] > regs
[j
])
13682 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13686 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13687 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13693 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13694 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13696 if (!TARGET_THUMB1
)
13698 base_reg
= regs
[0];
13699 base_reg_rtx
= newbase
;
13703 for (i
= 0; i
< nops
; i
++)
13705 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13706 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13709 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13710 write_back
? offset
+ i
* 4 : 0));
13714 /* Called from a peephole2 expander to turn a sequence of stores into an
13715 STM instruction. OPERANDS are the operands found by the peephole matcher;
13716 NOPS indicates how many separate stores we are trying to combine.
13717 Returns true iff we could generate a new instruction. */
13720 gen_stm_seq (rtx
*operands
, int nops
)
13723 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13724 rtx mems
[MAX_LDM_STM_OPS
];
13727 HOST_WIDE_INT offset
;
13728 int write_back
= FALSE
;
13731 bool base_reg_dies
;
13733 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13734 mem_order
, &base_reg
, &offset
, true);
13739 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13741 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13744 gcc_assert (base_reg_dies
);
13750 gcc_assert (base_reg_dies
);
13751 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13755 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13757 for (i
= 0; i
< nops
; i
++)
13759 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13760 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13763 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13764 write_back
? offset
+ i
* 4 : 0));
13768 /* Called from a peephole2 expander to turn a sequence of stores that are
13769 preceded by constant loads into an STM instruction. OPERANDS are the
13770 operands found by the peephole matcher; NOPS indicates how many
13771 separate stores we are trying to combine; there are 2 * NOPS
13772 instructions in the peephole.
13773 Returns true iff we could generate a new instruction. */
13776 gen_const_stm_seq (rtx
*operands
, int nops
)
13778 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13779 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13780 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13781 rtx mems
[MAX_LDM_STM_OPS
];
13784 HOST_WIDE_INT offset
;
13785 int write_back
= FALSE
;
13788 bool base_reg_dies
;
13790 HARD_REG_SET allocated
;
13792 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13793 mem_order
, &base_reg
, &offset
, false);
13798 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13800 /* If the same register is used more than once, try to find a free
13802 CLEAR_HARD_REG_SET (allocated
);
13803 for (i
= 0; i
< nops
; i
++)
13805 for (j
= i
+ 1; j
< nops
; j
++)
13806 if (regs
[i
] == regs
[j
])
13808 rtx t
= peep2_find_free_register (0, nops
* 2,
13809 TARGET_THUMB1
? "l" : "r",
13810 SImode
, &allocated
);
13814 regs
[i
] = REGNO (t
);
13818 /* Compute an ordering that maps the register numbers to an ascending
13821 for (i
= 0; i
< nops
; i
++)
13822 if (regs
[i
] < regs
[reg_order
[0]])
13825 for (i
= 1; i
< nops
; i
++)
13827 int this_order
= reg_order
[i
- 1];
13828 for (j
= 0; j
< nops
; j
++)
13829 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13830 && (this_order
== reg_order
[i
- 1]
13831 || regs
[j
] < regs
[this_order
]))
13833 reg_order
[i
] = this_order
;
13836 /* Ensure that registers that must be live after the instruction end
13837 up with the correct value. */
13838 for (i
= 0; i
< nops
; i
++)
13840 int this_order
= reg_order
[i
];
13841 if ((this_order
!= mem_order
[i
]
13842 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13843 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13847 /* Load the constants. */
13848 for (i
= 0; i
< nops
; i
++)
13850 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13851 sorted_regs
[i
] = regs
[reg_order
[i
]];
13852 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13855 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13857 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13860 gcc_assert (base_reg_dies
);
13866 gcc_assert (base_reg_dies
);
13867 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13871 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13873 for (i
= 0; i
< nops
; i
++)
13875 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13876 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13879 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13880 write_back
? offset
+ i
* 4 : 0));
13884 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13885 unaligned copies on processors which support unaligned semantics for those
13886 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13887 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13888 An interleave factor of 1 (the minimum) will perform no interleaving.
13889 Load/store multiple are used for aligned addresses where possible. */
13892 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13893 HOST_WIDE_INT length
,
13894 unsigned int interleave_factor
)
13896 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13897 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13898 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13899 HOST_WIDE_INT i
, j
;
13900 HOST_WIDE_INT remaining
= length
, words
;
13901 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13903 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13904 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13905 HOST_WIDE_INT srcoffset
, dstoffset
;
13906 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13909 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13911 /* Use hard registers if we have aligned source or destination so we can use
13912 load/store multiple with contiguous registers. */
13913 if (dst_aligned
|| src_aligned
)
13914 for (i
= 0; i
< interleave_factor
; i
++)
13915 regs
[i
] = gen_rtx_REG (SImode
, i
);
13917 for (i
= 0; i
< interleave_factor
; i
++)
13918 regs
[i
] = gen_reg_rtx (SImode
);
13920 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13921 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13923 srcoffset
= dstoffset
= 0;
13925 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13926 For copying the last bytes we want to subtract this offset again. */
13927 src_autoinc
= dst_autoinc
= 0;
13929 for (i
= 0; i
< interleave_factor
; i
++)
13932 /* Copy BLOCK_SIZE_BYTES chunks. */
13934 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13937 if (src_aligned
&& interleave_factor
> 1)
13939 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13940 TRUE
, srcbase
, &srcoffset
));
13941 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13945 for (j
= 0; j
< interleave_factor
; j
++)
13947 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13949 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13950 srcoffset
+ j
* UNITS_PER_WORD
);
13951 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13953 srcoffset
+= block_size_bytes
;
13957 if (dst_aligned
&& interleave_factor
> 1)
13959 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13960 TRUE
, dstbase
, &dstoffset
));
13961 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13965 for (j
= 0; j
< interleave_factor
; j
++)
13967 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13969 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13970 dstoffset
+ j
* UNITS_PER_WORD
);
13971 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13973 dstoffset
+= block_size_bytes
;
13976 remaining
-= block_size_bytes
;
13979 /* Copy any whole words left (note these aren't interleaved with any
13980 subsequent halfword/byte load/stores in the interests of simplicity). */
13982 words
= remaining
/ UNITS_PER_WORD
;
13984 gcc_assert (words
< interleave_factor
);
13986 if (src_aligned
&& words
> 1)
13988 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13990 src_autoinc
+= UNITS_PER_WORD
* words
;
13994 for (j
= 0; j
< words
; j
++)
13996 addr
= plus_constant (Pmode
, src
,
13997 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13998 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13999 srcoffset
+ j
* UNITS_PER_WORD
);
14000 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14002 srcoffset
+= words
* UNITS_PER_WORD
;
14005 if (dst_aligned
&& words
> 1)
14007 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14009 dst_autoinc
+= words
* UNITS_PER_WORD
;
14013 for (j
= 0; j
< words
; j
++)
14015 addr
= plus_constant (Pmode
, dst
,
14016 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14017 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14018 dstoffset
+ j
* UNITS_PER_WORD
);
14019 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14021 dstoffset
+= words
* UNITS_PER_WORD
;
14024 remaining
-= words
* UNITS_PER_WORD
;
14026 gcc_assert (remaining
< 4);
14028 /* Copy a halfword if necessary. */
14030 if (remaining
>= 2)
14032 halfword_tmp
= gen_reg_rtx (SImode
);
14034 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14035 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14036 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14038 /* Either write out immediately, or delay until we've loaded the last
14039 byte, depending on interleave factor. */
14040 if (interleave_factor
== 1)
14042 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14043 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14044 emit_insn (gen_unaligned_storehi (mem
,
14045 gen_lowpart (HImode
, halfword_tmp
)));
14046 halfword_tmp
= NULL
;
14054 gcc_assert (remaining
< 2);
14056 /* Copy last byte. */
14058 if ((remaining
& 1) != 0)
14060 byte_tmp
= gen_reg_rtx (SImode
);
14062 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14063 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14064 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14066 if (interleave_factor
== 1)
14068 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14069 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14070 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14079 /* Store last halfword if we haven't done so already. */
14083 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14084 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14085 emit_insn (gen_unaligned_storehi (mem
,
14086 gen_lowpart (HImode
, halfword_tmp
)));
14090 /* Likewise for last byte. */
14094 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14095 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14096 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14100 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14103 /* From mips_adjust_block_mem:
14105 Helper function for doing a loop-based block operation on memory
14106 reference MEM. Each iteration of the loop will operate on LENGTH
14109 Create a new base register for use within the loop and point it to
14110 the start of MEM. Create a new memory reference that uses this
14111 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14114 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14117 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14119 /* Although the new mem does not refer to a known location,
14120 it does keep up to LENGTH bytes of alignment. */
14121 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14122 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14125 /* From mips_block_move_loop:
14127 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14128 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14129 the memory regions do not overlap. */
14132 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14133 unsigned int interleave_factor
,
14134 HOST_WIDE_INT bytes_per_iter
)
14136 rtx label
, src_reg
, dest_reg
, final_src
, test
;
14137 HOST_WIDE_INT leftover
;
14139 leftover
= length
% bytes_per_iter
;
14140 length
-= leftover
;
14142 /* Create registers and memory references for use within the loop. */
14143 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14144 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14146 /* Calculate the value that SRC_REG should have after the last iteration of
14148 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14149 0, 0, OPTAB_WIDEN
);
14151 /* Emit the start of the loop. */
14152 label
= gen_label_rtx ();
14153 emit_label (label
);
14155 /* Emit the loop body. */
14156 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14157 interleave_factor
);
14159 /* Move on to the next block. */
14160 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14161 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14163 /* Emit the loop condition. */
14164 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14165 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14167 /* Mop up any left-over bytes. */
14169 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14172 /* Emit a block move when either the source or destination is unaligned (not
14173 aligned to a four-byte boundary). This may need further tuning depending on
14174 core type, optimize_size setting, etc. */
14177 arm_movmemqi_unaligned (rtx
*operands
)
14179 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14183 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14184 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14185 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14186 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14187 or dst_aligned though: allow more interleaving in those cases since the
14188 resulting code can be smaller. */
14189 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14190 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14193 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14194 interleave_factor
, bytes_per_iter
);
14196 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14197 interleave_factor
);
14201 /* Note that the loop created by arm_block_move_unaligned_loop may be
14202 subject to loop unrolling, which makes tuning this condition a little
14205 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14207 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14214 arm_gen_movmemqi (rtx
*operands
)
14216 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14217 HOST_WIDE_INT srcoffset
, dstoffset
;
14219 rtx src
, dst
, srcbase
, dstbase
;
14220 rtx part_bytes_reg
= NULL
;
14223 if (!CONST_INT_P (operands
[2])
14224 || !CONST_INT_P (operands
[3])
14225 || INTVAL (operands
[2]) > 64)
14228 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14229 return arm_movmemqi_unaligned (operands
);
14231 if (INTVAL (operands
[3]) & 3)
14234 dstbase
= operands
[0];
14235 srcbase
= operands
[1];
14237 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14238 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14240 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14241 out_words_to_go
= INTVAL (operands
[2]) / 4;
14242 last_bytes
= INTVAL (operands
[2]) & 3;
14243 dstoffset
= srcoffset
= 0;
14245 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14246 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14248 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14250 if (in_words_to_go
> 4)
14251 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14252 TRUE
, srcbase
, &srcoffset
));
14254 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14255 src
, FALSE
, srcbase
,
14258 if (out_words_to_go
)
14260 if (out_words_to_go
> 4)
14261 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14262 TRUE
, dstbase
, &dstoffset
));
14263 else if (out_words_to_go
!= 1)
14264 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14265 out_words_to_go
, dst
,
14268 dstbase
, &dstoffset
));
14271 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14272 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
14273 if (last_bytes
!= 0)
14275 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14281 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14282 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14285 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14286 if (out_words_to_go
)
14290 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14291 sreg
= copy_to_reg (mem
);
14293 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14294 emit_move_insn (mem
, sreg
);
14297 gcc_assert (!in_words_to_go
); /* Sanity check */
14300 if (in_words_to_go
)
14302 gcc_assert (in_words_to_go
> 0);
14304 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14305 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14308 gcc_assert (!last_bytes
|| part_bytes_reg
);
14310 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14312 rtx tmp
= gen_reg_rtx (SImode
);
14314 /* The bytes we want are in the top end of the word. */
14315 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14316 GEN_INT (8 * (4 - last_bytes
))));
14317 part_bytes_reg
= tmp
;
14321 mem
= adjust_automodify_address (dstbase
, QImode
,
14322 plus_constant (Pmode
, dst
,
14324 dstoffset
+ last_bytes
- 1);
14325 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14329 tmp
= gen_reg_rtx (SImode
);
14330 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14331 part_bytes_reg
= tmp
;
14338 if (last_bytes
> 1)
14340 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14341 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14345 rtx tmp
= gen_reg_rtx (SImode
);
14346 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14347 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14348 part_bytes_reg
= tmp
;
14355 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14356 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14363 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14366 next_consecutive_mem (rtx mem
)
14368 enum machine_mode mode
= GET_MODE (mem
);
14369 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14370 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14372 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14375 /* Copy using LDRD/STRD instructions whenever possible.
14376 Returns true upon success. */
14378 gen_movmem_ldrd_strd (rtx
*operands
)
14380 unsigned HOST_WIDE_INT len
;
14381 HOST_WIDE_INT align
;
14382 rtx src
, dst
, base
;
14384 bool src_aligned
, dst_aligned
;
14385 bool src_volatile
, dst_volatile
;
14387 gcc_assert (CONST_INT_P (operands
[2]));
14388 gcc_assert (CONST_INT_P (operands
[3]));
14390 len
= UINTVAL (operands
[2]);
14394 /* Maximum alignment we can assume for both src and dst buffers. */
14395 align
= INTVAL (operands
[3]);
14397 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14400 /* Place src and dst addresses in registers
14401 and update the corresponding mem rtx. */
14403 dst_volatile
= MEM_VOLATILE_P (dst
);
14404 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14405 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14406 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14409 src_volatile
= MEM_VOLATILE_P (src
);
14410 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14411 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14412 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14414 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14417 if (src_volatile
|| dst_volatile
)
14420 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14421 if (!(dst_aligned
|| src_aligned
))
14422 return arm_gen_movmemqi (operands
);
14424 src
= adjust_address (src
, DImode
, 0);
14425 dst
= adjust_address (dst
, DImode
, 0);
14429 reg0
= gen_reg_rtx (DImode
);
14431 emit_move_insn (reg0
, src
);
14433 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14436 emit_move_insn (dst
, reg0
);
14438 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14440 src
= next_consecutive_mem (src
);
14441 dst
= next_consecutive_mem (dst
);
14444 gcc_assert (len
< 8);
14447 /* More than a word but less than a double-word to copy. Copy a word. */
14448 reg0
= gen_reg_rtx (SImode
);
14449 src
= adjust_address (src
, SImode
, 0);
14450 dst
= adjust_address (dst
, SImode
, 0);
14452 emit_move_insn (reg0
, src
);
14454 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14457 emit_move_insn (dst
, reg0
);
14459 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14461 src
= next_consecutive_mem (src
);
14462 dst
= next_consecutive_mem (dst
);
14469 /* Copy the remaining bytes. */
14472 dst
= adjust_address (dst
, HImode
, 0);
14473 src
= adjust_address (src
, HImode
, 0);
14474 reg0
= gen_reg_rtx (SImode
);
14476 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14478 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14481 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14483 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14485 src
= next_consecutive_mem (src
);
14486 dst
= next_consecutive_mem (dst
);
14491 dst
= adjust_address (dst
, QImode
, 0);
14492 src
= adjust_address (src
, QImode
, 0);
14493 reg0
= gen_reg_rtx (QImode
);
14494 emit_move_insn (reg0
, src
);
14495 emit_move_insn (dst
, reg0
);
14499 /* Select a dominance comparison mode if possible for a test of the general
14500 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14501 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14502 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14503 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14504 In all cases OP will be either EQ or NE, but we don't need to know which
14505 here. If we are unable to support a dominance comparison we return
14506 CC mode. This will then fail to match for the RTL expressions that
14507 generate this call. */
14509 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14511 enum rtx_code cond1
, cond2
;
14514 /* Currently we will probably get the wrong result if the individual
14515 comparisons are not simple. This also ensures that it is safe to
14516 reverse a comparison if necessary. */
14517 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14519 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14523 /* The if_then_else variant of this tests the second condition if the
14524 first passes, but is true if the first fails. Reverse the first
14525 condition to get a true "inclusive-or" expression. */
14526 if (cond_or
== DOM_CC_NX_OR_Y
)
14527 cond1
= reverse_condition (cond1
);
14529 /* If the comparisons are not equal, and one doesn't dominate the other,
14530 then we can't do this. */
14532 && !comparison_dominates_p (cond1
, cond2
)
14533 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14538 enum rtx_code temp
= cond1
;
14546 if (cond_or
== DOM_CC_X_AND_Y
)
14551 case EQ
: return CC_DEQmode
;
14552 case LE
: return CC_DLEmode
;
14553 case LEU
: return CC_DLEUmode
;
14554 case GE
: return CC_DGEmode
;
14555 case GEU
: return CC_DGEUmode
;
14556 default: gcc_unreachable ();
14560 if (cond_or
== DOM_CC_X_AND_Y
)
14572 gcc_unreachable ();
14576 if (cond_or
== DOM_CC_X_AND_Y
)
14588 gcc_unreachable ();
14592 if (cond_or
== DOM_CC_X_AND_Y
)
14593 return CC_DLTUmode
;
14598 return CC_DLTUmode
;
14600 return CC_DLEUmode
;
14604 gcc_unreachable ();
14608 if (cond_or
== DOM_CC_X_AND_Y
)
14609 return CC_DGTUmode
;
14614 return CC_DGTUmode
;
14616 return CC_DGEUmode
;
14620 gcc_unreachable ();
14623 /* The remaining cases only occur when both comparisons are the
14626 gcc_assert (cond1
== cond2
);
14630 gcc_assert (cond1
== cond2
);
14634 gcc_assert (cond1
== cond2
);
14638 gcc_assert (cond1
== cond2
);
14639 return CC_DLEUmode
;
14642 gcc_assert (cond1
== cond2
);
14643 return CC_DGEUmode
;
14646 gcc_unreachable ();
14651 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14653 /* All floating point compares return CCFP if it is an equality
14654 comparison, and CCFPE otherwise. */
14655 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14678 gcc_unreachable ();
14682 /* A compare with a shifted operand. Because of canonicalization, the
14683 comparison will have to be swapped when we emit the assembler. */
14684 if (GET_MODE (y
) == SImode
14685 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14686 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14687 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14688 || GET_CODE (x
) == ROTATERT
))
14691 /* This operation is performed swapped, but since we only rely on the Z
14692 flag we don't need an additional mode. */
14693 if (GET_MODE (y
) == SImode
14694 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14695 && GET_CODE (x
) == NEG
14696 && (op
== EQ
|| op
== NE
))
14699 /* This is a special case that is used by combine to allow a
14700 comparison of a shifted byte load to be split into a zero-extend
14701 followed by a comparison of the shifted integer (only valid for
14702 equalities and unsigned inequalities). */
14703 if (GET_MODE (x
) == SImode
14704 && GET_CODE (x
) == ASHIFT
14705 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14706 && GET_CODE (XEXP (x
, 0)) == SUBREG
14707 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14708 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14709 && (op
== EQ
|| op
== NE
14710 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14711 && CONST_INT_P (y
))
14714 /* A construct for a conditional compare, if the false arm contains
14715 0, then both conditions must be true, otherwise either condition
14716 must be true. Not all conditions are possible, so CCmode is
14717 returned if it can't be done. */
14718 if (GET_CODE (x
) == IF_THEN_ELSE
14719 && (XEXP (x
, 2) == const0_rtx
14720 || XEXP (x
, 2) == const1_rtx
)
14721 && COMPARISON_P (XEXP (x
, 0))
14722 && COMPARISON_P (XEXP (x
, 1)))
14723 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14724 INTVAL (XEXP (x
, 2)));
14726 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14727 if (GET_CODE (x
) == AND
14728 && (op
== EQ
|| op
== NE
)
14729 && COMPARISON_P (XEXP (x
, 0))
14730 && COMPARISON_P (XEXP (x
, 1)))
14731 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14734 if (GET_CODE (x
) == IOR
14735 && (op
== EQ
|| op
== NE
)
14736 && COMPARISON_P (XEXP (x
, 0))
14737 && COMPARISON_P (XEXP (x
, 1)))
14738 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14741 /* An operation (on Thumb) where we want to test for a single bit.
14742 This is done by shifting that bit up into the top bit of a
14743 scratch register; we can then branch on the sign bit. */
14745 && GET_MODE (x
) == SImode
14746 && (op
== EQ
|| op
== NE
)
14747 && GET_CODE (x
) == ZERO_EXTRACT
14748 && XEXP (x
, 1) == const1_rtx
)
14751 /* An operation that sets the condition codes as a side-effect, the
14752 V flag is not set correctly, so we can only use comparisons where
14753 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14755 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14756 if (GET_MODE (x
) == SImode
14758 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14759 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14760 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14761 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14762 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14763 || GET_CODE (x
) == LSHIFTRT
14764 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14765 || GET_CODE (x
) == ROTATERT
14766 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14767 return CC_NOOVmode
;
14769 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14772 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14773 && GET_CODE (x
) == PLUS
14774 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14777 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14783 /* A DImode comparison against zero can be implemented by
14784 or'ing the two halves together. */
14785 if (y
== const0_rtx
)
14788 /* We can do an equality test in three Thumb instructions. */
14798 /* DImode unsigned comparisons can be implemented by cmp +
14799 cmpeq without a scratch register. Not worth doing in
14810 /* DImode signed and unsigned comparisons can be implemented
14811 by cmp + sbcs with a scratch register, but that does not
14812 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14813 gcc_assert (op
!= EQ
&& op
!= NE
);
14817 gcc_unreachable ();
14821 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14822 return GET_MODE (x
);
14827 /* X and Y are two things to compare using CODE. Emit the compare insn and
14828 return the rtx for register 0 in the proper mode. FP means this is a
14829 floating point compare: I don't think that it is needed on the arm. */
/* Emit a compare insn for CODE applied to X and Y and return the CC
   register in the selected mode.  SCRATCH may be used for DImode
   comparisons that need an extra SImode register.
   NOTE(review): this region is a garbled extraction — lines are split
   and several original lines (braces, declarations, the final
   `return cc_reg;`) are missing; recover the pristine source before
   making functional edits.  */
14831 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14833 enum machine_mode mode
;
14835 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14837 /* We might have X as a constant, Y as a register because of the predicates
14838 used for cmpdi. If so, force X to a register here. */
14839 if (dimode_comparison
&& !REG_P (x
))
14840 x
= force_reg (DImode
, x
);
/* Pick the CC mode for this comparison and materialize the CC reg.  */
14842 mode
= SELECT_CC_MODE (code
, x
, y
);
14843 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14845 if (dimode_comparison
14846 && mode
!= CC_CZmode
)
14850 /* To compare two non-zero values for equality, XOR them and
14851 then compare against zero. Not used for ARM mode; there
14852 CC_CZmode is cheaper. */
14853 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14855 gcc_assert (!reload_completed
);
14856 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14860 /* A scratch register is required. */
14861 if (reload_completed
)
14862 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
/* Before reload a SCRATCH rtx stands in for the real register.  */
14864 scratch
= gen_rtx_SCRATCH (SImode
);
14866 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14867 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14868 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
/* Simple (non-DImode-special) case: plain CC-setting compare.  */
14871 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14876 /* Generate a sequence of insns that will generate the correct return
14877 address mask depending on the physical architecture that the program
/* Emit insns computing the return-address mask for the current
   architecture into a fresh pseudo via the return_addr_mask pattern.
   NOTE(review): the `return reg;` and surrounding lines were lost in
   extraction; only the allocation and emit are visible here.  */
14880 arm_gen_return_addr_mask (void)
14882 rtx reg
= gen_reg_rtx (Pmode
);
14884 emit_insn (gen_return_addr_mask (reg
));
/* Reload helper: synthesize an HImode load as two QImode zero-extending
   loads combined with IOR/ASHIFT, using the DImode scratch in
   operands[2] when the address needs rebuilding.
   NOTE(review): garbled extraction — braces, some statements (e.g. the
   `lo = ...` assignment) and trailing code are missing; do not edit
   functionally without the pristine source.  */
14889 arm_reload_in_hi (rtx
*operands
)
14891 rtx ref
= operands
[1];
14893 HOST_WIDE_INT offset
= 0;
/* Strip a SUBREG wrapper, remembering the byte offset it implied.  */
14895 if (GET_CODE (ref
) == SUBREG
)
14897 offset
= SUBREG_BYTE (ref
);
14898 ref
= SUBREG_REG (ref
);
14903 /* We have a pseudo which has been spilt onto the stack; there
14904 are two cases here: the first where there is a simple
14905 stack-slot replacement and a second where the stack-slot is
14906 out of range, or is used as a subreg. */
14907 if (reg_equiv_mem (REGNO (ref
)))
14909 ref
= reg_equiv_mem (REGNO (ref
));
14910 base
= find_replacement (&XEXP (ref
, 0));
14913 /* The slot is out of range, or was dressed up in a SUBREG. */
14914 base
= reg_equiv_address (REGNO (ref
));
14917 base
= find_replacement (&XEXP (ref
, 0));
14919 /* Handle the case where the address is too complex to be offset by 1. */
14920 if (GET_CODE (base
) == MINUS
14921 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14923 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14925 emit_set_insn (base_plus
, base
);
14928 else if (GET_CODE (base
) == PLUS
)
14930 /* The addend must be CONST_INT, or we would have dealt with it above. */
14931 HOST_WIDE_INT hi
, lo
;
14933 offset
+= INTVAL (XEXP (base
, 1));
14934 base
= XEXP (base
, 0);
14936 /* Rework the address into a legal sequence of insns. */
14937 /* Valid range for lo is -4095 -> 4095 */
14940 : -((-offset
) & 0xfff));
14942 /* Corner case, if lo is the max offset then we would be out of range
14943 once we have added the additional 1 below, so bump the msb into the
14944 pre-loading insn(s). */
/* Sign-extend (offset - lo) to 32 bits portably.  */
14948 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14949 ^ (HOST_WIDE_INT
) 0x80000000)
14950 - (HOST_WIDE_INT
) 0x80000000);
14952 gcc_assert (hi
+ lo
== offset
);
14956 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14958 /* Get the base address; addsi3 knows how to handle constants
14959 that require more than one insn. */
14960 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14966 /* Operands[2] may overlap operands[0] (though it won't overlap
14967 operands[1]), that's why we asked for a DImode reg -- so we can
14968 use the bit that does not overlap. */
14969 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14970 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14972 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
/* Load the two halves of the halfword as zero-extended bytes.  */
14974 emit_insn (gen_zero_extendqisi2 (scratch
,
14975 gen_rtx_MEM (QImode
,
14976 plus_constant (Pmode
, base
,
14978 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14979 gen_rtx_MEM (QImode
,
14980 plus_constant (Pmode
, base
,
/* Recombine the bytes; shift amount/order depends on endianness.  */
14982 if (!BYTES_BIG_ENDIAN
)
14983 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14984 gen_rtx_IOR (SImode
,
14987 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14991 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14992 gen_rtx_IOR (SImode
,
14993 gen_rtx_ASHIFT (SImode
, scratch
,
14995 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14998 /* Handle storing a half-word to memory during reload by synthesizing as two
14999 byte stores. Take care not to clobber the input values until after we
15000 have moved them somewhere safe. This code assumes that if the DImode
15001 scratch in operands[2] overlaps either the input value or output address
15002 in some way, then that value must die in this insn (we absolutely need
15003 two scratch registers for some corner cases). */
/* Reload helper: synthesize an HImode store as two QImode stores (see
   the visible block comment above about the DImode scratch in
   operands[2]).
   NOTE(review): garbled extraction — braces, the `lo = ...` assignment,
   shift counts and other lines are missing; recover the pristine
   source before functional edits.  */
15005 arm_reload_out_hi (rtx
*operands
)
15007 rtx ref
= operands
[0];
15008 rtx outval
= operands
[1];
15010 HOST_WIDE_INT offset
= 0;
/* Strip a SUBREG wrapper, remembering its byte offset.  */
15012 if (GET_CODE (ref
) == SUBREG
)
15014 offset
= SUBREG_BYTE (ref
);
15015 ref
= SUBREG_REG (ref
);
15020 /* We have a pseudo which has been spilt onto the stack; there
15021 are two cases here: the first where there is a simple
15022 stack-slot replacement and a second where the stack-slot is
15023 out of range, or is used as a subreg. */
15024 if (reg_equiv_mem (REGNO (ref
)))
15026 ref
= reg_equiv_mem (REGNO (ref
));
15027 base
= find_replacement (&XEXP (ref
, 0));
15030 /* The slot is out of range, or was dressed up in a SUBREG. */
15031 base
= reg_equiv_address (REGNO (ref
));
15034 base
= find_replacement (&XEXP (ref
, 0));
/* Low word of the DImode scratch pair.  */
15036 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15038 /* Handle the case where the address is too complex to be offset by 1. */
15039 if (GET_CODE (base
) == MINUS
15040 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15042 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15044 /* Be careful not to destroy OUTVAL. */
15045 if (reg_overlap_mentioned_p (base_plus
, outval
))
15047 /* Updating base_plus might destroy outval, see if we can
15048 swap the scratch and base_plus. */
15049 if (!reg_overlap_mentioned_p (scratch
, outval
))
15052 scratch
= base_plus
;
15057 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15059 /* Be conservative and copy OUTVAL into the scratch now,
15060 this should only be necessary if outval is a subreg
15061 of something larger than a word. */
15062 /* XXX Might this clobber base? I can't see how it can,
15063 since scratch is known to overlap with OUTVAL, and
15064 must be wider than a word. */
15065 emit_insn (gen_movhi (scratch_hi
, outval
));
15066 outval
= scratch_hi
;
15070 emit_set_insn (base_plus
, base
);
15073 else if (GET_CODE (base
) == PLUS
)
15075 /* The addend must be CONST_INT, or we would have dealt with it above. */
15076 HOST_WIDE_INT hi
, lo
;
15078 offset
+= INTVAL (XEXP (base
, 1));
15079 base
= XEXP (base
, 0);
15081 /* Rework the address into a legal sequence of insns. */
15082 /* Valid range for lo is -4095 -> 4095 */
15085 : -((-offset
) & 0xfff));
15087 /* Corner case, if lo is the max offset then we would be out of range
15088 once we have added the additional 1 below, so bump the msb into the
15089 pre-loading insn(s). */
/* Sign-extend (offset - lo) to 32 bits portably.  */
15093 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15094 ^ (HOST_WIDE_INT
) 0x80000000)
15095 - (HOST_WIDE_INT
) 0x80000000);
15097 gcc_assert (hi
+ lo
== offset
);
15101 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15103 /* Be careful not to destroy OUTVAL. */
15104 if (reg_overlap_mentioned_p (base_plus
, outval
))
15106 /* Updating base_plus might destroy outval, see if we
15107 can swap the scratch and base_plus. */
15108 if (!reg_overlap_mentioned_p (scratch
, outval
))
15111 scratch
= base_plus
;
15116 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15118 /* Be conservative and copy outval into scratch now,
15119 this should only be necessary if outval is a
15120 subreg of something larger than a word. */
15121 /* XXX Might this clobber base? I can't see how it
15122 can, since scratch is known to overlap with
15124 emit_insn (gen_movhi (scratch_hi
, outval
));
15125 outval
= scratch_hi
;
15129 /* Get the base address; addsi3 knows how to handle constants
15130 that require more than one insn. */
15131 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
/* Emit the two byte stores; order/shift depends on endianness.  */
15137 if (BYTES_BIG_ENDIAN
)
15139 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15140 plus_constant (Pmode
, base
,
15142 gen_lowpart (QImode
, outval
)));
15143 emit_insn (gen_lshrsi3 (scratch
,
15144 gen_rtx_SUBREG (SImode
, outval
, 0),
15146 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15148 gen_lowpart (QImode
, scratch
)));
15152 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15154 gen_lowpart (QImode
, outval
)));
15155 emit_insn (gen_lshrsi3 (scratch
,
15156 gen_rtx_SUBREG (SImode
, outval
, 0),
15158 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15159 plus_constant (Pmode
, base
,
15161 gen_lowpart (QImode
, scratch
)));
15165 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15166 (padded to the size of a word) should be passed in a register. */
15169 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
15171 if (TARGET_AAPCS_BASED
)
15172 return must_pass_in_stack_var_size (mode
, type
);
15174 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15178 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15179 Return true if an argument passed on the stack should be padded upwards,
15180 i.e. if the least-significant byte has useful data.
15181 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15182 aggregate types are placed in the lowest memory address. */
/* Decide whether a stack argument is padded upward (see the block
   comment above: AAPCS places small aggregates at the low address).
   NOTE(review): the trailing `return` statements of this function were
   lost in extraction; only the two visible tests remain.  */
15185 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
/* Non-AAPCS ABIs fall back to the generic padding rule.  */
15187 if (!TARGET_AAPCS_BASED
)
15188 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
/* Big-endian AAPCS: integral types are handled specially here.  */
15190 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15197 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15198 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15199 register has useful data, and return the opposite if the most
15200 significant byte does. */
/* Decide whether a value passed in a register is padded upward (see the
   block comment above).
   NOTE(review): the `return true;` bodies of the two inner tests were
   lost in extraction.  */
15203 arm_pad_reg_upward (enum machine_mode mode
,
15204 tree type
, int first ATTRIBUTE_UNUSED
)
15206 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15208 /* For AAPCS, small aggregates, small fixed-point types,
15209 and small complex types are always padded upwards. */
/* Typed case: small aggregate/complex/fixed-point values.  */
15212 if ((AGGREGATE_TYPE_P (type
)
15213 || TREE_CODE (type
) == COMPLEX_TYPE
15214 || FIXED_POINT_TYPE_P (type
))
15215 && int_size_in_bytes (type
) <= 4)
/* Untyped (libcall) case: judge by the machine mode alone.  */
15220 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15221 && GET_MODE_SIZE (mode
) <= 4)
15226 /* Otherwise, use default padding. */
15227 return !BYTES_BIG_ENDIAN
;
15230 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15231 assuming that the address in the base register is word aligned. */
/* Return whether OFFSET is within the immediate range LDRD/STRD allow,
   assuming a word-aligned base (see comment above).
   NOTE(review): the assignments of max_offset for the Thumb-2 and ARM
   branches, and the non-LDRD fallback return, were lost in
   extraction.  */
15233 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15235 HOST_WIDE_INT max_offset
;
15237 /* Offset must be a multiple of 4 in Thumb mode. */
15238 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15243 else if (TARGET_ARM
)
/* Symmetric range check around zero.  */
15248 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15251 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15252 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15253 Assumes that the address in the base register RN is word aligned. Pattern
15254 guarantees that both memory accesses use the same base register,
15255 the offsets are constants within the range, and the gap between the offsets is 4.
15256 If preload complete then check that registers are legal. WBACK indicates whether
15257 address is updated. LOAD indicates whether memory access is load or store. */
/* Validate the register operands of an LDRD/STRD (contract in the
   block comment above).
   NOTE(review): the extraction dropped the lines assigning T/T2/N from
   RT/RT2/RN, the early `return`s, and the ARM-mode `if` header paired
   with the second condition list; functional edits need the pristine
   source.  */
15259 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15260 bool wback
, bool load
)
15262 unsigned int t
, t2
, n
;
15264 if (!reload_completed
)
15267 if (!offset_ok_for_ldrd_strd (offset
))
/* Thumb-2 restrictions.  */
15274 if ((TARGET_THUMB2
)
15275 && ((wback
&& (n
== t
|| n
== t2
))
15276 || (t
== SP_REGNUM
)
15277 || (t
== PC_REGNUM
)
15278 || (t2
== SP_REGNUM
)
15279 || (t2
== PC_REGNUM
)
15280 || (!load
&& (n
== PC_REGNUM
))
15281 || (load
&& (t
== t2
))
15282 /* Triggers Cortex-M3 LDRD errata. */
15283 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
/* ARM-mode restrictions (the enclosing `if` header is missing).  */
15287 && ((wback
&& (n
== t
|| n
== t2
))
15288 || (t2
== PC_REGNUM
)
15289 || (t
% 2 != 0) /* First destination register is not even. */
15291 /* PC can be used as base register (for offset addressing only),
15292 but it is depricated. */
15293 || (n
== PC_REGNUM
)))
15299 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15300 operand MEM's address contains an immediate offset from the base
15301 register and has no side effects, in which case it sets BASE and
15302 OFFSET accordingly. */
/* Extract BASE and OFFSET from memory operand MEM if its address is a
   base register plus constant offset with no side effects (see comment
   above).
   NOTE(review): the plain-REG address branch and the `return` bodies of
   the early-exit tests were lost in extraction.  */
15304 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15308 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15310 /* TODO: Handle more general memory operand patterns, such as
15311 PRE_DEC and PRE_INC. */
/* Auto-inc/dec or other side effects disqualify the operand.  */
15313 if (side_effects_p (mem
))
15316 /* Can't deal with subregs. */
15317 if (GET_CODE (mem
) == SUBREG
)
15320 gcc_assert (MEM_P (mem
));
15322 *offset
= const0_rtx
;
15324 addr
= XEXP (mem
, 0);
15326 /* If addr isn't valid for DImode, then we can't handle it. */
15327 if (!arm_legitimate_address_p (DImode
, addr
,
15328 reload_in_progress
|| reload_completed
))
/* Base-plus-constant form: split it into the two outputs.  */
15336 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15338 *base
= XEXP (addr
, 0);
15339 *offset
= XEXP (addr
, 1);
15340 return (REG_P (*base
) && CONST_INT_P (*offset
));
/* Exchange two rtx lvalues in place; statement-safe via do/while(0).  */
15346 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15348 /* Called from a peephole2 to replace two word-size accesses with a
15349 single LDRD/STRD instruction. Returns true iff we can generate a
15350 new instruction sequence. That is, both accesses use the same base
15351 register and the gap between constant offsets is 4. This function
15352 may reorder its operands to match ldrd/strd RTL templates.
15353 OPERANDS are the operands found by the peephole matcher;
15354 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15355 corresponding memory operands. LOAD indicaates whether the access
15356 is load or store. CONST_STORE indicates a store of constant
15357 integer values held in OPERANDS[4,5] and assumes that the pattern
15358 is of length 4 insn, for the purpose of checking dead registers.
15359 COMMUTE indicates that register operands may be reordered. */
/* Peephole2 driver that rewrites two word accesses into one LDRD/STRD
   (full contract in the block comment above).
   NOTE(review): garbled extraction — many lines missing (loop body
   braces, `nops`/`i` declarations, several returns), and `®set` is
   mojibake for `&regset`; recover the pristine source before editing
   functionally.  */
15361 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15362 bool const_store
, bool commute
)
15365 HOST_WIDE_INT offsets
[2], offset
;
15366 rtx base
= NULL_RTX
;
15367 rtx cur_base
, cur_offset
, tmp
;
15369 HARD_REG_SET regset
;
15371 gcc_assert (!const_store
|| !load
);
15372 /* Check that the memory references are immediate offsets from the
15373 same base register. Extract the base register, the destination
15374 registers, and the corresponding memory offsets. */
15375 for (i
= 0; i
< nops
; i
++)
15377 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
/* All accesses must share one base register.  */
15382 else if (REGNO (base
) != REGNO (cur_base
))
15385 offsets
[i
] = INTVAL (cur_offset
);
15386 if (GET_CODE (operands
[i
]) == SUBREG
)
15388 tmp
= SUBREG_REG (operands
[i
]);
15389 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15394 /* Make sure there is no dependency between the individual loads. */
15395 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15396 return false; /* RAW */
15398 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15399 return false; /* WAW */
15401 /* If the same input register is used in both stores
15402 when storing different constants, try to find a free register.
15403 For example, the code
15408 can be transformed into
15411 in Thumb mode assuming that r1 is free. */
15413 && REGNO (operands
[0]) == REGNO (operands
[1])
15414 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15418 CLEAR_HARD_REG_SET (regset
);
15419 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15420 if (tmp
== NULL_RTX
)
15423 /* Use the new register in the first load to ensure that
15424 if the original input register is not dead after peephole,
15425 then it will have the correct constant value. */
15428 else if (TARGET_ARM
)
15431 int regno
= REGNO (operands
[0]);
15432 if (!peep2_reg_dead_p (4, operands
[0]))
15434 /* When the input register is even and is not dead after the
15435 pattern, it has to hold the second constant but we cannot
15436 form a legal STRD in ARM mode with this register as the second
15438 if (regno
% 2 == 0)
15441 /* Is regno-1 free? */
15442 SET_HARD_REG_SET (regset
);
15443 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15444 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15445 if (tmp
== NULL_RTX
)
15452 /* Find a DImode register. */
15453 CLEAR_HARD_REG_SET (regset
);
15454 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15455 if (tmp
!= NULL_RTX
)
15457 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15458 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15462 /* Can we use the input register to form a DI register? */
15463 SET_HARD_REG_SET (regset
);
15464 CLEAR_HARD_REG_BIT(regset
,
15465 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15466 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15467 if (tmp
== NULL_RTX
)
15469 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
/* Sanity: register pair must be even/odd adjacent for ARM STRD.  */
15473 gcc_assert (operands
[0] != NULL_RTX
);
15474 gcc_assert (operands
[1] != NULL_RTX
);
15475 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15476 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15480 /* Make sure the instructions are ordered with lower memory access first. */
15481 if (offsets
[0] > offsets
[1])
15483 gap
= offsets
[0] - offsets
[1];
15484 offset
= offsets
[1];
15486 /* Swap the instructions such that lower memory is accessed first. */
15487 SWAP_RTX (operands
[0], operands
[1]);
15488 SWAP_RTX (operands
[2], operands
[3]);
15490 SWAP_RTX (operands
[4], operands
[5]);
15494 gap
= offsets
[1] - offsets
[0];
15495 offset
= offsets
[0];
15498 /* Make sure accesses are to consecutive memory locations. */
15502 /* Make sure we generate legal instructions. */
15503 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15507 /* In Thumb state, where registers are almost unconstrained, there
15508 is little hope to fix it. */
15512 if (load
&& commute
)
15514 /* Try reordering registers. */
15515 SWAP_RTX (operands
[0], operands
[1]);
15516 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15523 /* If input registers are dead after this pattern, they can be
15524 reordered or replaced by other registers that are free in the
15525 current pattern. */
15526 if (!peep2_reg_dead_p (4, operands
[0])
15527 || !peep2_reg_dead_p (4, operands
[1]))
15530 /* Try to reorder the input registers. */
15531 /* For example, the code
15536 can be transformed into
15541 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15544 SWAP_RTX (operands
[0], operands
[1]);
15548 /* Try to find a free DI register. */
15549 CLEAR_HARD_REG_SET (regset
);
15550 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15551 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15554 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15555 if (tmp
== NULL_RTX
)
15558 /* DREG must be an even-numbered register in DImode.
15559 Split it into SI registers. */
15560 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15561 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15562 gcc_assert (operands
[0] != NULL_RTX
);
15563 gcc_assert (operands
[1] != NULL_RTX
);
15564 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15565 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15567 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15580 /* Print a symbolic form of X to the debug file, F. */
/* Print a symbolic form of rtx X to debug file F (comment above).
   NOTE(review): the switch's `case` labels and `break`s were lost in
   extraction; the fprintf arms below presumably correspond to
   CONST_INT, CONST_DOUBLE, CONST_VECTOR, CONST_STRING, SYMBOL_REF,
   LABEL_REF and binary-op cases — confirm against upstream.  */
15582 arm_print_value (FILE *f
, rtx x
)
15584 switch (GET_CODE (x
))
15587 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15591 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
/* Vector constants: print each element, comma separated.  */
15599 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15601 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15602 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15610 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15614 fprintf (f
, "`%s'", XSTR (x
, 0));
15618 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
/* Recursive cases: unary then binary operands.  */
15622 arm_print_value (f
, XEXP (x
, 0));
15626 arm_print_value (f
, XEXP (x
, 0));
15628 arm_print_value (f
, XEXP (x
, 1));
/* Fallback for codes with no printable form.  */
15636 fprintf (f
, "????");
15641 /* Routines for manipulation of the constant pool. */
15643 /* Arm instructions cannot load a large constant directly into a
15644 register; they have to come from a pc relative load. The constant
15645 must therefore be placed in the addressable range of the pc
15646 relative load. Depending on the precise pc relative load
15647 instruction the range is somewhere between 256 bytes and 4k. This
15648 means that we often have to dump a constant inside a function, and
15649 generate code to branch around it.
15651 It is important to minimize this, since the branches will slow
15652 things down and make the code larger.
15654 Normally we can hide the table after an existing unconditional
15655 branch so that there is no interruption of the flow, but in the
15656 worst case the code looks like this:
15674 We fix this by performing a scan after scheduling, which notices
15675 which instructions need to have their operands fetched from the
15676 constant table and builds the table.
15678 The algorithm starts by building a table of all the constants that
15679 need fixing up and all the natural barriers in the function (places
15680 where a constant table can be dropped without breaking the flow).
15681 For each fixup we note how far the pc-relative replacement will be
15682 able to reach and the offset of the instruction into the function.
15684 Having built the table we then group the fixes together to form
15685 tables that are as large as possible (subject to addressing
15686 constraints) and emit each table of constants after the last
15687 barrier that is within range of all the instructions in the group.
15688 If a group does not contain a barrier, then we forcibly create one
15689 by inserting a jump instruction into the flow. Once the table has
15690 been inserted, the insns are then modified to reference the
15691 relevant entry in the pool.
15693 Possible enhancements to the algorithm (not implemented) are:
15695 1) For some processors and object formats, there may be benefit in
15696 aligning the pools to the start of cache lines; this alignment
15697 would need to be taken into account when calculating addressability
15700 /* These typedefs are located at the start of this file, so that
15701 they can be used in the prototypes there. This comment is to
15702 remind readers of that fact so that the following structures
15703 can be understood more easily.
15705 typedef struct minipool_node Mnode;
15706 typedef struct minipool_fixup Mfix; */
/* One constant-pool (minipool) entry.
   NOTE(review): the link fields, refcount, value and fix_size members
   were dropped by the extraction; only the documented members below
   survive.  */
15708 struct minipool_node
15710 /* Doubly linked chain of entries. */
15713 /* The maximum offset into the code that this entry can be placed. While
15714 pushing fixes for forward references, all entries are sorted in order
15715 of increasing max_address. */
15716 HOST_WIDE_INT max_address
;
15717 /* Similarly for an entry inserted for a backwards ref. */
15718 HOST_WIDE_INT min_address
;
15719 /* The number of fixes referencing this entry. This can become zero
15720 if we "unpush" an entry. In this case we ignore the entry when we
15721 come to emit the code. */
15723 /* The offset from the start of the minipool. */
15724 HOST_WIDE_INT offset
;
15725 /* The value in table. */
15727 /* The mode of value. */
15728 enum machine_mode mode
;
15729 /* The size of the value. With iWMMXt enabled
15730 sizes > 4 also imply an alignment of 8-bytes. */
/* One pending constant-pool fixup: an insn that must be rewritten to
   load its constant from a minipool entry.
   NOTE(review): link/insn/value/minipool members were dropped by the
   extraction.  */
15734 struct minipool_fixup
/* Address of the insn needing the fixup.  */
15738 HOST_WIDE_INT address
;
/* Mode of the value being loaded.  */
15740 enum machine_mode mode
;
/* Reachable pc-relative ranges forwards and backwards.  */
15744 HOST_WIDE_INT forwards
;
15745 HOST_WIDE_INT backwards
;
15748 /* Fixes less than a word need padding out to a word boundary. */
15749 #define MINIPOOL_FIX_SIZE(mode) \
15750 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
/* Head/tail of the doubly linked list of pool entries being built.  */
15752 static Mnode
* minipool_vector_head
;
15753 static Mnode
* minipool_vector_tail
;
/* Label emitted at the start of the current minipool.  */
15754 static rtx minipool_vector_label
;
/* Padding reserved for alignment of 8-byte pool entries.  */
15755 static int minipool_pad
;
15757 /* The linked list of all minipool fixes required for this function. */
15758 Mfix
* minipool_fix_head
;
15759 Mfix
* minipool_fix_tail
;
15760 /* The fix entry for the current minipool, once it has been placed. */
15761 Mfix
* minipool_barrier
;
15763 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15764 #define JUMP_TABLES_IN_TEXT_SECTION 0
/* Return the number of bytes the jump table of INSN occupies in the
   text section (0 when tables live in read-only data).
   NOTE(review): the switch on modesize, its case labels and the final
   `return size;`/`return 0;` lines were lost in extraction.  */
15767 static HOST_WIDE_INT
15768 get_jump_table_size (rtx insn
)
15770 /* ADDR_VECs only take room if read-only data does into the text
15772 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15774 rtx body
= PATTERN (insn
);
/* ADDR_DIFF_VEC keeps its elements in operand 1, ADDR_VEC in 0.  */
15775 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15776 HOST_WIDE_INT size
;
15777 HOST_WIDE_INT modesize
;
15779 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15780 size
= modesize
* XVECLEN (body
, elt
);
15784 /* Round up size of TBB table to a halfword boundary. */
15785 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
15788 /* No padding necessary for TBH. */
15791 /* Add two bytes for alignment on Thumb. */
15796 gcc_unreachable ();
15804 /* Return the maximum amount of padding that will be inserted before
15807 static HOST_WIDE_INT
15808 get_label_padding (rtx label
)
15810 HOST_WIDE_INT align
, min_insn_size
;
15812 align
= 1 << label_to_alignment (label
);
15813 min_insn_size
= TARGET_THUMB
? 2 : 4;
15814 return align
> min_insn_size
? align
- min_insn_size
: 0;
15817 /* Move a minipool fix MP from its current location to before MAX_MP.
15818 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15819 constraints may need updating. */
/* Move pool entry MP to just before MAX_MP, or only tighten its
   max_address when MAX_MP is NULL (see comment above).
   NOTE(review): garbled extraction — braces, the else arms and the
   final `return mp;` are missing.  */
15821 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15822 HOST_WIDE_INT max_address
)
15824 /* The code below assumes these are different. */
15825 gcc_assert (mp
!= max_mp
);
/* No reposition needed: just tighten MP's placement constraint.  */
15827 if (max_mp
== NULL
)
15829 if (max_address
< mp
->max_address
)
15830 mp
->max_address
= max_address
;
15834 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15835 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15837 mp
->max_address
= max_address
;
15839 /* Unlink MP from its current position. Since max_mp is non-null,
15840 mp->prev must be non-null. */
15841 mp
->prev
->next
= mp
->next
;
15842 if (mp
->next
!= NULL
)
15843 mp
->next
->prev
= mp
->prev
;
15845 minipool_vector_tail
= mp
->prev
;
15847 /* Re-insert it before MAX_MP. */
15849 mp
->prev
= max_mp
->prev
;
15852 if (mp
->prev
!= NULL
)
15853 mp
->prev
->next
= mp
;
15855 minipool_vector_head
= mp
;
15858 /* Save the new entry. */
15861 /* Scan over the preceding entries and adjust their addresses as
/* Ripple the tightened constraint backwards through the list.  */
15863 while (mp
->prev
!= NULL
15864 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15866 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15873 /* Add a constant to the minipool for a forward reference. Returns the
15874 node added or NULL if the constant will not fit in this pool. */
/* Add FIX's constant to the minipool for a forward reference; return
   the (possibly pre-existing) node, or NULL if it cannot fit (see
   comment above).
   NOTE(review): garbled extraction — the `Mnode *mp;` declaration,
   XNEW allocation, several `else`/brace lines and the final
   `return mp;` are missing.  */
15876 add_minipool_forward_ref (Mfix
*fix
)
15878 /* If set, max_mp is the first pool_entry that has a lower
15879 constraint than the one we are trying to add. */
15880 Mnode
* max_mp
= NULL
;
15881 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15884 /* If the minipool starts before the end of FIX->INSN then this FIX
15885 can not be placed into the current pool. Furthermore, adding the
15886 new constant pool entry may cause the pool to start FIX_SIZE bytes
15888 if (minipool_vector_head
&&
15889 (fix
->address
+ get_attr_length (fix
->insn
)
15890 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15893 /* Scan the pool to see if a constant with the same value has
15894 already been added. While we are doing this, also note the
15895 location where we must insert the constant if it doesn't already
15897 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15899 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15900 && fix
->mode
== mp
->mode
15901 && (!LABEL_P (fix
->value
)
15902 || (CODE_LABEL_NUMBER (fix
->value
)
15903 == CODE_LABEL_NUMBER (mp
->value
)))
15904 && rtx_equal_p (fix
->value
, mp
->value
))
15906 /* More than one fix references this entry. */
/* Reuse the existing entry, tightening its constraint.  */
15908 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15911 /* Note the insertion point if necessary. */
15913 && mp
->max_address
> max_address
)
15916 /* If we are inserting an 8-bytes aligned quantity and
15917 we have not already found an insertion point, then
15918 make sure that all such 8-byte aligned quantities are
15919 placed at the start of the pool. */
15920 if (ARM_DOUBLEWORD_ALIGN
15922 && fix
->fix_size
>= 8
15923 && mp
->fix_size
< 8)
15926 max_address
= mp
->max_address
;
15930 /* The value is not currently in the minipool, so we need to create
15931 a new entry for it. If MAX_MP is NULL, the entry will be put on
15932 the end of the list since the placement is less constrained than
15933 any existing entry. Otherwise, we insert the new fix before
15934 MAX_MP and, if necessary, adjust the constraints on the other
15937 mp
->fix_size
= fix
->fix_size
;
15938 mp
->mode
= fix
->mode
;
15939 mp
->value
= fix
->value
;
15941 /* Not yet required for a backwards ref. */
15942 mp
->min_address
= -65536;
15944 if (max_mp
== NULL
)
/* Append at the tail: least constrained placement.  */
15946 mp
->max_address
= max_address
;
15948 mp
->prev
= minipool_vector_tail
;
15950 if (mp
->prev
== NULL
)
15952 minipool_vector_head
= mp
;
15953 minipool_vector_label
= gen_label_rtx ();
15956 mp
->prev
->next
= mp
;
15958 minipool_vector_tail
= mp
;
/* Insert before MAX_MP, inheriting its tighter constraint.  */
15962 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15963 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15965 mp
->max_address
= max_address
;
15968 mp
->prev
= max_mp
->prev
;
15970 if (mp
->prev
!= NULL
)
15971 mp
->prev
->next
= mp
;
15973 minipool_vector_head
= mp
;
15976 /* Save the new entry. */
15979 /* Scan over the preceding entries and adjust their addresses as
/* Ripple the tightened constraint backwards through the list.  */
15981 while (mp
->prev
!= NULL
15982 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15984 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15992 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15993 HOST_WIDE_INT min_address
)
15995 HOST_WIDE_INT offset
;
15997 /* The code below assumes these are different. */
15998 gcc_assert (mp
!= min_mp
);
16000 if (min_mp
== NULL
)
16002 if (min_address
> mp
->min_address
)
16003 mp
->min_address
= min_address
;
16007 /* We will adjust this below if it is too loose. */
16008 mp
->min_address
= min_address
;
16010 /* Unlink MP from its current position. Since min_mp is non-null,
16011 mp->next must be non-null. */
16012 mp
->next
->prev
= mp
->prev
;
16013 if (mp
->prev
!= NULL
)
16014 mp
->prev
->next
= mp
->next
;
16016 minipool_vector_head
= mp
->next
;
16018 /* Reinsert it after MIN_MP. */
16020 mp
->next
= min_mp
->next
;
16022 if (mp
->next
!= NULL
)
16023 mp
->next
->prev
= mp
;
16025 minipool_vector_tail
= mp
;
16031 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16033 mp
->offset
= offset
;
16034 if (mp
->refcount
> 0)
16035 offset
+= mp
->fix_size
;
16037 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16038 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16044 /* Add a constant to the minipool for a backward reference. Returns the
16045 node added or NULL if the constant will not fit in this pool.
16047 Note that the code for insertion for a backwards reference can be
16048 somewhat confusing because the calculated offsets for each fix do
16049 not take into account the size of the pool (which is still under
16052 add_minipool_backward_ref (Mfix
*fix
)
16054 /* If set, min_mp is the last pool_entry that has a lower constraint
16055 than the one we are trying to add. */
16056 Mnode
*min_mp
= NULL
;
16057 /* This can be negative, since it is only a constraint. */
16058 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16061 /* If we can't reach the current pool from this insn, or if we can't
16062 insert this entry at the end of the pool without pushing other
16063 fixes out of range, then we don't try. This ensures that we
16064 can't fail later on. */
16065 if (min_address
>= minipool_barrier
->address
16066 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16067 >= minipool_barrier
->address
))
16070 /* Scan the pool to see if a constant with the same value has
16071 already been added. While we are doing this, also note the
16072 location where we must insert the constant if it doesn't already
16074 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16076 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16077 && fix
->mode
== mp
->mode
16078 && (!LABEL_P (fix
->value
)
16079 || (CODE_LABEL_NUMBER (fix
->value
)
16080 == CODE_LABEL_NUMBER (mp
->value
)))
16081 && rtx_equal_p (fix
->value
, mp
->value
)
16082 /* Check that there is enough slack to move this entry to the
16083 end of the table (this is conservative). */
16084 && (mp
->max_address
16085 > (minipool_barrier
->address
16086 + minipool_vector_tail
->offset
16087 + minipool_vector_tail
->fix_size
)))
16090 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16093 if (min_mp
!= NULL
)
16094 mp
->min_address
+= fix
->fix_size
;
16097 /* Note the insertion point if necessary. */
16098 if (mp
->min_address
< min_address
)
16100 /* For now, we do not allow the insertion of 8-byte alignment
16101 requiring nodes anywhere but at the start of the pool. */
16102 if (ARM_DOUBLEWORD_ALIGN
16103 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16108 else if (mp
->max_address
16109 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16111 /* Inserting before this entry would push the fix beyond
16112 its maximum address (which can happen if we have
16113 re-located a forwards fix); force the new fix to come
16115 if (ARM_DOUBLEWORD_ALIGN
16116 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16121 min_address
= mp
->min_address
+ fix
->fix_size
;
16124 /* Do not insert a non-8-byte aligned quantity before 8-byte
16125 aligned quantities. */
16126 else if (ARM_DOUBLEWORD_ALIGN
16127 && fix
->fix_size
< 8
16128 && mp
->fix_size
>= 8)
16131 min_address
= mp
->min_address
+ fix
->fix_size
;
16136 /* We need to create a new entry. */
16138 mp
->fix_size
= fix
->fix_size
;
16139 mp
->mode
= fix
->mode
;
16140 mp
->value
= fix
->value
;
16142 mp
->max_address
= minipool_barrier
->address
+ 65536;
16144 mp
->min_address
= min_address
;
16146 if (min_mp
== NULL
)
16149 mp
->next
= minipool_vector_head
;
16151 if (mp
->next
== NULL
)
16153 minipool_vector_tail
= mp
;
16154 minipool_vector_label
= gen_label_rtx ();
16157 mp
->next
->prev
= mp
;
16159 minipool_vector_head
= mp
;
16163 mp
->next
= min_mp
->next
;
16167 if (mp
->next
!= NULL
)
16168 mp
->next
->prev
= mp
;
16170 minipool_vector_tail
= mp
;
16173 /* Save the new entry. */
16181 /* Scan over the following entries and adjust their offsets. */
16182 while (mp
->next
!= NULL
)
16184 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16185 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16188 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16190 mp
->next
->offset
= mp
->offset
;
16199 assign_minipool_offsets (Mfix
*barrier
)
16201 HOST_WIDE_INT offset
= 0;
16204 minipool_barrier
= barrier
;
16206 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16208 mp
->offset
= offset
;
16210 if (mp
->refcount
> 0)
16211 offset
+= mp
->fix_size
;
16215 /* Output the literal table */
16217 dump_minipool (rtx scan
)
16223 if (ARM_DOUBLEWORD_ALIGN
)
16224 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16225 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16232 fprintf (dump_file
,
16233 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16234 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16236 scan
= emit_label_after (gen_label_rtx (), scan
);
16237 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16238 scan
= emit_label_after (minipool_vector_label
, scan
);
16240 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16242 if (mp
->refcount
> 0)
16246 fprintf (dump_file
,
16247 ";; Offset %u, min %ld, max %ld ",
16248 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16249 (unsigned long) mp
->max_address
);
16250 arm_print_value (dump_file
, mp
->value
);
16251 fputc ('\n', dump_file
);
16254 switch (mp
->fix_size
)
16256 #ifdef HAVE_consttable_1
16258 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16262 #ifdef HAVE_consttable_2
16264 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16268 #ifdef HAVE_consttable_4
16270 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16274 #ifdef HAVE_consttable_8
16276 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16280 #ifdef HAVE_consttable_16
16282 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16287 gcc_unreachable ();
16295 minipool_vector_head
= minipool_vector_tail
= NULL
;
16296 scan
= emit_insn_after (gen_consttable_end (), scan
);
16297 scan
= emit_barrier_after (scan
);
16300 /* Return the cost of forcibly inserting a barrier after INSN. */
16302 arm_barrier_cost (rtx insn
)
16304 /* Basing the location of the pool on the loop depth is preferable,
16305 but at the moment, the basic block information seems to be
16306 corrupt by this stage of the compilation. */
16307 int base_cost
= 50;
16308 rtx next
= next_nonnote_insn (insn
);
16310 if (next
!= NULL
&& LABEL_P (next
))
16313 switch (GET_CODE (insn
))
16316 /* It will always be better to place the table before the label, rather
16325 return base_cost
- 10;
16328 return base_cost
+ 10;
16332 /* Find the best place in the insn stream in the range
16333 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16334 Create the barrier by inserting a jump and add a new fix entry for
16337 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16339 HOST_WIDE_INT count
= 0;
16341 rtx from
= fix
->insn
;
16342 /* The instruction after which we will insert the jump. */
16343 rtx selected
= NULL
;
16345 /* The address at which the jump instruction will be placed. */
16346 HOST_WIDE_INT selected_address
;
16348 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16349 rtx label
= gen_label_rtx ();
16351 selected_cost
= arm_barrier_cost (from
);
16352 selected_address
= fix
->address
;
16354 while (from
&& count
< max_count
)
16359 /* This code shouldn't have been called if there was a natural barrier
16361 gcc_assert (!BARRIER_P (from
));
16363 /* Count the length of this insn. This must stay in sync with the
16364 code that pushes minipool fixes. */
16365 if (LABEL_P (from
))
16366 count
+= get_label_padding (from
);
16368 count
+= get_attr_length (from
);
16370 /* If there is a jump table, add its length. */
16371 if (tablejump_p (from
, NULL
, &tmp
))
16373 count
+= get_jump_table_size (tmp
);
16375 /* Jump tables aren't in a basic block, so base the cost on
16376 the dispatch insn. If we select this location, we will
16377 still put the pool after the table. */
16378 new_cost
= arm_barrier_cost (from
);
16380 if (count
< max_count
16381 && (!selected
|| new_cost
<= selected_cost
))
16384 selected_cost
= new_cost
;
16385 selected_address
= fix
->address
+ count
;
16388 /* Continue after the dispatch table. */
16389 from
= NEXT_INSN (tmp
);
16393 new_cost
= arm_barrier_cost (from
);
16395 if (count
< max_count
16396 && (!selected
|| new_cost
<= selected_cost
))
16399 selected_cost
= new_cost
;
16400 selected_address
= fix
->address
+ count
;
16403 from
= NEXT_INSN (from
);
16406 /* Make sure that we found a place to insert the jump. */
16407 gcc_assert (selected
);
16409 /* Make sure we do not split a call and its corresponding
16410 CALL_ARG_LOCATION note. */
16411 if (CALL_P (selected
))
16413 rtx next
= NEXT_INSN (selected
);
16414 if (next
&& NOTE_P (next
)
16415 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16419 /* Create a new JUMP_INSN that branches around a barrier. */
16420 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16421 JUMP_LABEL (from
) = label
;
16422 barrier
= emit_barrier_after (from
);
16423 emit_label_after (label
, barrier
);
16425 /* Create a minipool barrier entry for the new barrier. */
16426 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16427 new_fix
->insn
= barrier
;
16428 new_fix
->address
= selected_address
;
16429 new_fix
->next
= fix
->next
;
16430 fix
->next
= new_fix
;
16435 /* Record that there is a natural barrier in the insn stream at
16438 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
16440 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16443 fix
->address
= address
;
16446 if (minipool_fix_head
!= NULL
)
16447 minipool_fix_tail
->next
= fix
;
16449 minipool_fix_head
= fix
;
16451 minipool_fix_tail
= fix
;
16454 /* Record INSN, which will need fixing up to load a value from the
16455 minipool. ADDRESS is the offset of the insn since the start of the
16456 function; LOC is a pointer to the part of the insn which requires
16457 fixing; VALUE is the constant that must be loaded, which is of type
16460 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
16461 enum machine_mode mode
, rtx value
)
16463 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16466 fix
->address
= address
;
16469 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16470 fix
->value
= value
;
16471 fix
->forwards
= get_attr_pool_range (insn
);
16472 fix
->backwards
= get_attr_neg_pool_range (insn
);
16473 fix
->minipool
= NULL
;
16475 /* If an insn doesn't have a range defined for it, then it isn't
16476 expecting to be reworked by this code. Better to stop now than
16477 to generate duff assembly code. */
16478 gcc_assert (fix
->forwards
|| fix
->backwards
);
16480 /* If an entry requires 8-byte alignment then assume all constant pools
16481 require 4 bytes of padding. Trying to do this later on a per-pool
16482 basis is awkward because existing pool entries have to be modified. */
16483 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16488 fprintf (dump_file
,
16489 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16490 GET_MODE_NAME (mode
),
16491 INSN_UID (insn
), (unsigned long) address
,
16492 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16493 arm_print_value (dump_file
, fix
->value
);
16494 fprintf (dump_file
, "\n");
16497 /* Add it to the chain of fixes. */
16500 if (minipool_fix_head
!= NULL
)
16501 minipool_fix_tail
->next
= fix
;
16503 minipool_fix_head
= fix
;
16505 minipool_fix_tail
= fix
;
16508 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16509 Returns the number of insns needed, or 99 if we always want to synthesize
16512 arm_max_const_double_inline_cost ()
16514 /* Let the value get synthesized to avoid the use of literal pools. */
16515 if (arm_disable_literal_pool
)
16518 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16521 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16522 Returns the number of insns needed, or 99 if we don't know how to
16525 arm_const_double_inline_cost (rtx val
)
16527 rtx lowpart
, highpart
;
16528 enum machine_mode mode
;
16530 mode
= GET_MODE (val
);
16532 if (mode
== VOIDmode
)
16535 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16537 lowpart
= gen_lowpart (SImode
, val
);
16538 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16540 gcc_assert (CONST_INT_P (lowpart
));
16541 gcc_assert (CONST_INT_P (highpart
));
16543 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16544 NULL_RTX
, NULL_RTX
, 0, 0)
16545 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16546 NULL_RTX
, NULL_RTX
, 0, 0));
16549 /* Return true if it is worthwhile to split a 64-bit constant into two
16550 32-bit operations. This is the case if optimizing for size, or
16551 if we have load delay slots, or if one 32-bit part can be done with
16552 a single data operation. */
16554 arm_const_double_by_parts (rtx val
)
16556 enum machine_mode mode
= GET_MODE (val
);
16559 if (optimize_size
|| arm_ld_sched
)
16562 if (mode
== VOIDmode
)
16565 part
= gen_highpart_mode (SImode
, mode
, val
);
16567 gcc_assert (CONST_INT_P (part
));
16569 if (const_ok_for_arm (INTVAL (part
))
16570 || const_ok_for_arm (~INTVAL (part
)))
16573 part
= gen_lowpart (SImode
, val
);
16575 gcc_assert (CONST_INT_P (part
));
16577 if (const_ok_for_arm (INTVAL (part
))
16578 || const_ok_for_arm (~INTVAL (part
)))
16584 /* Return true if it is possible to inline both the high and low parts
16585 of a 64-bit constant into 32-bit data processing instructions. */
16587 arm_const_double_by_immediates (rtx val
)
16589 enum machine_mode mode
= GET_MODE (val
);
16592 if (mode
== VOIDmode
)
16595 part
= gen_highpart_mode (SImode
, mode
, val
);
16597 gcc_assert (CONST_INT_P (part
));
16599 if (!const_ok_for_arm (INTVAL (part
)))
16602 part
= gen_lowpart (SImode
, val
);
16604 gcc_assert (CONST_INT_P (part
));
16606 if (!const_ok_for_arm (INTVAL (part
)))
16612 /* Scan INSN and note any of its operands that need fixing.
16613 If DO_PUSHES is false we do not actually push any of the fixups
16616 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
16620 extract_insn (insn
);
16622 if (!constrain_operands (1))
16623 fatal_insn_not_found (insn
);
16625 if (recog_data
.n_alternatives
== 0)
16628 /* Fill in recog_op_alt with information about the constraints of
16630 preprocess_constraints ();
16632 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16634 /* Things we need to fix can only occur in inputs. */
16635 if (recog_data
.operand_type
[opno
] != OP_IN
)
16638 /* If this alternative is a memory reference, then any mention
16639 of constants in this alternative is really to fool reload
16640 into allowing us to accept one there. We need to fix them up
16641 now so that we output the right code. */
16642 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
16644 rtx op
= recog_data
.operand
[opno
];
16646 if (CONSTANT_P (op
))
16649 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16650 recog_data
.operand_mode
[opno
], op
);
16652 else if (MEM_P (op
)
16653 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16654 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16658 rtx cop
= avoid_constant_pool_reference (op
);
16660 /* Casting the address of something to a mode narrower
16661 than a word can cause avoid_constant_pool_reference()
16662 to return the pool reference itself. That's no good to
16663 us here. Lets just hope that we can use the
16664 constant pool value directly. */
16666 cop
= get_pool_constant (XEXP (op
, 0));
16668 push_minipool_fix (insn
, address
,
16669 recog_data
.operand_loc
[opno
],
16670 recog_data
.operand_mode
[opno
], cop
);
16680 /* Rewrite move insn into subtract of 0 if the condition codes will
16681 be useful in next conditional jump insn. */
16684 thumb1_reorg (void)
16688 FOR_EACH_BB_FN (bb
, cfun
)
16691 rtx pat
, op0
, set
= NULL
;
16692 rtx prev
, insn
= BB_END (bb
);
16693 bool insn_clobbered
= false;
16695 while (insn
!= BB_HEAD (bb
) && DEBUG_INSN_P (insn
))
16696 insn
= PREV_INSN (insn
);
16698 /* Find the last cbranchsi4_insn in basic block BB. */
16699 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
16702 /* Get the register with which we are comparing. */
16703 pat
= PATTERN (insn
);
16704 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
16706 /* Find the first flag setting insn before INSN in basic block BB. */
16707 gcc_assert (insn
!= BB_HEAD (bb
));
16708 for (prev
= PREV_INSN (insn
);
16710 && prev
!= BB_HEAD (bb
)
16712 || DEBUG_INSN_P (prev
)
16713 || ((set
= single_set (prev
)) != NULL
16714 && get_attr_conds (prev
) == CONDS_NOCOND
)));
16715 prev
= PREV_INSN (prev
))
16717 if (reg_set_p (op0
, prev
))
16718 insn_clobbered
= true;
16721 /* Skip if op0 is clobbered by insn other than prev. */
16722 if (insn_clobbered
)
16728 dest
= SET_DEST (set
);
16729 src
= SET_SRC (set
);
16730 if (!low_register_operand (dest
, SImode
)
16731 || !low_register_operand (src
, SImode
))
16734 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16735 in INSN. Both src and dest of the move insn are checked. */
16736 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
16738 dest
= copy_rtx (dest
);
16739 src
= copy_rtx (src
);
16740 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
16741 PATTERN (prev
) = gen_rtx_SET (VOIDmode
, dest
, src
);
16742 INSN_CODE (prev
) = -1;
16743 /* Set test register in INSN to dest. */
16744 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
16745 INSN_CODE (insn
) = -1;
16750 /* Convert instructions to their cc-clobbering variant if possible, since
16751 that allows us to use smaller encodings. */
16754 thumb2_reorg (void)
16759 INIT_REG_SET (&live
);
16761 /* We are freeing block_for_insn in the toplev to keep compatibility
16762 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16763 compute_bb_for_insn ();
16766 FOR_EACH_BB_FN (bb
, cfun
)
16770 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
16771 df_simulate_initialize_backwards (bb
, &live
);
16772 FOR_BB_INSNS_REVERSE (bb
, insn
)
16774 if (NONJUMP_INSN_P (insn
)
16775 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
16776 && GET_CODE (PATTERN (insn
)) == SET
)
16778 enum {SKIP
, CONV
, SWAP_CONV
} action
= SKIP
;
16779 rtx pat
= PATTERN (insn
);
16780 rtx dst
= XEXP (pat
, 0);
16781 rtx src
= XEXP (pat
, 1);
16782 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
16784 if (!OBJECT_P (src
))
16785 op0
= XEXP (src
, 0);
16787 if (BINARY_P (src
))
16788 op1
= XEXP (src
, 1);
16790 if (low_register_operand (dst
, SImode
))
16792 switch (GET_CODE (src
))
16795 /* Adding two registers and storing the result
16796 in the first source is already a 16-bit
16798 if (rtx_equal_p (dst
, op0
)
16799 && register_operand (op1
, SImode
))
16802 if (low_register_operand (op0
, SImode
))
16804 /* ADDS <Rd>,<Rn>,<Rm> */
16805 if (low_register_operand (op1
, SImode
))
16807 /* ADDS <Rdn>,#<imm8> */
16808 /* SUBS <Rdn>,#<imm8> */
16809 else if (rtx_equal_p (dst
, op0
)
16810 && CONST_INT_P (op1
)
16811 && IN_RANGE (INTVAL (op1
), -255, 255))
16813 /* ADDS <Rd>,<Rn>,#<imm3> */
16814 /* SUBS <Rd>,<Rn>,#<imm3> */
16815 else if (CONST_INT_P (op1
)
16816 && IN_RANGE (INTVAL (op1
), -7, 7))
16819 /* ADCS <Rd>, <Rn> */
16820 else if (GET_CODE (XEXP (src
, 0)) == PLUS
16821 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
16822 && low_register_operand (XEXP (XEXP (src
, 0), 1),
16824 && COMPARISON_P (op1
)
16825 && cc_register (XEXP (op1
, 0), VOIDmode
)
16826 && maybe_get_arm_condition_code (op1
) == ARM_CS
16827 && XEXP (op1
, 1) == const0_rtx
)
16832 /* RSBS <Rd>,<Rn>,#0
16833 Not handled here: see NEG below. */
16834 /* SUBS <Rd>,<Rn>,#<imm3>
16836 Not handled here: see PLUS above. */
16837 /* SUBS <Rd>,<Rn>,<Rm> */
16838 if (low_register_operand (op0
, SImode
)
16839 && low_register_operand (op1
, SImode
))
16844 /* MULS <Rdm>,<Rn>,<Rdm>
16845 As an exception to the rule, this is only used
16846 when optimizing for size since MULS is slow on all
16847 known implementations. We do not even want to use
16848 MULS in cold code, if optimizing for speed, so we
16849 test the global flag here. */
16850 if (!optimize_size
)
16852 /* else fall through. */
16856 /* ANDS <Rdn>,<Rm> */
16857 if (rtx_equal_p (dst
, op0
)
16858 && low_register_operand (op1
, SImode
))
16860 else if (rtx_equal_p (dst
, op1
)
16861 && low_register_operand (op0
, SImode
))
16862 action
= SWAP_CONV
;
16868 /* ASRS <Rdn>,<Rm> */
16869 /* LSRS <Rdn>,<Rm> */
16870 /* LSLS <Rdn>,<Rm> */
16871 if (rtx_equal_p (dst
, op0
)
16872 && low_register_operand (op1
, SImode
))
16874 /* ASRS <Rd>,<Rm>,#<imm5> */
16875 /* LSRS <Rd>,<Rm>,#<imm5> */
16876 /* LSLS <Rd>,<Rm>,#<imm5> */
16877 else if (low_register_operand (op0
, SImode
)
16878 && CONST_INT_P (op1
)
16879 && IN_RANGE (INTVAL (op1
), 0, 31))
16884 /* RORS <Rdn>,<Rm> */
16885 if (rtx_equal_p (dst
, op0
)
16886 && low_register_operand (op1
, SImode
))
16892 /* MVNS <Rd>,<Rm> */
16893 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16894 if (low_register_operand (op0
, SImode
))
16899 /* MOVS <Rd>,#<imm8> */
16900 if (CONST_INT_P (src
)
16901 && IN_RANGE (INTVAL (src
), 0, 255))
16906 /* MOVS and MOV<c> with registers have different
16907 encodings, so are not relevant here. */
16915 if (action
!= SKIP
)
16917 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
16918 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
16921 if (action
== SWAP_CONV
)
16923 src
= copy_rtx (src
);
16924 XEXP (src
, 0) = op1
;
16925 XEXP (src
, 1) = op0
;
16926 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
16927 vec
= gen_rtvec (2, pat
, clobber
);
16929 else /* action == CONV */
16930 vec
= gen_rtvec (2, pat
, clobber
);
16932 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
16933 INSN_CODE (insn
) = -1;
16937 if (NONDEBUG_INSN_P (insn
))
16938 df_simulate_one_insn_backwards (bb
, insn
, &live
);
16942 CLEAR_REG_SET (&live
);
16945 /* Gcc puts the pool in the wrong place for ARM, since we can only
16946 load addresses a limited distance around the pc. We do some
16947 special munging to move the constant pool values to the correct
16948 point in the code. */
16953 HOST_WIDE_INT address
= 0;
16958 else if (TARGET_THUMB2
)
16961 /* Ensure all insns that must be split have been split at this point.
16962 Otherwise, the pool placement code below may compute incorrect
16963 insn lengths. Note that when optimizing, all insns have already
16964 been split at this point. */
16966 split_all_insns_noflow ();
16968 minipool_fix_head
= minipool_fix_tail
= NULL
;
16970 /* The first insn must always be a note, or the code below won't
16971 scan it properly. */
16972 insn
= get_insns ();
16973 gcc_assert (NOTE_P (insn
));
16976 /* Scan all the insns and record the operands that will need fixing. */
16977 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
16979 if (BARRIER_P (insn
))
16980 push_minipool_barrier (insn
, address
);
16981 else if (INSN_P (insn
))
16985 note_invalid_constants (insn
, address
, true);
16986 address
+= get_attr_length (insn
);
16988 /* If the insn is a vector jump, add the size of the table
16989 and skip the table. */
16990 if (tablejump_p (insn
, NULL
, &table
))
16992 address
+= get_jump_table_size (table
);
16996 else if (LABEL_P (insn
))
16997 /* Add the worst-case padding due to alignment. We don't add
16998 the _current_ padding because the minipool insertions
16999 themselves might change it. */
17000 address
+= get_label_padding (insn
);
17003 fix
= minipool_fix_head
;
17005 /* Now scan the fixups and perform the required changes. */
17010 Mfix
* last_added_fix
;
17011 Mfix
* last_barrier
= NULL
;
17014 /* Skip any further barriers before the next fix. */
17015 while (fix
&& BARRIER_P (fix
->insn
))
17018 /* No more fixes. */
17022 last_added_fix
= NULL
;
17024 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17026 if (BARRIER_P (ftmp
->insn
))
17028 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17031 last_barrier
= ftmp
;
17033 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17036 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17039 /* If we found a barrier, drop back to that; any fixes that we
17040 could have reached but come after the barrier will now go in
17041 the next mini-pool. */
17042 if (last_barrier
!= NULL
)
17044 /* Reduce the refcount for those fixes that won't go into this
17046 for (fdel
= last_barrier
->next
;
17047 fdel
&& fdel
!= ftmp
;
17050 fdel
->minipool
->refcount
--;
17051 fdel
->minipool
= NULL
;
17054 ftmp
= last_barrier
;
17058 /* ftmp is first fix that we can't fit into this pool and
17059 there no natural barriers that we could use. Insert a
17060 new barrier in the code somewhere between the previous
17061 fix and this one, and arrange to jump around it. */
17062 HOST_WIDE_INT max_address
;
17064 /* The last item on the list of fixes must be a barrier, so
17065 we can never run off the end of the list of fixes without
17066 last_barrier being set. */
17069 max_address
= minipool_vector_head
->max_address
;
17070 /* Check that there isn't another fix that is in range that
17071 we couldn't fit into this pool because the pool was
17072 already too large: we need to put the pool before such an
17073 instruction. The pool itself may come just after the
17074 fix because create_fix_barrier also allows space for a
17075 jump instruction. */
17076 if (ftmp
->address
< max_address
)
17077 max_address
= ftmp
->address
+ 1;
17079 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17082 assign_minipool_offsets (last_barrier
);
17086 if (!BARRIER_P (ftmp
->insn
)
17087 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17094 /* Scan over the fixes we have identified for this pool, fixing them
17095 up and adding the constants to the pool itself. */
17096 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17097 this_fix
= this_fix
->next
)
17098 if (!BARRIER_P (this_fix
->insn
))
17101 = plus_constant (Pmode
,
17102 gen_rtx_LABEL_REF (VOIDmode
,
17103 minipool_vector_label
),
17104 this_fix
->minipool
->offset
);
17105 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17108 dump_minipool (last_barrier
->insn
);
17112 /* From now on we must synthesize any constants that we can't handle
17113 directly. This can happen if the RTL gets split during final
17114 instruction generation. */
17115 after_arm_reorg
= 1;
17117 /* Free the minipool memory. */
17118 obstack_free (&minipool_obstack
, minipool_startobj
);
17121 /* Routines to output assembly language. */
17123 /* If the rtx is the correct value then return the string of the number.
17124 In this way we can ensure that valid double constants are generated even
17125 when cross compiling. */
17127 fp_immediate_constant (rtx x
)
17131 if (!fp_consts_inited
)
17134 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
17136 gcc_assert (REAL_VALUES_EQUAL (r
, value_fp0
));
17140 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17141 static const char *
17142 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17144 if (!fp_consts_inited
)
17147 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
17151 /* OPERANDS[0] is the entire list of insns that constitute pop,
17152 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17153 is in the list, UPDATE is true iff the list contains explicit
17154 update of base register. */
17156 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17162 const char *conditional
;
17163 int num_saves
= XVECLEN (operands
[0], 0);
17164 unsigned int regno
;
17165 unsigned int regno_base
= REGNO (operands
[1]);
17168 offset
+= update
? 1 : 0;
17169 offset
+= return_pc
? 1 : 0;
17171 /* Is the base register in the list? */
17172 for (i
= offset
; i
< num_saves
; i
++)
17174 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17175 /* If SP is in the list, then the base register must be SP. */
17176 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17177 /* If base register is in the list, there must be no explicit update. */
17178 if (regno
== regno_base
)
17179 gcc_assert (!update
);
17182 conditional
= reverse
? "%?%D0" : "%?%d0";
17183 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
17185 /* Output pop (not stmfd) because it has a shorter encoding. */
17186 gcc_assert (update
);
17187 sprintf (pattern
, "pop%s\t{", conditional
);
17191 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17192 It's just a convention, their semantics are identical. */
17193 if (regno_base
== SP_REGNUM
)
17194 sprintf (pattern
, "ldm%sfd\t", conditional
);
17195 else if (TARGET_UNIFIED_ASM
)
17196 sprintf (pattern
, "ldmia%s\t", conditional
);
17198 sprintf (pattern
, "ldm%sia\t", conditional
);
17200 strcat (pattern
, reg_names
[regno_base
]);
17202 strcat (pattern
, "!, {");
17204 strcat (pattern
, ", {");
17207 /* Output the first destination register. */
17209 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17211 /* Output the rest of the destination registers. */
17212 for (i
= offset
+ 1; i
< num_saves
; i
++)
17214 strcat (pattern
, ", ");
17216 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17219 strcat (pattern
, "}");
17221 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
17222 strcat (pattern
, "^");
17224 output_asm_insn (pattern
, &cond
);
17228 /* Output the assembly for a store multiple. */
17231 vfp_output_fstmd (rtx
* operands
)
17238 strcpy (pattern
, "fstmfdd%?\t%m0!, {%P1");
17239 p
= strlen (pattern
);
17241 gcc_assert (REG_P (operands
[1]));
17243 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17244 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17246 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17248 strcpy (&pattern
[p
], "}");
17250 output_asm_insn (pattern
, operands
);
17255 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17256 number of bytes pushed. */
17259 vfp_emit_fstmd (int base_reg
, int count
)
17266 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17267 register pairs are stored by a store multiple insn. We avoid this
17268 by pushing an extra pair. */
17269 if (count
== 2 && !arm_arch6
)
17271 if (base_reg
== LAST_VFP_REGNUM
- 3)
17276 /* FSTMD may not store more than 16 doubleword registers at once. Split
17277 larger stores into multiple parts (up to a maximum of two, in
17282 /* NOTE: base_reg is an internal register number, so each D register
17284 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17285 saved
+= vfp_emit_fstmd (base_reg
, 16);
17289 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17290 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17292 reg
= gen_rtx_REG (DFmode
, base_reg
);
17295 XVECEXP (par
, 0, 0)
17296 = gen_rtx_SET (VOIDmode
,
17299 gen_rtx_PRE_MODIFY (Pmode
,
17302 (Pmode
, stack_pointer_rtx
,
17305 gen_rtx_UNSPEC (BLKmode
,
17306 gen_rtvec (1, reg
),
17307 UNSPEC_PUSH_MULT
));
17309 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17310 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17311 RTX_FRAME_RELATED_P (tmp
) = 1;
17312 XVECEXP (dwarf
, 0, 0) = tmp
;
17314 tmp
= gen_rtx_SET (VOIDmode
,
17315 gen_frame_mem (DFmode
, stack_pointer_rtx
),
17317 RTX_FRAME_RELATED_P (tmp
) = 1;
17318 XVECEXP (dwarf
, 0, 1) = tmp
;
17320 for (i
= 1; i
< count
; i
++)
17322 reg
= gen_rtx_REG (DFmode
, base_reg
);
17324 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17326 tmp
= gen_rtx_SET (VOIDmode
,
17327 gen_frame_mem (DFmode
,
17328 plus_constant (Pmode
,
17332 RTX_FRAME_RELATED_P (tmp
) = 1;
17333 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17336 par
= emit_insn (par
);
17337 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17338 RTX_FRAME_RELATED_P (par
) = 1;
17343 /* Emit a call instruction with pattern PAT. ADDR is the address of
17344 the call target. */
17347 arm_emit_call_insn (rtx pat
, rtx addr
)
17351 insn
= emit_call_insn (pat
);
17353 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17354 If the call might use such an entry, add a use of the PIC register
17355 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17356 if (TARGET_VXWORKS_RTP
17358 && GET_CODE (addr
) == SYMBOL_REF
17359 && (SYMBOL_REF_DECL (addr
)
17360 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17361 : !SYMBOL_REF_LOCAL_P (addr
)))
17363 require_pic_register ();
17364 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17368 /* Output a 'call' insn. */
17370 output_call (rtx
*operands
)
17372 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17374 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17375 if (REGNO (operands
[0]) == LR_REGNUM
)
17377 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17378 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17381 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17383 if (TARGET_INTERWORK
|| arm_arch4t
)
17384 output_asm_insn ("bx%?\t%0", operands
);
17386 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17391 /* Output a 'call' insn that is a reference in memory. This is
17392 disabled for ARMv5 and we prefer a blx instead because otherwise
17393 there's a significant performance overhead. */
17395 output_call_mem (rtx
*operands
)
17397 gcc_assert (!arm_arch5
);
17398 if (TARGET_INTERWORK
)
17400 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17401 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17402 output_asm_insn ("bx%?\t%|ip", operands
);
17404 else if (regno_use_in (LR_REGNUM
, operands
[0]))
17406 /* LR is used in the memory address. We load the address in the
17407 first instruction. It's safe to use IP as the target of the
17408 load since the call will kill it anyway. */
17409 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17410 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17412 output_asm_insn ("bx%?\t%|ip", operands
);
17414 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
17418 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17419 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
17426 /* Output a move from arm registers to arm registers of a long double
17427 OPERANDS[0] is the destination.
17428 OPERANDS[1] is the source. */
17430 output_mov_long_double_arm_from_arm (rtx
*operands
)
17432 /* We have to be careful here because the two might overlap. */
17433 int dest_start
= REGNO (operands
[0]);
17434 int src_start
= REGNO (operands
[1]);
17438 if (dest_start
< src_start
)
17440 for (i
= 0; i
< 3; i
++)
17442 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17443 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17444 output_asm_insn ("mov%?\t%0, %1", ops
);
17449 for (i
= 2; i
>= 0; i
--)
17451 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17452 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17453 output_asm_insn ("mov%?\t%0, %1", ops
);
17461 arm_emit_movpair (rtx dest
, rtx src
)
17463 /* If the src is an immediate, simplify it. */
17464 if (CONST_INT_P (src
))
17466 HOST_WIDE_INT val
= INTVAL (src
);
17467 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17468 if ((val
>> 16) & 0x0000ffff)
17469 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17471 GEN_INT ((val
>> 16) & 0x0000ffff));
17474 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17475 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17478 /* Output a move between double words. It must be REG<-MEM
17481 output_move_double (rtx
*operands
, bool emit
, int *count
)
17483 enum rtx_code code0
= GET_CODE (operands
[0]);
17484 enum rtx_code code1
= GET_CODE (operands
[1]);
17489 /* The only case when this might happen is when
17490 you are looking at the length of a DImode instruction
17491 that has an invalid constant in it. */
17492 if (code0
== REG
&& code1
!= MEM
)
17494 gcc_assert (!emit
);
17501 unsigned int reg0
= REGNO (operands
[0]);
17503 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17505 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17507 switch (GET_CODE (XEXP (operands
[1], 0)))
17514 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17515 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17517 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17522 gcc_assert (TARGET_LDRD
);
17524 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
17531 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
17533 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
17541 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
17543 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
17548 gcc_assert (TARGET_LDRD
);
17550 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
17555 /* Autoicrement addressing modes should never have overlapping
17556 base and destination registers, and overlapping index registers
17557 are already prohibited, so this doesn't need to worry about
17559 otherops
[0] = operands
[0];
17560 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17561 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17563 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17565 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17567 /* Registers overlap so split out the increment. */
17570 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17571 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
17578 /* Use a single insn if we can.
17579 FIXME: IWMMXT allows offsets larger than ldrd can
17580 handle, fix these up with a pair of ldr. */
17582 || !CONST_INT_P (otherops
[2])
17583 || (INTVAL (otherops
[2]) > -256
17584 && INTVAL (otherops
[2]) < 256))
17587 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
17593 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17594 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17604 /* Use a single insn if we can.
17605 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17606 fix these up with a pair of ldr. */
17608 || !CONST_INT_P (otherops
[2])
17609 || (INTVAL (otherops
[2]) > -256
17610 && INTVAL (otherops
[2]) < 256))
17613 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
17619 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17620 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
17630 /* We might be able to use ldrd %0, %1 here. However the range is
17631 different to ldr/adr, and it is broken on some ARMv7-M
17632 implementations. */
17633 /* Use the second register of the pair to avoid problematic
17635 otherops
[1] = operands
[1];
17637 output_asm_insn ("adr%?\t%0, %1", otherops
);
17638 operands
[1] = otherops
[0];
17642 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17644 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
17651 /* ??? This needs checking for thumb2. */
17653 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
17654 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
17656 otherops
[0] = operands
[0];
17657 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
17658 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
17660 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
17662 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17664 switch ((int) INTVAL (otherops
[2]))
17668 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
17674 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
17680 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
17684 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
17685 operands
[1] = otherops
[0];
17687 && (REG_P (otherops
[2])
17689 || (CONST_INT_P (otherops
[2])
17690 && INTVAL (otherops
[2]) > -256
17691 && INTVAL (otherops
[2]) < 256)))
17693 if (reg_overlap_mentioned_p (operands
[0],
17697 /* Swap base and index registers over to
17698 avoid a conflict. */
17700 otherops
[1] = otherops
[2];
17703 /* If both registers conflict, it will usually
17704 have been fixed by a splitter. */
17705 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
17706 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
17710 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17711 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17718 otherops
[0] = operands
[0];
17720 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
17725 if (CONST_INT_P (otherops
[2]))
17729 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
17730 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
17732 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17738 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17744 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
17751 return "ldr%(d%)\t%0, [%1]";
17753 return "ldm%(ia%)\t%1, %M0";
17757 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
17758 /* Take care of overlapping base/data reg. */
17759 if (reg_mentioned_p (operands
[0], operands
[1]))
17763 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17764 output_asm_insn ("ldr%?\t%0, %1", operands
);
17774 output_asm_insn ("ldr%?\t%0, %1", operands
);
17775 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17785 /* Constraints should ensure this. */
17786 gcc_assert (code0
== MEM
&& code1
== REG
);
17787 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
17788 || (TARGET_ARM
&& TARGET_LDRD
));
17790 switch (GET_CODE (XEXP (operands
[0], 0)))
17796 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
17798 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
17803 gcc_assert (TARGET_LDRD
);
17805 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
17812 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
17814 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
17822 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
17824 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
17829 gcc_assert (TARGET_LDRD
);
17831 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
17836 otherops
[0] = operands
[1];
17837 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
17838 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
17840 /* IWMMXT allows offsets larger than ldrd can handle,
17841 fix these up with a pair of ldr. */
17843 && CONST_INT_P (otherops
[2])
17844 && (INTVAL(otherops
[2]) <= -256
17845 || INTVAL(otherops
[2]) >= 256))
17847 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17851 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
17852 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17861 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17862 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
17868 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17871 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
17876 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
17881 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
17882 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17884 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
17888 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
17895 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
17902 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
17907 && (REG_P (otherops
[2])
17909 || (CONST_INT_P (otherops
[2])
17910 && INTVAL (otherops
[2]) > -256
17911 && INTVAL (otherops
[2]) < 256)))
17913 otherops
[0] = operands
[1];
17914 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
17916 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
17922 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
17923 otherops
[1] = operands
[1];
17926 output_asm_insn ("str%?\t%1, %0", operands
);
17927 output_asm_insn ("str%?\t%H1, %0", otherops
);
17937 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17938 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17941 output_move_quad (rtx
*operands
)
17943 if (REG_P (operands
[0]))
17945 /* Load, or reg->reg move. */
17947 if (MEM_P (operands
[1]))
17949 switch (GET_CODE (XEXP (operands
[1], 0)))
17952 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17957 output_asm_insn ("adr%?\t%0, %1", operands
);
17958 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
17962 gcc_unreachable ();
17970 gcc_assert (REG_P (operands
[1]));
17972 dest
= REGNO (operands
[0]);
17973 src
= REGNO (operands
[1]);
17975 /* This seems pretty dumb, but hopefully GCC won't try to do it
17978 for (i
= 0; i
< 4; i
++)
17980 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
17981 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
17982 output_asm_insn ("mov%?\t%0, %1", ops
);
17985 for (i
= 3; i
>= 0; i
--)
17987 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
17988 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
17989 output_asm_insn ("mov%?\t%0, %1", ops
);
17995 gcc_assert (MEM_P (operands
[0]));
17996 gcc_assert (REG_P (operands
[1]));
17997 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
17999 switch (GET_CODE (XEXP (operands
[0], 0)))
18002 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18006 gcc_unreachable ();
18013 /* Output a VFP load or store instruction. */
18016 output_move_vfp (rtx
*operands
)
18018 rtx reg
, mem
, addr
, ops
[2];
18019 int load
= REG_P (operands
[0]);
18020 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18021 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18024 enum machine_mode mode
;
18026 reg
= operands
[!load
];
18027 mem
= operands
[load
];
18029 mode
= GET_MODE (reg
);
18031 gcc_assert (REG_P (reg
));
18032 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18033 gcc_assert (mode
== SFmode
18037 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18038 gcc_assert (MEM_P (mem
));
18040 addr
= XEXP (mem
, 0);
18042 switch (GET_CODE (addr
))
18045 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18046 ops
[0] = XEXP (addr
, 0);
18051 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
18052 ops
[0] = XEXP (addr
, 0);
18057 templ
= "f%s%c%%?\t%%%s0, %%1%s";
18063 sprintf (buff
, templ
,
18064 load
? "ld" : "st",
18067 integer_p
? "\t%@ int" : "");
18068 output_asm_insn (buff
, ops
);
18073 /* Output a Neon double-word or quad-word load or store, or a load
18074 or store for larger structure modes.
18076 WARNING: The ordering of elements is weird in big-endian mode,
18077 because the EABI requires that vectors stored in memory appear
18078 as though they were stored by a VSTM, as required by the EABI.
18079 GCC RTL defines element ordering based on in-memory order.
18080 This can be different from the architectural ordering of elements
18081 within a NEON register. The intrinsics defined in arm_neon.h use the
18082 NEON register element ordering, not the GCC RTL element ordering.
18084 For example, the in-memory ordering of a big-endian a quadword
18085 vector with 16-bit elements when stored from register pair {d0,d1}
18086 will be (lowest address first, d0[N] is NEON register element N):
18088 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18090 When necessary, quadword registers (dN, dN+1) are moved to ARM
18091 registers from rN in the order:
18093 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18095 So that STM/LDM can be used on vectors in ARM registers, and the
18096 same memory layout will result as if VSTM/VLDM were used.
18098 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18099 possible, which allows use of appropriate alignment tags.
18100 Note that the choice of "64" is independent of the actual vector
18101 element size; this size simply ensures that the behavior is
18102 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18104 Due to limitations of those instructions, use of VST1.64/VLD1.64
18105 is not possible if:
18106 - the address contains PRE_DEC, or
18107 - the mode refers to more than 4 double-word registers
18109 In those cases, it would be possible to replace VSTM/VLDM by a
18110 sequence of instructions; this is not currently implemented since
18111 this is not certain to actually improve performance. */
18114 output_move_neon (rtx
*operands
)
18116 rtx reg
, mem
, addr
, ops
[2];
18117 int regno
, nregs
, load
= REG_P (operands
[0]);
18120 enum machine_mode mode
;
18122 reg
= operands
[!load
];
18123 mem
= operands
[load
];
18125 mode
= GET_MODE (reg
);
18127 gcc_assert (REG_P (reg
));
18128 regno
= REGNO (reg
);
18129 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18130 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18131 || NEON_REGNO_OK_FOR_QUAD (regno
));
18132 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18133 || VALID_NEON_QREG_MODE (mode
)
18134 || VALID_NEON_STRUCT_MODE (mode
));
18135 gcc_assert (MEM_P (mem
));
18137 addr
= XEXP (mem
, 0);
18139 /* Strip off const from addresses like (const (plus (...))). */
18140 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18141 addr
= XEXP (addr
, 0);
18143 switch (GET_CODE (addr
))
18146 /* We have to use vldm / vstm for too-large modes. */
18149 templ
= "v%smia%%?\t%%0!, %%h1";
18150 ops
[0] = XEXP (addr
, 0);
18154 templ
= "v%s1.64\t%%h1, %%A0";
18161 /* We have to use vldm / vstm in this case, since there is no
18162 pre-decrement form of the vld1 / vst1 instructions. */
18163 templ
= "v%smdb%%?\t%%0!, %%h1";
18164 ops
[0] = XEXP (addr
, 0);
18169 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18170 gcc_unreachable ();
18177 for (i
= 0; i
< nregs
; i
++)
18179 /* We're only using DImode here because it's a convenient size. */
18180 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18181 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18182 if (reg_overlap_mentioned_p (ops
[0], mem
))
18184 gcc_assert (overlap
== -1);
18189 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18190 output_asm_insn (buff
, ops
);
18195 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18196 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18197 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18198 output_asm_insn (buff
, ops
);
18205 /* We have to use vldm / vstm for too-large modes. */
18207 templ
= "v%smia%%?\t%%m0, %%h1";
18209 templ
= "v%s1.64\t%%h1, %%A0";
18215 sprintf (buff
, templ
, load
? "ld" : "st");
18216 output_asm_insn (buff
, ops
);
18221 /* Compute and return the length of neon_mov<mode>, where <mode> is
18222 one of VSTRUCT modes: EI, OI, CI or XI. */
18224 arm_attr_length_move_neon (rtx insn
)
18226 rtx reg
, mem
, addr
;
18228 enum machine_mode mode
;
18230 extract_insn_cached (insn
);
18232 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18234 mode
= GET_MODE (recog_data
.operand
[0]);
18245 gcc_unreachable ();
18249 load
= REG_P (recog_data
.operand
[0]);
18250 reg
= recog_data
.operand
[!load
];
18251 mem
= recog_data
.operand
[load
];
18253 gcc_assert (MEM_P (mem
));
18255 mode
= GET_MODE (reg
);
18256 addr
= XEXP (mem
, 0);
18258 /* Strip off const from addresses like (const (plus (...))). */
18259 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18260 addr
= XEXP (addr
, 0);
18262 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18264 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18271 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18275 arm_address_offset_is_imm (rtx insn
)
18279 extract_insn_cached (insn
);
18281 if (REG_P (recog_data
.operand
[0]))
18284 mem
= recog_data
.operand
[0];
18286 gcc_assert (MEM_P (mem
));
18288 addr
= XEXP (mem
, 0);
18291 || (GET_CODE (addr
) == PLUS
18292 && REG_P (XEXP (addr
, 0))
18293 && CONST_INT_P (XEXP (addr
, 1))))
18299 /* Output an ADD r, s, #n where n may be too big for one instruction.
18300 If adding zero to one register, output nothing. */
18302 output_add_immediate (rtx
*operands
)
18304 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18306 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18309 output_multi_immediate (operands
,
18310 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18313 output_multi_immediate (operands
,
18314 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18321 /* Output a multiple immediate operation.
18322 OPERANDS is the vector of operands referred to in the output patterns.
18323 INSTR1 is the output pattern to use for the first constant.
18324 INSTR2 is the output pattern to use for subsequent constants.
18325 IMMED_OP is the index of the constant slot in OPERANDS.
18326 N is the constant value. */
18327 static const char *
18328 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18329 int immed_op
, HOST_WIDE_INT n
)
18331 #if HOST_BITS_PER_WIDE_INT > 32
18337 /* Quick and easy output. */
18338 operands
[immed_op
] = const0_rtx
;
18339 output_asm_insn (instr1
, operands
);
18344 const char * instr
= instr1
;
18346 /* Note that n is never zero here (which would give no output). */
18347 for (i
= 0; i
< 32; i
+= 2)
18351 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18352 output_asm_insn (instr
, operands
);
18362 /* Return the name of a shifter operation. */
18363 static const char *
18364 arm_shift_nmem(enum rtx_code code
)
18369 return ARM_LSL_NAME
;
18385 /* Return the appropriate ARM instruction for the operation code.
18386 The returned result should not be overwritten. OP is the rtx of the
18387 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18390 arithmetic_instr (rtx op
, int shift_first_arg
)
18392 switch (GET_CODE (op
))
18398 return shift_first_arg
? "rsb" : "sub";
18413 return arm_shift_nmem(GET_CODE(op
));
18416 gcc_unreachable ();
18420 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18421 for the operation code. The returned result should not be overwritten.
18422 OP is the rtx code of the shift.
18423 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18425 static const char *
18426 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18429 enum rtx_code code
= GET_CODE (op
);
18434 if (!CONST_INT_P (XEXP (op
, 1)))
18436 output_operand_lossage ("invalid shift operand");
18441 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18449 mnem
= arm_shift_nmem(code
);
18450 if (CONST_INT_P (XEXP (op
, 1)))
18452 *amountp
= INTVAL (XEXP (op
, 1));
18454 else if (REG_P (XEXP (op
, 1)))
18461 output_operand_lossage ("invalid shift operand");
18467 /* We never have to worry about the amount being other than a
18468 power of 2, since this case can never be reloaded from a reg. */
18469 if (!CONST_INT_P (XEXP (op
, 1)))
18471 output_operand_lossage ("invalid shift operand");
18475 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18477 /* Amount must be a power of two. */
18478 if (*amountp
& (*amountp
- 1))
18480 output_operand_lossage ("invalid shift operand");
18484 *amountp
= int_log2 (*amountp
);
18485 return ARM_LSL_NAME
;
18488 output_operand_lossage ("invalid shift operand");
18492 /* This is not 100% correct, but follows from the desire to merge
18493 multiplication by a power of 2 with the recognizer for a
18494 shift. >=32 is not a valid shift for "lsl", so we must try and
18495 output a shift that produces the correct arithmetical result.
18496 Using lsr #32 is identical except for the fact that the carry bit
18497 is not set correctly if we set the flags; but we never use the
18498 carry bit from such an operation, so we can ignore that. */
18499 if (code
== ROTATERT
)
18500 /* Rotate is just modulo 32. */
18502 else if (*amountp
!= (*amountp
& 31))
18504 if (code
== ASHIFT
)
18509 /* Shifts of 0 are no-ops. */
18516 /* Obtain the shift from the POWER of two. */
18518 static HOST_WIDE_INT
18519 int_log2 (HOST_WIDE_INT power
)
18521 HOST_WIDE_INT shift
= 0;
18523 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
18525 gcc_assert (shift
<= 31);
18532 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18533 because /bin/as is horribly restrictive. The judgement about
18534 whether or not each character is 'printable' (and can be output as
18535 is) or not (and must be printed with an octal escape) must be made
18536 with reference to the *host* character set -- the situation is
18537 similar to that discussed in the comments above pp_c_char in
18538 c-pretty-print.c. */
18540 #define MAX_ASCII_LEN 51
18543 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18546 int len_so_far
= 0;
18548 fputs ("\t.ascii\t\"", stream
);
18550 for (i
= 0; i
< len
; i
++)
18554 if (len_so_far
>= MAX_ASCII_LEN
)
18556 fputs ("\"\n\t.ascii\t\"", stream
);
18562 if (c
== '\\' || c
== '\"')
18564 putc ('\\', stream
);
18572 fprintf (stream
, "\\%03o", c
);
18577 fputs ("\"\n", stream
);
18580 /* Compute the register save mask for registers 0 through 12
18581 inclusive. This code is used by arm_compute_save_reg_mask. */
18583 static unsigned long
18584 arm_compute_save_reg0_reg12_mask (void)
18586 unsigned long func_type
= arm_current_func_type ();
18587 unsigned long save_reg_mask
= 0;
18590 if (IS_INTERRUPT (func_type
))
18592 unsigned int max_reg
;
18593 /* Interrupt functions must not corrupt any registers,
18594 even call clobbered ones. If this is a leaf function
18595 we can just examine the registers used by the RTL, but
18596 otherwise we have to assume that whatever function is
18597 called might clobber anything, and so we have to save
18598 all the call-clobbered registers as well. */
18599 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18600 /* FIQ handlers have registers r8 - r12 banked, so
18601 we only need to check r0 - r7, Normal ISRs only
18602 bank r14 and r15, so we must check up to r12.
18603 r13 is the stack pointer which is always preserved,
18604 so we do not need to consider it here. */
18609 for (reg
= 0; reg
<= max_reg
; reg
++)
18610 if (df_regs_ever_live_p (reg
)
18611 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18612 save_reg_mask
|= (1 << reg
);
18614 /* Also save the pic base register if necessary. */
18616 && !TARGET_SINGLE_PIC_BASE
18617 && arm_pic_register
!= INVALID_REGNUM
18618 && crtl
->uses_pic_offset_table
)
18619 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18621 else if (IS_VOLATILE(func_type
))
18623 /* For noreturn functions we historically omitted register saves
18624 altogether. However this really messes up debugging. As a
18625 compromise save just the frame pointers. Combined with the link
18626 register saved elsewhere this should be sufficient to get
18628 if (frame_pointer_needed
)
18629 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18630 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
18631 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18632 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
18633 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
18637 /* In the normal case we only need to save those registers
18638 which are call saved and which are used by this function. */
18639 for (reg
= 0; reg
<= 11; reg
++)
18640 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
18641 save_reg_mask
|= (1 << reg
);
18643 /* Handle the frame pointer as a special case. */
18644 if (frame_pointer_needed
)
18645 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18647 /* If we aren't loading the PIC register,
18648 don't stack it even though it may be live. */
18650 && !TARGET_SINGLE_PIC_BASE
18651 && arm_pic_register
!= INVALID_REGNUM
18652 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
18653 || crtl
->uses_pic_offset_table
))
18654 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18656 /* The prologue will copy SP into R0, so save it. */
18657 if (IS_STACKALIGN (func_type
))
18658 save_reg_mask
|= 1;
18661 /* Save registers so the exception handler can modify them. */
18662 if (crtl
->calls_eh_return
)
18668 reg
= EH_RETURN_DATA_REGNO (i
);
18669 if (reg
== INVALID_REGNUM
)
18671 save_reg_mask
|= 1 << reg
;
18675 return save_reg_mask
;
18678 /* Return true if r3 is live at the start of the function. */
18681 arm_r3_live_at_start_p (void)
18683 /* Just look at cfg info, which is still close enough to correct at this
18684 point. This gives false positives for broken functions that might use
18685 uninitialized data that happens to be allocated in r3, but who cares? */
18686 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
18689 /* Compute the number of bytes used to store the static chain register on the
18690 stack, above the stack frame. We need to know this accurately to get the
18691 alignment of the rest of the stack frame correct. */
18694 arm_compute_static_chain_stack_bytes (void)
18696 /* See the defining assertion in arm_expand_prologue. */
18697 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
18698 && IS_NESTED (arm_current_func_type ())
18699 && arm_r3_live_at_start_p ()
18700 && crtl
->args
.pretend_args_size
== 0)
18706 /* Compute a bit mask of which registers need to be
18707 saved on the stack for the current function.
18708 This is used by arm_get_frame_offsets, which may add extra registers. */
18710 static unsigned long
18711 arm_compute_save_reg_mask (void)
18713 unsigned int save_reg_mask
= 0;
18714 unsigned long func_type
= arm_current_func_type ();
18717 if (IS_NAKED (func_type
))
18718 /* This should never really happen. */
18721 /* If we are creating a stack frame, then we must save the frame pointer,
18722 IP (which will hold the old stack pointer), LR and the PC. */
18723 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
18725 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
18728 | (1 << PC_REGNUM
);
18730 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
18732 /* Decide if we need to save the link register.
18733 Interrupt routines have their own banked link register,
18734 so they never need to save it.
18735 Otherwise if we do not use the link register we do not need to save
18736 it. If we are pushing other registers onto the stack however, we
18737 can save an instruction in the epilogue by pushing the link register
18738 now and then popping it back into the PC. This incurs extra memory
18739 accesses though, so we only do it when optimizing for size, and only
18740 if we know that we will not need a fancy return sequence. */
18741 if (df_regs_ever_live_p (LR_REGNUM
)
18744 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
18745 && !crtl
->calls_eh_return
))
18746 save_reg_mask
|= 1 << LR_REGNUM
;
18748 if (cfun
->machine
->lr_save_eliminated
)
18749 save_reg_mask
&= ~ (1 << LR_REGNUM
);
18751 if (TARGET_REALLY_IWMMXT
18752 && ((bit_count (save_reg_mask
)
18753 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
18754 arm_compute_static_chain_stack_bytes())
18757 /* The total number of registers that are going to be pushed
18758 onto the stack is odd. We need to ensure that the stack
18759 is 64-bit aligned before we start to save iWMMXt registers,
18760 and also before we start to create locals. (A local variable
18761 might be a double or long long which we will load/store using
18762 an iWMMXt instruction). Therefore we need to push another
18763 ARM register, so that the stack will be 64-bit aligned. We
18764 try to avoid using the arg registers (r0 -r3) as they might be
18765 used to pass values in a tail call. */
18766 for (reg
= 4; reg
<= 12; reg
++)
18767 if ((save_reg_mask
& (1 << reg
)) == 0)
18771 save_reg_mask
|= (1 << reg
);
18774 cfun
->machine
->sibcall_blocked
= 1;
18775 save_reg_mask
|= (1 << 3);
18779 /* We may need to push an additional register for use initializing the
18780 PIC base register. */
18781 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
18782 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
18784 reg
= thumb_find_work_register (1 << 4);
18785 if (!call_used_regs
[reg
])
18786 save_reg_mask
|= (1 << reg
);
18789 return save_reg_mask
;
18793 /* Compute a bit mask of which registers need to be
18794 saved on the stack for the current function. */
18795 static unsigned long
18796 thumb1_compute_save_reg_mask (void)
18798 unsigned long mask
;
18802 for (reg
= 0; reg
< 12; reg
++)
18803 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
18807 && !TARGET_SINGLE_PIC_BASE
18808 && arm_pic_register
!= INVALID_REGNUM
18809 && crtl
->uses_pic_offset_table
)
18810 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18812 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18813 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
18814 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18816 /* LR will also be pushed if any lo regs are pushed. */
18817 if (mask
& 0xff || thumb_force_lr_save ())
18818 mask
|= (1 << LR_REGNUM
);
18820 /* Make sure we have a low work register if we need one.
18821 We will need one if we are going to push a high register,
18822 but we are not currently intending to push a low register. */
18823 if ((mask
& 0xff) == 0
18824 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
18826 /* Use thumb_find_work_register to choose which register
18827 we will use. If the register is live then we will
18828 have to push it. Use LAST_LO_REGNUM as our fallback
18829 choice for the register to select. */
18830 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
18831 /* Make sure the register returned by thumb_find_work_register is
18832 not part of the return value. */
18833 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
18834 reg
= LAST_LO_REGNUM
;
18836 if (! call_used_regs
[reg
])
18840 /* The 504 below is 8 bytes less than 512 because there are two possible
18841 alignment words. We can't tell here if they will be present or not so we
18842 have to play it safe and assume that they are. */
18843 if ((CALLER_INTERWORKING_SLOT_SIZE
+
18844 ROUND_UP_WORD (get_frame_size ()) +
18845 crtl
->outgoing_args_size
) >= 504)
18847 /* This is the same as the code in thumb1_expand_prologue() which
18848 determines which register to use for stack decrement. */
18849 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
18850 if (mask
& (1 << reg
))
18853 if (reg
> LAST_LO_REGNUM
)
18855 /* Make sure we have a register available for stack decrement. */
18856 mask
|= 1 << LAST_LO_REGNUM
;
18864 /* Return the number of bytes required to save VFP registers. */
18866 arm_get_vfp_saved_size (void)
18868 unsigned int regno
;
18873 /* Space for saved VFP registers. */
18874 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
18877 for (regno
= FIRST_VFP_REGNUM
;
18878 regno
< LAST_VFP_REGNUM
;
18881 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
18882 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
18886 /* Workaround ARM10 VFPr1 bug. */
18887 if (count
== 2 && !arm_arch6
)
18889 saved
+= count
* 8;
18898 if (count
== 2 && !arm_arch6
)
18900 saved
+= count
* 8;
18907 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18908 everything bar the final return instruction. If simple_return is true,
18909 then do not output epilogue, because it has already been emitted in RTL. */
18911 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
18912 bool simple_return
)
18914 char conditional
[10];
18917 unsigned long live_regs_mask
;
18918 unsigned long func_type
;
18919 arm_stack_offsets
*offsets
;
18921 func_type
= arm_current_func_type ();
18923 if (IS_NAKED (func_type
))
18926 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
18928 /* If this function was declared non-returning, and we have
18929 found a tail call, then we have to trust that the called
18930 function won't return. */
18935 /* Otherwise, trap an attempted return by aborting. */
18937 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
18939 assemble_external_libcall (ops
[1]);
18940 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
18946 gcc_assert (!cfun
->calls_alloca
|| really_return
);
18948 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
18950 cfun
->machine
->return_used_this_function
= 1;
18952 offsets
= arm_get_frame_offsets ();
18953 live_regs_mask
= offsets
->saved_regs_mask
;
18955 if (!simple_return
&& live_regs_mask
)
18957 const char * return_reg
;
18959 /* If we do not have any special requirements for function exit
18960 (e.g. interworking) then we can load the return address
18961 directly into the PC. Otherwise we must load it into LR. */
18963 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
18964 return_reg
= reg_names
[PC_REGNUM
];
18966 return_reg
= reg_names
[LR_REGNUM
];
18968 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
18970 /* There are three possible reasons for the IP register
18971 being saved. 1) a stack frame was created, in which case
18972 IP contains the old stack pointer, or 2) an ISR routine
18973 corrupted it, or 3) it was saved to align the stack on
18974 iWMMXt. In case 1, restore IP into SP, otherwise just
18976 if (frame_pointer_needed
)
18978 live_regs_mask
&= ~ (1 << IP_REGNUM
);
18979 live_regs_mask
|= (1 << SP_REGNUM
);
18982 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
18985 /* On some ARM architectures it is faster to use LDR rather than
18986 LDM to load a single register. On other architectures, the
18987 cost is the same. In 26 bit mode, or for exception handlers,
18988 we have to use LDM to load the PC so that the CPSR is also
18990 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
18991 if (live_regs_mask
== (1U << reg
))
18994 if (reg
<= LAST_ARM_REGNUM
18995 && (reg
!= LR_REGNUM
18997 || ! IS_INTERRUPT (func_type
)))
18999 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19000 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19007 /* Generate the load multiple instruction to restore the
19008 registers. Note we can get here, even if
19009 frame_pointer_needed is true, but only if sp already
19010 points to the base of the saved core registers. */
19011 if (live_regs_mask
& (1 << SP_REGNUM
))
19013 unsigned HOST_WIDE_INT stack_adjust
;
19015 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19016 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19018 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19019 if (TARGET_UNIFIED_ASM
)
19020 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19022 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19025 /* If we can't use ldmib (SA110 bug),
19026 then try to pop r3 instead. */
19028 live_regs_mask
|= 1 << 3;
19030 if (TARGET_UNIFIED_ASM
)
19031 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19033 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19037 if (TARGET_UNIFIED_ASM
)
19038 sprintf (instr
, "pop%s\t{", conditional
);
19040 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
19042 p
= instr
+ strlen (instr
);
19044 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19045 if (live_regs_mask
& (1 << reg
))
19047 int l
= strlen (reg_names
[reg
]);
19053 memcpy (p
, ", ", 2);
19057 memcpy (p
, "%|", 2);
19058 memcpy (p
+ 2, reg_names
[reg
], l
);
19062 if (live_regs_mask
& (1 << LR_REGNUM
))
19064 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19065 /* If returning from an interrupt, restore the CPSR. */
19066 if (IS_INTERRUPT (func_type
))
19073 output_asm_insn (instr
, & operand
);
19075 /* See if we need to generate an extra instruction to
19076 perform the actual function return. */
19078 && func_type
!= ARM_FT_INTERWORKED
19079 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19081 /* The return has already been handled
19082 by loading the LR into the PC. */
19089 switch ((int) ARM_FUNC_TYPE (func_type
))
19093 /* ??? This is wrong for unified assembly syntax. */
19094 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19097 case ARM_FT_INTERWORKED
:
19098 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19101 case ARM_FT_EXCEPTION
:
19102 /* ??? This is wrong for unified assembly syntax. */
19103 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19107 /* Use bx if it's available. */
19108 if (arm_arch5
|| arm_arch4t
)
19109 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19111 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19115 output_asm_insn (instr
, & operand
);
19121 /* Write the function name into the code section, directly preceding
19122 the function prologue.
19124 Code will be output similar to this:
19126 .ascii "arm_poke_function_name", 0
19129 .word 0xff000000 + (t1 - t0)
19130 arm_poke_function_name
19132 stmfd sp!, {fp, ip, lr, pc}
19135 When performing a stack backtrace, code can inspect the value
19136 of 'pc' stored at 'fp' + 0. If the trace function then looks
19137 at location pc - 12 and the top 8 bits are set, then we know
19138 that there is a function name embedded immediately preceding this
19139 location and has length ((pc[-3]) & 0xff000000).
19141 We assume that pc is declared as a pointer to an unsigned long.
19143 It is of no benefit to output the function name if we are assembling
19144 a leaf function. These function types will not contain a stack
19145 backtrace structure, therefore it is not possible to determine the
19148 arm_poke_function_name (FILE *stream
, const char *name
)
19150 unsigned long alignlength
;
19151 unsigned long length
;
19154 length
= strlen (name
) + 1;
19155 alignlength
= ROUND_UP_WORD (length
);
19157 ASM_OUTPUT_ASCII (stream
, name
, length
);
19158 ASM_OUTPUT_ALIGN (stream
, 2);
19159 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19160 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19163 /* Place some comments into the assembler stream
19164 describing the current function. */
19166 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19168 unsigned long func_type
;
19170 /* ??? Do we want to print some of the below anyway? */
19174 /* Sanity check. */
19175 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19177 func_type
= arm_current_func_type ();
19179 switch ((int) ARM_FUNC_TYPE (func_type
))
19182 case ARM_FT_NORMAL
:
19184 case ARM_FT_INTERWORKED
:
19185 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19188 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19191 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19193 case ARM_FT_EXCEPTION
:
19194 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19198 if (IS_NAKED (func_type
))
19199 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19201 if (IS_VOLATILE (func_type
))
19202 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19204 if (IS_NESTED (func_type
))
19205 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19206 if (IS_STACKALIGN (func_type
))
19207 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19209 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19211 crtl
->args
.pretend_args_size
, frame_size
);
19213 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19214 frame_pointer_needed
,
19215 cfun
->machine
->uses_anonymous_args
);
19217 if (cfun
->machine
->lr_save_eliminated
)
19218 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19220 if (crtl
->calls_eh_return
)
19221 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19226 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19227 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19229 arm_stack_offsets
*offsets
;
19235 /* Emit any call-via-reg trampolines that are needed for v4t support
19236 of call_reg and call_value_reg type insns. */
19237 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19239 rtx label
= cfun
->machine
->call_via
[regno
];
19243 switch_to_section (function_section (current_function_decl
));
19244 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19245 CODE_LABEL_NUMBER (label
));
19246 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19250 /* ??? Probably not safe to set this here, since it assumes that a
19251 function will be emitted as assembly immediately after we generate
19252 RTL for it. This does not happen for inline functions. */
19253 cfun
->machine
->return_used_this_function
= 0;
19255 else /* TARGET_32BIT */
19257 /* We need to take into account any stack-frame rounding. */
19258 offsets
= arm_get_frame_offsets ();
19260 gcc_assert (!use_return_insn (FALSE
, NULL
)
19261 || (cfun
->machine
->return_used_this_function
!= 0)
19262 || offsets
->saved_regs
== offsets
->outgoing_args
19263 || frame_pointer_needed
);
19265 /* Reset the ARM-specific per-function variables. */
19266 after_arm_reorg
= 0;
19270 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19271 STR and STRD. If an even number of registers are being pushed, one
19272 or more STRD patterns are created for each register pair. If an
19273 odd number of registers are pushed, emit an initial STR followed by
19274 as many STRD instructions as are needed. This works best when the
19275 stack is initially 64-bit aligned (the normal case), since it
19276 ensures that each STRD is also 64-bit aligned. */
19278 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19283 rtx par
= NULL_RTX
;
19284 rtx dwarf
= NULL_RTX
;
19288 num_regs
= bit_count (saved_regs_mask
);
19290 /* Must be at least one register to save, and can't save SP or PC. */
19291 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19292 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19293 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19295 /* Create sequence for DWARF info. All the frame-related data for
19296 debugging is held in this wrapper. */
19297 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19299 /* Describe the stack adjustment. */
19300 tmp
= gen_rtx_SET (VOIDmode
,
19302 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19303 RTX_FRAME_RELATED_P (tmp
) = 1;
19304 XVECEXP (dwarf
, 0, 0) = tmp
;
19306 /* Find the first register. */
19307 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19312 /* If there's an odd number of registers to push. Start off by
19313 pushing a single register. This ensures that subsequent strd
19314 operations are dword aligned (assuming that SP was originally
19315 64-bit aligned). */
19316 if ((num_regs
& 1) != 0)
19318 rtx reg
, mem
, insn
;
19320 reg
= gen_rtx_REG (SImode
, regno
);
19322 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19323 stack_pointer_rtx
));
19325 mem
= gen_frame_mem (Pmode
,
19327 (Pmode
, stack_pointer_rtx
,
19328 plus_constant (Pmode
, stack_pointer_rtx
,
19331 tmp
= gen_rtx_SET (VOIDmode
, mem
, reg
);
19332 RTX_FRAME_RELATED_P (tmp
) = 1;
19333 insn
= emit_insn (tmp
);
19334 RTX_FRAME_RELATED_P (insn
) = 1;
19335 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19336 tmp
= gen_rtx_SET (VOIDmode
, gen_frame_mem (Pmode
, stack_pointer_rtx
),
19338 RTX_FRAME_RELATED_P (tmp
) = 1;
19341 XVECEXP (dwarf
, 0, i
) = tmp
;
19345 while (i
< num_regs
)
19346 if (saved_regs_mask
& (1 << regno
))
19348 rtx reg1
, reg2
, mem1
, mem2
;
19349 rtx tmp0
, tmp1
, tmp2
;
19352 /* Find the register to pair with this one. */
19353 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19357 reg1
= gen_rtx_REG (SImode
, regno
);
19358 reg2
= gen_rtx_REG (SImode
, regno2
);
19365 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19368 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19370 -4 * (num_regs
- 1)));
19371 tmp0
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19372 plus_constant (Pmode
, stack_pointer_rtx
,
19374 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19375 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19376 RTX_FRAME_RELATED_P (tmp0
) = 1;
19377 RTX_FRAME_RELATED_P (tmp1
) = 1;
19378 RTX_FRAME_RELATED_P (tmp2
) = 1;
19379 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19380 XVECEXP (par
, 0, 0) = tmp0
;
19381 XVECEXP (par
, 0, 1) = tmp1
;
19382 XVECEXP (par
, 0, 2) = tmp2
;
19383 insn
= emit_insn (par
);
19384 RTX_FRAME_RELATED_P (insn
) = 1;
19385 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19389 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19392 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19395 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19396 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19397 RTX_FRAME_RELATED_P (tmp1
) = 1;
19398 RTX_FRAME_RELATED_P (tmp2
) = 1;
19399 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19400 XVECEXP (par
, 0, 0) = tmp1
;
19401 XVECEXP (par
, 0, 1) = tmp2
;
19405 /* Create unwind information. This is an approximation. */
19406 tmp1
= gen_rtx_SET (VOIDmode
,
19407 gen_frame_mem (Pmode
,
19408 plus_constant (Pmode
,
19412 tmp2
= gen_rtx_SET (VOIDmode
,
19413 gen_frame_mem (Pmode
,
19414 plus_constant (Pmode
,
19419 RTX_FRAME_RELATED_P (tmp1
) = 1;
19420 RTX_FRAME_RELATED_P (tmp2
) = 1;
19421 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19422 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19424 regno
= regno2
+ 1;
19432 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19433 whenever possible, otherwise it emits single-word stores. The first store
19434 also allocates stack space for all saved registers, using writeback with
19435 post-addressing mode. All other stores use offset addressing. If no STRD
19436 can be emitted, this function emits a sequence of single-word stores,
19437 and not an STM as before, because single-word stores provide more freedom
19438 scheduling and can be turned into an STM by peephole optimizations. */
19440 arm_emit_strd_push (unsigned long saved_regs_mask
)
19443 int i
, j
, dwarf_index
= 0;
19445 rtx dwarf
= NULL_RTX
;
19446 rtx insn
= NULL_RTX
;
19449 /* TODO: A more efficient code can be emitted by changing the
19450 layout, e.g., first push all pairs that can use STRD to keep the
19451 stack aligned, and then push all other registers. */
19452 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19453 if (saved_regs_mask
& (1 << i
))
19456 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19457 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19458 gcc_assert (num_regs
> 0);
19460 /* Create sequence for DWARF info. */
19461 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19463 /* For dwarf info, we generate explicit stack update. */
19464 tmp
= gen_rtx_SET (VOIDmode
,
19466 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19467 RTX_FRAME_RELATED_P (tmp
) = 1;
19468 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19470 /* Save registers. */
19471 offset
= - 4 * num_regs
;
19473 while (j
<= LAST_ARM_REGNUM
)
19474 if (saved_regs_mask
& (1 << j
))
19477 && (saved_regs_mask
& (1 << (j
+ 1))))
19479 /* Current register and previous register form register pair for
19480 which STRD can be generated. */
19483 /* Allocate stack space for all saved registers. */
19484 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19485 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19486 mem
= gen_frame_mem (DImode
, tmp
);
19489 else if (offset
> 0)
19490 mem
= gen_frame_mem (DImode
,
19491 plus_constant (Pmode
,
19495 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19497 tmp
= gen_rtx_SET (DImode
, mem
, gen_rtx_REG (DImode
, j
));
19498 RTX_FRAME_RELATED_P (tmp
) = 1;
19499 tmp
= emit_insn (tmp
);
19501 /* Record the first store insn. */
19502 if (dwarf_index
== 1)
19505 /* Generate dwarf info. */
19506 mem
= gen_frame_mem (SImode
,
19507 plus_constant (Pmode
,
19510 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19511 RTX_FRAME_RELATED_P (tmp
) = 1;
19512 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19514 mem
= gen_frame_mem (SImode
,
19515 plus_constant (Pmode
,
19518 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
+ 1));
19519 RTX_FRAME_RELATED_P (tmp
) = 1;
19520 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19527 /* Emit a single word store. */
19530 /* Allocate stack space for all saved registers. */
19531 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19532 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19533 mem
= gen_frame_mem (SImode
, tmp
);
19536 else if (offset
> 0)
19537 mem
= gen_frame_mem (SImode
,
19538 plus_constant (Pmode
,
19542 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19544 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19545 RTX_FRAME_RELATED_P (tmp
) = 1;
19546 tmp
= emit_insn (tmp
);
19548 /* Record the first store insn. */
19549 if (dwarf_index
== 1)
19552 /* Generate dwarf info. */
19553 mem
= gen_frame_mem (SImode
,
19554 plus_constant(Pmode
,
19557 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19558 RTX_FRAME_RELATED_P (tmp
) = 1;
19559 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19568 /* Attach dwarf info to the first insn we generate. */
19569 gcc_assert (insn
!= NULL_RTX
);
19570 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19571 RTX_FRAME_RELATED_P (insn
) = 1;
19574 /* Generate and emit an insn that we will recognize as a push_multi.
19575 Unfortunately, since this insn does not reflect very well the actual
19576 semantics of the operation, we need to annotate the insn for the benefit
19577 of DWARF2 frame unwind information. */
19579 emit_multi_reg_push (unsigned long mask
)
19582 int num_dwarf_regs
;
19586 int dwarf_par_index
;
19589 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19590 if (mask
& (1 << i
))
19593 gcc_assert (num_regs
&& num_regs
<= 16);
19595 /* We don't record the PC in the dwarf frame information. */
19596 num_dwarf_regs
= num_regs
;
19597 if (mask
& (1 << PC_REGNUM
))
19600 /* For the body of the insn we are going to generate an UNSPEC in
19601 parallel with several USEs. This allows the insn to be recognized
19602 by the push_multi pattern in the arm.md file.
19604 The body of the insn looks something like this:
19607 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19608 (const_int:SI <num>)))
19609 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19615 For the frame note however, we try to be more explicit and actually
19616 show each register being stored into the stack frame, plus a (single)
19617 decrement of the stack pointer. We do it this way in order to be
19618 friendly to the stack unwinding code, which only wants to see a single
19619 stack decrement per instruction. The RTL we generate for the note looks
19620 something like this:
19623 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19624 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19625 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19626 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19630 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19631 instead we'd have a parallel expression detailing all
19632 the stores to the various memory addresses so that debug
19633 information is more up-to-date. Remember however while writing
19634 this to take care of the constraints with the push instruction.
19636 Note also that this has to be taken care of for the VFP registers.
19638 For more see PR43399. */
19640 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
19641 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
19642 dwarf_par_index
= 1;
19644 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19646 if (mask
& (1 << i
))
19648 reg
= gen_rtx_REG (SImode
, i
);
19650 XVECEXP (par
, 0, 0)
19651 = gen_rtx_SET (VOIDmode
,
19654 gen_rtx_PRE_MODIFY (Pmode
,
19657 (Pmode
, stack_pointer_rtx
,
19660 gen_rtx_UNSPEC (BLKmode
,
19661 gen_rtvec (1, reg
),
19662 UNSPEC_PUSH_MULT
));
19664 if (i
!= PC_REGNUM
)
19666 tmp
= gen_rtx_SET (VOIDmode
,
19667 gen_frame_mem (SImode
, stack_pointer_rtx
),
19669 RTX_FRAME_RELATED_P (tmp
) = 1;
19670 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
19678 for (j
= 1, i
++; j
< num_regs
; i
++)
19680 if (mask
& (1 << i
))
19682 reg
= gen_rtx_REG (SImode
, i
);
19684 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
19686 if (i
!= PC_REGNUM
)
19689 = gen_rtx_SET (VOIDmode
,
19692 plus_constant (Pmode
, stack_pointer_rtx
,
19695 RTX_FRAME_RELATED_P (tmp
) = 1;
19696 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19703 par
= emit_insn (par
);
19705 tmp
= gen_rtx_SET (VOIDmode
,
19707 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19708 RTX_FRAME_RELATED_P (tmp
) = 1;
19709 XVECEXP (dwarf
, 0, 0) = tmp
;
19711 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19716 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19717 SIZE is the offset to be adjusted.
19718 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19720 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
19724 RTX_FRAME_RELATED_P (insn
) = 1;
19725 dwarf
= gen_rtx_SET (VOIDmode
, dest
, plus_constant (Pmode
, src
, size
));
19726 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
19729 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19730 SAVED_REGS_MASK shows which registers need to be restored.
19732 Unfortunately, since this insn does not reflect very well the actual
19733 semantics of the operation, we need to annotate the insn for the benefit
19734 of DWARF2 frame unwind information. */
19736 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
19741 rtx dwarf
= NULL_RTX
;
19747 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
19748 offset_adj
= return_in_pc
? 1 : 0;
19749 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19750 if (saved_regs_mask
& (1 << i
))
19753 gcc_assert (num_regs
&& num_regs
<= 16);
19755 /* If SP is in reglist, then we don't emit SP update insn. */
19756 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
19758 /* The parallel needs to hold num_regs SETs
19759 and one SET for the stack update. */
19760 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
19765 XVECEXP (par
, 0, 0) = tmp
;
19770 /* Increment the stack pointer, based on there being
19771 num_regs 4-byte registers to restore. */
19772 tmp
= gen_rtx_SET (VOIDmode
,
19774 plus_constant (Pmode
,
19777 RTX_FRAME_RELATED_P (tmp
) = 1;
19778 XVECEXP (par
, 0, offset_adj
) = tmp
;
19781 /* Now restore every reg, which may include PC. */
19782 for (j
= 0, i
= 0; j
< num_regs
; i
++)
19783 if (saved_regs_mask
& (1 << i
))
19785 reg
= gen_rtx_REG (SImode
, i
);
19786 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
19788 /* Emit single load with writeback. */
19789 tmp
= gen_frame_mem (SImode
,
19790 gen_rtx_POST_INC (Pmode
,
19791 stack_pointer_rtx
));
19792 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, tmp
));
19793 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19797 tmp
= gen_rtx_SET (VOIDmode
,
19801 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
19802 RTX_FRAME_RELATED_P (tmp
) = 1;
19803 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
19805 /* We need to maintain a sequence for DWARF info too. As dwarf info
19806 should not have PC, skip PC. */
19807 if (i
!= PC_REGNUM
)
19808 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19814 par
= emit_jump_insn (par
);
19816 par
= emit_insn (par
);
19818 REG_NOTES (par
) = dwarf
;
19820 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
19821 stack_pointer_rtx
, stack_pointer_rtx
);
19824 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19825 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19827 Unfortunately, since this insn does not reflect very well the actual
19828 semantics of the operation, we need to annotate the insn for the benefit
19829 of DWARF2 frame unwind information. */
19831 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
19835 rtx dwarf
= NULL_RTX
;
19838 gcc_assert (num_regs
&& num_regs
<= 32);
19840 /* Workaround ARM10 VFPr1 bug. */
19841 if (num_regs
== 2 && !arm_arch6
)
19843 if (first_reg
== 15)
19849 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19850 there could be up to 32 D-registers to restore.
19851 If there are more than 16 D-registers, make two recursive calls,
19852 each of which emits one pop_multi instruction. */
19855 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
19856 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
19860 /* The parallel needs to hold num_regs SETs
19861 and one SET for the stack update. */
19862 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19864 /* Increment the stack pointer, based on there being
19865 num_regs 8-byte registers to restore. */
19866 tmp
= gen_rtx_SET (VOIDmode
,
19868 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
19869 RTX_FRAME_RELATED_P (tmp
) = 1;
19870 XVECEXP (par
, 0, 0) = tmp
;
19872 /* Now show every reg that will be restored, using a SET for each. */
19873 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
19875 reg
= gen_rtx_REG (DFmode
, i
);
19877 tmp
= gen_rtx_SET (VOIDmode
,
19881 plus_constant (Pmode
, base_reg
, 8 * j
)));
19882 RTX_FRAME_RELATED_P (tmp
) = 1;
19883 XVECEXP (par
, 0, j
+ 1) = tmp
;
19885 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19890 par
= emit_insn (par
);
19891 REG_NOTES (par
) = dwarf
;
19893 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
19894 base_reg
, base_reg
);
19897 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
19898 number of registers are being popped, multiple LDRD patterns are created for
19899 all register pairs. If odd number of registers are popped, last register is
19900 loaded by using LDR pattern. */
19902 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
19906 rtx par
= NULL_RTX
;
19907 rtx dwarf
= NULL_RTX
;
19908 rtx tmp
, reg
, tmp1
;
19911 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
19912 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19913 if (saved_regs_mask
& (1 << i
))
19916 gcc_assert (num_regs
&& num_regs
<= 16);
19918 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19919 to be popped. So, if num_regs is even, now it will become odd,
19920 and we can generate pop with PC. If num_regs is odd, it will be
19921 even now, and ldr with return can be generated for PC. */
19925 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19927 /* Var j iterates over all the registers to gather all the registers in
19928 saved_regs_mask. Var i gives index of saved registers in stack frame.
19929 A PARALLEL RTX of register-pair is created here, so that pattern for
19930 LDRD can be matched. As PC is always last register to be popped, and
19931 we have already decremented num_regs if PC, we don't have to worry
19932 about PC in this loop. */
19933 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
19934 if (saved_regs_mask
& (1 << j
))
19936 /* Create RTX for memory load. */
19937 reg
= gen_rtx_REG (SImode
, j
);
19938 tmp
= gen_rtx_SET (SImode
,
19940 gen_frame_mem (SImode
,
19941 plus_constant (Pmode
,
19942 stack_pointer_rtx
, 4 * i
)));
19943 RTX_FRAME_RELATED_P (tmp
) = 1;
19947 /* When saved-register index (i) is even, the RTX to be emitted is
19948 yet to be created. Hence create it first. The LDRD pattern we
19949 are generating is :
19950 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19951 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19952 where target registers need not be consecutive. */
19953 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19957 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19958 added as 0th element and if i is odd, reg_i is added as 1st element
19959 of LDRD pattern shown above. */
19960 XVECEXP (par
, 0, (i
% 2)) = tmp
;
19961 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19965 /* When saved-register index (i) is odd, RTXs for both the registers
19966 to be loaded are generated in above given LDRD pattern, and the
19967 pattern can be emitted now. */
19968 par
= emit_insn (par
);
19969 REG_NOTES (par
) = dwarf
;
19970 RTX_FRAME_RELATED_P (par
) = 1;
19976 /* If the number of registers pushed is odd AND return_in_pc is false OR
19977 number of registers are even AND return_in_pc is true, last register is
19978 popped using LDR. It can be PC as well. Hence, adjust the stack first and
19979 then LDR with post increment. */
19981 /* Increment the stack pointer, based on there being
19982 num_regs 4-byte registers to restore. */
19983 tmp
= gen_rtx_SET (VOIDmode
,
19985 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
19986 RTX_FRAME_RELATED_P (tmp
) = 1;
19987 tmp
= emit_insn (tmp
);
19990 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
19991 stack_pointer_rtx
, stack_pointer_rtx
);
19996 if (((num_regs
% 2) == 1 && !return_in_pc
)
19997 || ((num_regs
% 2) == 0 && return_in_pc
))
19999 /* Scan for the single register to be popped. Skip until the saved
20000 register is found. */
20001 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20003 /* Gen LDR with post increment here. */
20004 tmp1
= gen_rtx_MEM (SImode
,
20005 gen_rtx_POST_INC (SImode
,
20006 stack_pointer_rtx
));
20007 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20009 reg
= gen_rtx_REG (SImode
, j
);
20010 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
20011 RTX_FRAME_RELATED_P (tmp
) = 1;
20012 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20016 /* If return_in_pc, j must be PC_REGNUM. */
20017 gcc_assert (j
== PC_REGNUM
);
20018 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20019 XVECEXP (par
, 0, 0) = ret_rtx
;
20020 XVECEXP (par
, 0, 1) = tmp
;
20021 par
= emit_jump_insn (par
);
20025 par
= emit_insn (tmp
);
20026 REG_NOTES (par
) = dwarf
;
20027 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20028 stack_pointer_rtx
, stack_pointer_rtx
);
20032 else if ((num_regs
% 2) == 1 && return_in_pc
)
20034 /* There are 2 registers to be popped. So, generate the pattern
20035 pop_multiple_with_stack_update_and_return to pop in PC. */
20036 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20042 /* LDRD in ARM mode needs consecutive registers as operands. This function
20043 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20044 offset addressing and then generates one separate stack udpate. This provides
20045 more scheduling freedom, compared to writeback on every load. However,
20046 if the function returns using load into PC directly
20047 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20048 before the last load. TODO: Add a peephole optimization to recognize
20049 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20050 peephole optimization to merge the load at stack-offset zero
20051 with the stack update instruction using load with writeback
20052 in post-index addressing mode. */
20054 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20058 rtx par
= NULL_RTX
;
20059 rtx dwarf
= NULL_RTX
;
20062 /* Restore saved registers. */
20063 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20065 while (j
<= LAST_ARM_REGNUM
)
20066 if (saved_regs_mask
& (1 << j
))
20069 && (saved_regs_mask
& (1 << (j
+ 1)))
20070 && (j
+ 1) != PC_REGNUM
)
20072 /* Current register and next register form register pair for which
20073 LDRD can be generated. PC is always the last register popped, and
20074 we handle it separately. */
20076 mem
= gen_frame_mem (DImode
,
20077 plus_constant (Pmode
,
20081 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20083 tmp
= gen_rtx_SET (DImode
, gen_rtx_REG (DImode
, j
), mem
);
20084 tmp
= emit_insn (tmp
);
20085 RTX_FRAME_RELATED_P (tmp
) = 1;
20087 /* Generate dwarf info. */
20089 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20090 gen_rtx_REG (SImode
, j
),
20092 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20093 gen_rtx_REG (SImode
, j
+ 1),
20096 REG_NOTES (tmp
) = dwarf
;
20101 else if (j
!= PC_REGNUM
)
20103 /* Emit a single word load. */
20105 mem
= gen_frame_mem (SImode
,
20106 plus_constant (Pmode
,
20110 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20112 tmp
= gen_rtx_SET (SImode
, gen_rtx_REG (SImode
, j
), mem
);
20113 tmp
= emit_insn (tmp
);
20114 RTX_FRAME_RELATED_P (tmp
) = 1;
20116 /* Generate dwarf info. */
20117 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20118 gen_rtx_REG (SImode
, j
),
20124 else /* j == PC_REGNUM */
20130 /* Update the stack. */
20133 tmp
= gen_rtx_SET (Pmode
,
20135 plus_constant (Pmode
,
20138 tmp
= emit_insn (tmp
);
20139 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20140 stack_pointer_rtx
, stack_pointer_rtx
);
20144 if (saved_regs_mask
& (1 << PC_REGNUM
))
20146 /* Only PC is to be popped. */
20147 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20148 XVECEXP (par
, 0, 0) = ret_rtx
;
20149 tmp
= gen_rtx_SET (SImode
,
20150 gen_rtx_REG (SImode
, PC_REGNUM
),
20151 gen_frame_mem (SImode
,
20152 gen_rtx_POST_INC (SImode
,
20153 stack_pointer_rtx
)));
20154 RTX_FRAME_RELATED_P (tmp
) = 1;
20155 XVECEXP (par
, 0, 1) = tmp
;
20156 par
= emit_jump_insn (par
);
20158 /* Generate dwarf info. */
20159 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20160 gen_rtx_REG (SImode
, PC_REGNUM
),
20162 REG_NOTES (par
) = dwarf
;
20163 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20164 stack_pointer_rtx
, stack_pointer_rtx
);
20168 /* Calculate the size of the return value that is passed in registers. */
20170 arm_size_return_regs (void)
20172 enum machine_mode mode
;
20174 if (crtl
->return_rtx
!= 0)
20175 mode
= GET_MODE (crtl
->return_rtx
);
20177 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20179 return GET_MODE_SIZE (mode
);
20182 /* Return true if the current function needs to save/restore LR. */
20184 thumb_force_lr_save (void)
20186 return !cfun
->machine
->lr_save_eliminated
20187 && (!leaf_function_p ()
20188 || thumb_far_jump_used_p ()
20189 || df_regs_ever_live_p (LR_REGNUM
));
20192 /* We do not know if r3 will be available because
20193 we do have an indirect tailcall happening in this
20194 particular case. */
20196 is_indirect_tailcall_p (rtx call
)
20198 rtx pat
= PATTERN (call
);
20200 /* Indirect tail call. */
20201 pat
= XVECEXP (pat
, 0, 0);
20202 if (GET_CODE (pat
) == SET
)
20203 pat
= SET_SRC (pat
);
20205 pat
= XEXP (XEXP (pat
, 0), 0);
20206 return REG_P (pat
);
20209 /* Return true if r3 is used by any of the tail call insns in the
20210 current function. */
20212 any_sibcall_could_use_r3 (void)
20217 if (!crtl
->tail_call_emit
)
20219 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20220 if (e
->flags
& EDGE_SIBCALL
)
20222 rtx call
= BB_END (e
->src
);
20223 if (!CALL_P (call
))
20224 call
= prev_nonnote_nondebug_insn (call
);
20225 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20226 if (find_regno_fusage (call
, USE
, 3)
20227 || is_indirect_tailcall_p (call
))
20234 /* Compute the distance from register FROM to register TO.
20235 These can be the arg pointer (26), the soft frame pointer (25),
20236 the stack pointer (13) or the hard frame pointer (11).
20237 In thumb mode r7 is used as the soft frame pointer, if needed.
20238 Typical stack layout looks like this:
20240 old stack pointer -> | |
20243 | | saved arguments for
20244 | | vararg functions
20247 hard FP & arg pointer -> | | \
20255 soft frame pointer -> | | /
20260 locals base pointer -> | | /
20265 current stack pointer -> | | /
20268 For a given function some or all of these stack components
20269 may not be needed, giving rise to the possibility of
20270 eliminating some of the registers.
20272 The values returned by this function must reflect the behavior
20273 of arm_expand_prologue() and arm_compute_save_reg_mask().
20275 The sign of the number returned reflects the direction of stack
20276 growth, so the values are positive for all eliminations except
20277 from the soft frame pointer to the hard frame pointer.
20279 SFP may point just inside the local variables block to ensure correct
alignment.  */
20283 /* Calculate stack offsets. These are used to calculate register elimination
20284 offsets and in prologue/epilogue code. Also calculates which registers
20285 should be saved. */
20287 static arm_stack_offsets
*
20288 arm_get_frame_offsets (void)
20290 struct arm_stack_offsets
*offsets
;
20291 unsigned long func_type
;
20295 HOST_WIDE_INT frame_size
;
20298 offsets
= &cfun
->machine
->stack_offsets
;
20300 /* We need to know if we are a leaf function. Unfortunately, it
20301 is possible to be called after start_sequence has been called,
20302 which causes get_insns to return the insns for the sequence,
20303 not the function, which will cause leaf_function_p to return
20304 the incorrect result.
20306 to know about leaf functions once reload has completed, and the
20307 frame size cannot be changed after that time, so we can safely
20308 use the cached value. */
20310 if (reload_completed
)
20313 /* Initially this is the size of the local variables. It will translated
20314 into an offset once we have determined the size of preceding data. */
20315 frame_size
= ROUND_UP_WORD (get_frame_size ());
20317 leaf
= leaf_function_p ();
20319 /* Space for variadic functions. */
20320 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20322 /* In Thumb mode this is incorrect, but never used. */
20324 = (offsets
->saved_args
20325 + arm_compute_static_chain_stack_bytes ()
20326 + (frame_pointer_needed
? 4 : 0));
20330 unsigned int regno
;
20332 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20333 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20334 saved
= core_saved
;
20336 /* We know that SP will be doubleword aligned on entry, and we must
20337 preserve that condition at any subroutine call. We also require the
20338 soft frame pointer to be doubleword aligned. */
20340 if (TARGET_REALLY_IWMMXT
)
20342 /* Check for the call-saved iWMMXt registers. */
20343 for (regno
= FIRST_IWMMXT_REGNUM
;
20344 regno
<= LAST_IWMMXT_REGNUM
;
20346 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20350 func_type
= arm_current_func_type ();
20351 /* Space for saved VFP registers. */
20352 if (! IS_VOLATILE (func_type
)
20353 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20354 saved
+= arm_get_vfp_saved_size ();
20356 else /* TARGET_THUMB1 */
20358 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20359 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20360 saved
= core_saved
;
20361 if (TARGET_BACKTRACE
)
20365 /* Saved registers include the stack frame. */
20366 offsets
->saved_regs
20367 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20368 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20370 /* A leaf function does not need any stack alignment if it has nothing
20372 if (leaf
&& frame_size
== 0
20373 /* However if it calls alloca(), we have a dynamically allocated
20374 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20375 && ! cfun
->calls_alloca
)
20377 offsets
->outgoing_args
= offsets
->soft_frame
;
20378 offsets
->locals_base
= offsets
->soft_frame
;
20382 /* Ensure SFP has the correct alignment. */
20383 if (ARM_DOUBLEWORD_ALIGN
20384 && (offsets
->soft_frame
& 7))
20386 offsets
->soft_frame
+= 4;
20387 /* Try to align stack by pushing an extra reg. Don't bother doing this
20388 when there is a stack frame as the alignment will be rolled into
20389 the normal stack adjustment. */
20390 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20394 /* If it is safe to use r3, then do so. This sometimes
20395 generates better code on Thumb-2 by avoiding the need to
20396 use 32-bit push/pop instructions. */
20397 if (! any_sibcall_could_use_r3 ()
20398 && arm_size_return_regs () <= 12
20399 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20401 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20406 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20408 /* Avoid fixed registers; they may be changed at
20409 arbitrary times so it's unsafe to restore them
20410 during the epilogue. */
20412 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20421 offsets
->saved_regs
+= 4;
20422 offsets
->saved_regs_mask
|= (1 << reg
);
20427 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20428 offsets
->outgoing_args
= (offsets
->locals_base
20429 + crtl
->outgoing_args_size
);
20431 if (ARM_DOUBLEWORD_ALIGN
)
20433 /* Ensure SP remains doubleword aligned. */
20434 if (offsets
->outgoing_args
& 7)
20435 offsets
->outgoing_args
+= 4;
20436 gcc_assert (!(offsets
->outgoing_args
& 7));
20443 /* Calculate the relative offsets for the different stack pointers. Positive
20444 offsets are in the direction of stack growth. */
20447 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20449 arm_stack_offsets
*offsets
;
20451 offsets
= arm_get_frame_offsets ();
20453 /* OK, now we have enough information to compute the distances.
20454 There must be an entry in these switch tables for each pair
20455 of registers in ELIMINABLE_REGS, even if some of the entries
20456 seem to be redundant or useless. */
20459 case ARG_POINTER_REGNUM
:
20462 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20465 case FRAME_POINTER_REGNUM
:
20466 /* This is the reverse of the soft frame pointer
20467 to hard frame pointer elimination below. */
20468 return offsets
->soft_frame
- offsets
->saved_args
;
20470 case ARM_HARD_FRAME_POINTER_REGNUM
:
20471 /* This is only non-zero in the case where the static chain register
20472 is stored above the frame. */
20473 return offsets
->frame
- offsets
->saved_args
- 4;
20475 case STACK_POINTER_REGNUM
:
20476 /* If nothing has been pushed on the stack at all
20477 then this will return -4. This *is* correct! */
20478 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20481 gcc_unreachable ();
20483 gcc_unreachable ();
20485 case FRAME_POINTER_REGNUM
:
20488 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20491 case ARM_HARD_FRAME_POINTER_REGNUM
:
20492 /* The hard frame pointer points to the top entry in the
20493 stack frame. The soft frame pointer to the bottom entry
20494 in the stack frame. If there is no stack frame at all,
20495 then they are identical. */
20497 return offsets
->frame
- offsets
->soft_frame
;
20499 case STACK_POINTER_REGNUM
:
20500 return offsets
->outgoing_args
- offsets
->soft_frame
;
20503 gcc_unreachable ();
20505 gcc_unreachable ();
20508 /* You cannot eliminate from the stack pointer.
20509 In theory you could eliminate from the hard frame
20510 pointer to the stack pointer, but this will never
20511 happen, since if a stack frame is not needed the
20512 hard frame pointer will never be used. */
20513 gcc_unreachable ();
20517 /* Given FROM and TO register numbers, say whether this elimination is
20518 allowed. Frame pointer elimination is automatically handled.
20520 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20521 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20522 pointer, we must eliminate FRAME_POINTER_REGNUM into
20523 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20524 ARG_POINTER_REGNUM. */
20527 arm_can_eliminate (const int from
, const int to
)
20529 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20530 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20531 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20532 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20536 /* Emit RTL to save coprocessor registers on function entry. Returns the
20537 number of bytes pushed. */
20540 arm_save_coproc_regs(void)
20542 int saved_size
= 0;
20544 unsigned start_reg
;
20547 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20548 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20550 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20551 insn
= gen_rtx_MEM (V2SImode
, insn
);
20552 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20553 RTX_FRAME_RELATED_P (insn
) = 1;
20557 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
20559 start_reg
= FIRST_VFP_REGNUM
;
20561 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20563 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20564 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20566 if (start_reg
!= reg
)
20567 saved_size
+= vfp_emit_fstmd (start_reg
,
20568 (reg
- start_reg
) / 2);
20569 start_reg
= reg
+ 2;
20572 if (start_reg
!= reg
)
20573 saved_size
+= vfp_emit_fstmd (start_reg
,
20574 (reg
- start_reg
) / 2);
20580 /* Set the Thumb frame pointer from the stack pointer. */
20583 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
20585 HOST_WIDE_INT amount
;
20588 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
20590 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20591 stack_pointer_rtx
, GEN_INT (amount
)));
20594 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
20595 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20596 expects the first two operands to be the same. */
20599 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20601 hard_frame_pointer_rtx
));
20605 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20606 hard_frame_pointer_rtx
,
20607 stack_pointer_rtx
));
20609 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
20610 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
20611 RTX_FRAME_RELATED_P (dwarf
) = 1;
20612 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20615 RTX_FRAME_RELATED_P (insn
) = 1;
20618 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20621 arm_expand_prologue (void)
20626 unsigned long live_regs_mask
;
20627 unsigned long func_type
;
20629 int saved_pretend_args
= 0;
20630 int saved_regs
= 0;
20631 unsigned HOST_WIDE_INT args_to_push
;
20632 arm_stack_offsets
*offsets
;
20634 func_type
= arm_current_func_type ();
20636 /* Naked functions don't have prologues. */
20637 if (IS_NAKED (func_type
))
20640 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20641 args_to_push
= crtl
->args
.pretend_args_size
;
20643 /* Compute which register we will have to save onto the stack. */
20644 offsets
= arm_get_frame_offsets ();
20645 live_regs_mask
= offsets
->saved_regs_mask
;
20647 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
20649 if (IS_STACKALIGN (func_type
))
20653 /* Handle a word-aligned stack pointer. We generate the following:
20658 <save and restore r0 in normal prologue/epilogue>
20662 The unwinder doesn't need to know about the stack realignment.
20663 Just tell it we saved SP in r0. */
20664 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
20666 r0
= gen_rtx_REG (SImode
, 0);
20667 r1
= gen_rtx_REG (SImode
, 1);
20669 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
20670 RTX_FRAME_RELATED_P (insn
) = 1;
20671 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
20673 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
20675 /* ??? The CFA changes here, which may cause GDB to conclude that it
20676 has entered a different function. That said, the unwind info is
20677 correct, individually, before and after this instruction because
20678 we've described the save of SP, which will override the default
20679 handling of SP as restoring from the CFA. */
20680 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
20683 /* For APCS frames, if IP register is clobbered
20684 when creating frame, save that register in a special
20686 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20688 if (IS_INTERRUPT (func_type
))
20690 /* Interrupt functions must not corrupt any registers.
20691 Creating a frame pointer however, corrupts the IP
20692 register, so we must push it first. */
20693 emit_multi_reg_push (1 << IP_REGNUM
);
20695 /* Do not set RTX_FRAME_RELATED_P on this insn.
20696 The dwarf stack unwinding code only wants to see one
20697 stack decrement per function, and this is not it. If
20698 this instruction is labeled as being part of the frame
20699 creation sequence then dwarf2out_frame_debug_expr will
20700 die when it encounters the assignment of IP to FP
20701 later on, since the use of SP here establishes SP as
20702 the CFA register and not IP.
20704 Anyway this instruction is not really part of the stack
20705 frame creation although it is part of the prologue. */
20707 else if (IS_NESTED (func_type
))
20709 /* The static chain register is the same as the IP register
20710 used as a scratch register during stack frame creation.
20711 To get around this need to find somewhere to store IP
20712 whilst the frame is being created. We try the following
20715 1. The last argument register r3 if it is available.
20716 2. A slot on the stack above the frame if there are no
20717 arguments to push onto the stack.
20718 3. Register r3 again, after pushing the argument registers
20719 onto the stack, if this is a varargs function.
20720 4. The last slot on the stack created for the arguments to
20721 push, if this isn't a varargs function.
20723 Note - we only need to tell the dwarf2 backend about the SP
20724 adjustment in the second variant; the static chain register
20725 doesn't need to be unwound, as it doesn't contain a value
20726 inherited from the caller. */
20728 if (!arm_r3_live_at_start_p ())
20729 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20730 else if (args_to_push
== 0)
20734 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20737 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20738 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
20741 /* Just tell the dwarf backend that we adjusted SP. */
20742 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20743 plus_constant (Pmode
, stack_pointer_rtx
,
20745 RTX_FRAME_RELATED_P (insn
) = 1;
20746 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20750 /* Store the args on the stack. */
20751 if (cfun
->machine
->uses_anonymous_args
)
20754 = emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf);
20755 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20756 saved_pretend_args
= 1;
20762 if (args_to_push
== 4)
20763 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20766 = gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
20767 plus_constant (Pmode
,
20771 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
20773 /* Just tell the dwarf backend that we adjusted SP. */
20775 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20776 plus_constant (Pmode
, stack_pointer_rtx
,
20778 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20781 RTX_FRAME_RELATED_P (insn
) = 1;
20782 fp_offset
= args_to_push
;
20787 insn
= emit_set_insn (ip_rtx
,
20788 plus_constant (Pmode
, stack_pointer_rtx
,
20790 RTX_FRAME_RELATED_P (insn
) = 1;
20795 /* Push the argument registers, or reserve space for them. */
20796 if (cfun
->machine
->uses_anonymous_args
)
20797 insn
= emit_multi_reg_push
20798 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
20801 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20802 GEN_INT (- args_to_push
)));
20803 RTX_FRAME_RELATED_P (insn
) = 1;
20806 /* If this is an interrupt service routine, and the link register
20807 is going to be pushed, and we're not generating extra
20808 push of IP (needed when frame is needed and frame layout if apcs),
20809 subtracting four from LR now will mean that the function return
20810 can be done with a single instruction. */
20811 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
20812 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
20813 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
20816 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
20818 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
20821 if (live_regs_mask
)
20823 saved_regs
+= bit_count (live_regs_mask
) * 4;
20824 if (optimize_size
&& !frame_pointer_needed
20825 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
20827 /* If no coprocessor registers are being pushed and we don't have
20828 to worry about a frame pointer then push extra registers to
20829 create the stack frame. This is done is a way that does not
20830 alter the frame layout, so is independent of the epilogue. */
20834 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
20836 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
20837 if (frame
&& n
* 4 >= frame
)
20840 live_regs_mask
|= (1 << n
) - 1;
20841 saved_regs
+= frame
;
20846 && current_tune
->prefer_ldrd_strd
20847 && !optimize_function_for_size_p (cfun
))
20851 thumb2_emit_strd_push (live_regs_mask
);
20853 else if (TARGET_ARM
20854 && !TARGET_APCS_FRAME
20855 && !IS_INTERRUPT (func_type
))
20857 arm_emit_strd_push (live_regs_mask
);
20861 insn
= emit_multi_reg_push (live_regs_mask
);
20862 RTX_FRAME_RELATED_P (insn
) = 1;
20867 insn
= emit_multi_reg_push (live_regs_mask
);
20868 RTX_FRAME_RELATED_P (insn
) = 1;
20872 if (! IS_VOLATILE (func_type
))
20873 saved_regs
+= arm_save_coproc_regs ();
20875 if (frame_pointer_needed
&& TARGET_ARM
)
20877 /* Create the new frame pointer. */
20878 if (TARGET_APCS_FRAME
)
20880 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
20881 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
20882 RTX_FRAME_RELATED_P (insn
) = 1;
20884 if (IS_NESTED (func_type
))
20886 /* Recover the static chain register. */
20887 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
20888 insn
= gen_rtx_REG (SImode
, 3);
20891 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
20892 insn
= gen_frame_mem (SImode
, insn
);
20894 emit_set_insn (ip_rtx
, insn
);
20895 /* Add a USE to stop propagate_one_insn() from barfing. */
20896 emit_insn (gen_force_register_use (ip_rtx
));
20901 insn
= GEN_INT (saved_regs
- 4);
20902 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20903 stack_pointer_rtx
, insn
));
20904 RTX_FRAME_RELATED_P (insn
) = 1;
20908 if (flag_stack_usage_info
)
20909 current_function_static_stack_size
20910 = offsets
->outgoing_args
- offsets
->saved_args
;
20912 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
20914 /* This add can produce multiple insns for a large constant, so we
20915 need to get tricky. */
20916 rtx last
= get_last_insn ();
20918 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
20919 - offsets
->outgoing_args
);
20921 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20925 last
= last
? NEXT_INSN (last
) : get_insns ();
20926 RTX_FRAME_RELATED_P (last
) = 1;
20928 while (last
!= insn
);
20930 /* If the frame pointer is needed, emit a special barrier that
20931 will prevent the scheduler from moving stores to the frame
20932 before the stack adjustment. */
20933 if (frame_pointer_needed
)
20934 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
20935 hard_frame_pointer_rtx
));
20939 if (frame_pointer_needed
&& TARGET_THUMB2
)
20940 thumb_set_frame_pointer (offsets
);
20942 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20944 unsigned long mask
;
20946 mask
= live_regs_mask
;
20947 mask
&= THUMB2_WORK_REGS
;
20948 if (!IS_NESTED (func_type
))
20949 mask
|= (1 << IP_REGNUM
);
20950 arm_load_pic_register (mask
);
20953 /* If we are profiling, make sure no instructions are scheduled before
20954 the call to mcount. Similarly if the user has requested no
20955 scheduling in the prolog. Similarly if we want non-call exceptions
20956 using the EABI unwinder, to prevent faulting instructions from being
20957 swapped with a stack adjustment. */
20958 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
20959 || (arm_except_unwind_info (&global_options
) == UI_TARGET
20960 && cfun
->can_throw_non_call_exceptions
))
20961 emit_insn (gen_blockage ());
20963 /* If the link register is being kept alive, with the return address in it,
20964 then make sure that it does not get reused by the ce2 pass. */
20965 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
20966 cfun
->machine
->lr_save_eliminated
= 1;
20969 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20971 arm_print_condition (FILE *stream
)
20973 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
20975 /* Branch conversion is not implemented for Thumb-2. */
20978 output_operand_lossage ("predicated Thumb instruction");
20981 if (current_insn_predicate
!= NULL
)
20983 output_operand_lossage
20984 ("predicated instruction in conditional sequence");
20988 fputs (arm_condition_codes
[arm_current_cc
], stream
);
20990 else if (current_insn_predicate
)
20992 enum arm_cond_code code
;
20996 output_operand_lossage ("predicated Thumb instruction");
21000 code
= get_arm_condition_code (current_insn_predicate
);
21001 fputs (arm_condition_codes
[code
], stream
);
21006 /* If CODE is 'd', then the X is a condition operand and the instruction
21007 should only be executed if the condition is true.
21008 if CODE is 'D', then the X is a condition operand and the instruction
21009 should only be executed if the condition is false: however, if the mode
21010 of the comparison is CCFPEmode, then always execute the instruction -- we
21011 do this because in these circumstances !GE does not necessarily imply LT;
21012 in these cases the instruction pattern will take care to make sure that
21013 an instruction containing %d will follow, thereby undoing the effects of
21014 doing this instruction unconditionally.
21015 If CODE is 'N' then X is a floating point operand that must be negated
21017 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21018 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21020 arm_print_operand (FILE *stream
, rtx x
, int code
)
21025 fputs (ASM_COMMENT_START
, stream
);
21029 fputs (user_label_prefix
, stream
);
21033 fputs (REGISTER_PREFIX
, stream
);
21037 arm_print_condition (stream
);
21041 /* Nothing in unified syntax, otherwise the current condition code. */
21042 if (!TARGET_UNIFIED_ASM
)
21043 arm_print_condition (stream
);
21047 /* The current condition code in unified syntax, otherwise nothing. */
21048 if (TARGET_UNIFIED_ASM
)
21049 arm_print_condition (stream
);
21053 /* The current condition code for a condition code setting instruction.
21054 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21055 if (TARGET_UNIFIED_ASM
)
21057 fputc('s', stream
);
21058 arm_print_condition (stream
);
21062 arm_print_condition (stream
);
21063 fputc('s', stream
);
21068 /* If the instruction is conditionally executed then print
21069 the current condition code, otherwise print 's'. */
21070 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21071 if (current_insn_predicate
)
21072 arm_print_condition (stream
);
21074 fputc('s', stream
);
21077 /* %# is a "break" sequence. It doesn't output anything, but is used to
21078 separate e.g. operand numbers from following text, if that text consists
21079 of further digits which we don't want to be part of the operand
21087 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21088 r
= real_value_negate (&r
);
21089 fprintf (stream
, "%s", fp_const_from_val (&r
));
21093 /* An integer or symbol address without a preceding # sign. */
21095 switch (GET_CODE (x
))
21098 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21102 output_addr_const (stream
, x
);
21106 if (GET_CODE (XEXP (x
, 0)) == PLUS
21107 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21109 output_addr_const (stream
, x
);
21112 /* Fall through. */
21115 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21119 /* An integer that we want to print in HEX. */
21121 switch (GET_CODE (x
))
21124 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21128 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21133 if (CONST_INT_P (x
))
21136 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21137 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21141 putc ('~', stream
);
21142 output_addr_const (stream
, x
);
21147 /* The low 16 bits of an immediate constant. */
21148 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21152 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21156 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21164 shift
= shift_op (x
, &val
);
21168 fprintf (stream
, ", %s ", shift
);
21170 arm_print_operand (stream
, XEXP (x
, 1), 0);
21172 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21177 /* An explanation of the 'Q', 'R' and 'H' register operands:
21179 In a pair of registers containing a DI or DF value the 'Q'
21180 operand returns the register number of the register containing
21181 the least significant part of the value. The 'R' operand returns
21182 the register number of the register containing the most
21183 significant part of the value.
21185 The 'H' operand returns the higher of the two register numbers.
21186 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21187 same as the 'Q' operand, since the most significant part of the
21188 value is held in the lower number register. The reverse is true
21189 on systems where WORDS_BIG_ENDIAN is false.
21191 The purpose of these operands is to distinguish between cases
21192 where the endian-ness of the values is important (for example
21193 when they are added together), and cases where the endian-ness
21194 is irrelevant, but the order of register operations is important.
21195 For example when loading a value from memory into a register
21196 pair, the endian-ness does not matter. Provided that the value
21197 from the lower memory address is put into the lower numbered
21198 register, and the value from the higher address is put into the
21199 higher numbered register, the load will work regardless of whether
21200 the value being loaded is big-wordian or little-wordian. The
21201 order of the two register loads can matter however, if the address
21202 of the memory location is actually held in one of the registers
21203 being overwritten by the load.
21205 The 'Q' and 'R' constraints are also available for 64-bit
21208 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21210 rtx part
= gen_lowpart (SImode
, x
);
21211 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21215 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21217 output_operand_lossage ("invalid operand for code '%c'", code
);
21221 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21225 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21227 enum machine_mode mode
= GET_MODE (x
);
21230 if (mode
== VOIDmode
)
21232 part
= gen_highpart_mode (SImode
, mode
, x
);
21233 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21237 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21239 output_operand_lossage ("invalid operand for code '%c'", code
);
21243 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21247 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21249 output_operand_lossage ("invalid operand for code '%c'", code
);
21253 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21257 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21259 output_operand_lossage ("invalid operand for code '%c'", code
);
21263 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21267 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21269 output_operand_lossage ("invalid operand for code '%c'", code
);
21273 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21277 asm_fprintf (stream
, "%r",
21278 REG_P (XEXP (x
, 0))
21279 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21283 asm_fprintf (stream
, "{%r-%r}",
21285 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21288 /* Like 'M', but writing doubleword vector registers, for use by Neon
21292 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21293 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21295 asm_fprintf (stream
, "{d%d}", regno
);
21297 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21302 /* CONST_TRUE_RTX means always -- that's the default. */
21303 if (x
== const_true_rtx
)
21306 if (!COMPARISON_P (x
))
21308 output_operand_lossage ("invalid operand for code '%c'", code
);
21312 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21317 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21318 want to do that. */
21319 if (x
== const_true_rtx
)
21321 output_operand_lossage ("instruction never executed");
21324 if (!COMPARISON_P (x
))
21326 output_operand_lossage ("invalid operand for code '%c'", code
);
21330 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21331 (get_arm_condition_code (x
))],
21341 /* Former Maverick support, removed after GCC-4.7. */
21342 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21347 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21348 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21349 /* Bad value for wCG register number. */
21351 output_operand_lossage ("invalid operand for code '%c'", code
);
21356 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21359 /* Print an iWMMXt control register name. */
21361 if (!CONST_INT_P (x
)
21363 || INTVAL (x
) >= 16)
21364 /* Bad value for wC register number. */
21366 output_operand_lossage ("invalid operand for code '%c'", code
);
21372 static const char * wc_reg_names
[16] =
21374 "wCID", "wCon", "wCSSF", "wCASF",
21375 "wC4", "wC5", "wC6", "wC7",
21376 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21377 "wC12", "wC13", "wC14", "wC15"
21380 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21384 /* Print the high single-precision register of a VFP double-precision
21388 int mode
= GET_MODE (x
);
21391 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21393 output_operand_lossage ("invalid operand for code '%c'", code
);
21398 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21400 output_operand_lossage ("invalid operand for code '%c'", code
);
21404 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21408 /* Print a VFP/Neon double precision or quad precision register name. */
21412 int mode
= GET_MODE (x
);
21413 int is_quad
= (code
== 'q');
21416 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21418 output_operand_lossage ("invalid operand for code '%c'", code
);
21423 || !IS_VFP_REGNUM (REGNO (x
)))
21425 output_operand_lossage ("invalid operand for code '%c'", code
);
21430 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21431 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21433 output_operand_lossage ("invalid operand for code '%c'", code
);
21437 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21438 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21442 /* These two codes print the low/high doubleword register of a Neon quad
21443 register, respectively. For pair-structure types, can also print
21444 low/high quadword registers. */
21448 int mode
= GET_MODE (x
);
21451 if ((GET_MODE_SIZE (mode
) != 16
21452 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21454 output_operand_lossage ("invalid operand for code '%c'", code
);
21459 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21461 output_operand_lossage ("invalid operand for code '%c'", code
);
21465 if (GET_MODE_SIZE (mode
) == 16)
21466 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21467 + (code
== 'f' ? 1 : 0));
21469 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21470 + (code
== 'f' ? 1 : 0));
21474 /* Print a VFPv3 floating-point constant, represented as an integer
21478 int index
= vfp3_const_double_index (x
);
21479 gcc_assert (index
!= -1);
21480 fprintf (stream
, "%d", index
);
21484 /* Print bits representing opcode features for Neon.
21486 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21487 and polynomials as unsigned.
21489 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21491 Bit 2 is 1 for rounding functions, 0 otherwise. */
21493 /* Identify the type as 's', 'u', 'p' or 'f'. */
21496 HOST_WIDE_INT bits
= INTVAL (x
);
21497 fputc ("uspf"[bits
& 3], stream
);
21501 /* Likewise, but signed and unsigned integers are both 'i'. */
21504 HOST_WIDE_INT bits
= INTVAL (x
);
21505 fputc ("iipf"[bits
& 3], stream
);
21509 /* As for 'T', but emit 'u' instead of 'p'. */
21512 HOST_WIDE_INT bits
= INTVAL (x
);
21513 fputc ("usuf"[bits
& 3], stream
);
21517 /* Bit 2: rounding (vs none). */
21520 HOST_WIDE_INT bits
= INTVAL (x
);
21521 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21525 /* Memory operand for vld1/vst1 instruction. */
21529 bool postinc
= FALSE
;
21530 unsigned align
, memsize
, align_bits
;
21532 gcc_assert (MEM_P (x
));
21533 addr
= XEXP (x
, 0);
21534 if (GET_CODE (addr
) == POST_INC
)
21537 addr
= XEXP (addr
, 0);
21539 asm_fprintf (stream
, "[%r", REGNO (addr
));
21541 /* We know the alignment of this access, so we can emit a hint in the
21542 instruction (for some alignments) as an aid to the memory subsystem
21544 align
= MEM_ALIGN (x
) >> 3;
21545 memsize
= MEM_SIZE (x
);
21547 /* Only certain alignment specifiers are supported by the hardware. */
21548 if (memsize
== 32 && (align
% 32) == 0)
21550 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
21552 else if (memsize
>= 8 && (align
% 8) == 0)
21557 if (align_bits
!= 0)
21558 asm_fprintf (stream
, ":%d", align_bits
);
21560 asm_fprintf (stream
, "]");
21563 fputs("!", stream
);
21571 gcc_assert (MEM_P (x
));
21572 addr
= XEXP (x
, 0);
21573 gcc_assert (REG_P (addr
));
21574 asm_fprintf (stream
, "[%r]", REGNO (addr
));
21578 /* Translate an S register number into a D register number and element index. */
21581 int mode
= GET_MODE (x
);
21584 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
21586 output_operand_lossage ("invalid operand for code '%c'", code
);
21591 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21593 output_operand_lossage ("invalid operand for code '%c'", code
);
21597 regno
= regno
- FIRST_VFP_REGNUM
;
21598 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
21603 gcc_assert (CONST_DOUBLE_P (x
));
21605 result
= vfp3_const_double_for_fract_bits (x
);
21607 result
= vfp3_const_double_for_bits (x
);
21608 fprintf (stream
, "#%d", result
);
21611 /* Register specifier for vld1.16/vst1.16. Translate the S register
21612 number into a D register number and element index. */
21615 int mode
= GET_MODE (x
);
21618 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
21620 output_operand_lossage ("invalid operand for code '%c'", code
);
21625 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21627 output_operand_lossage ("invalid operand for code '%c'", code
);
21631 regno
= regno
- FIRST_VFP_REGNUM
;
21632 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
21639 output_operand_lossage ("missing operand");
21643 switch (GET_CODE (x
))
21646 asm_fprintf (stream
, "%r", REGNO (x
));
21650 output_memory_reference_mode
= GET_MODE (x
);
21651 output_address (XEXP (x
, 0));
21658 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
21659 sizeof (fpstr
), 0, 1);
21660 fprintf (stream
, "#%s", fpstr
);
21663 fprintf (stream
, "#%s", fp_immediate_constant (x
));
21667 gcc_assert (GET_CODE (x
) != NEG
);
21668 fputc ('#', stream
);
21669 if (GET_CODE (x
) == HIGH
)
21671 fputs (":lower16:", stream
);
21675 output_addr_const (stream
, x
);
21681 /* Target hook for printing a memory address. */
21683 arm_print_operand_address (FILE *stream
, rtx x
)
21687 int is_minus
= GET_CODE (x
) == MINUS
;
21690 asm_fprintf (stream
, "[%r]", REGNO (x
));
21691 else if (GET_CODE (x
) == PLUS
|| is_minus
)
21693 rtx base
= XEXP (x
, 0);
21694 rtx index
= XEXP (x
, 1);
21695 HOST_WIDE_INT offset
= 0;
21697 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
21699 /* Ensure that BASE is a register. */
21700 /* (one of them must be). */
21701 /* Also ensure the SP is not used as in index register. */
21706 switch (GET_CODE (index
))
21709 offset
= INTVAL (index
);
21712 asm_fprintf (stream
, "[%r, #%wd]",
21713 REGNO (base
), offset
);
21717 asm_fprintf (stream
, "[%r, %s%r]",
21718 REGNO (base
), is_minus
? "-" : "",
21728 asm_fprintf (stream
, "[%r, %s%r",
21729 REGNO (base
), is_minus
? "-" : "",
21730 REGNO (XEXP (index
, 0)));
21731 arm_print_operand (stream
, index
, 'S');
21732 fputs ("]", stream
);
21737 gcc_unreachable ();
21740 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
21741 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
21743 extern enum machine_mode output_memory_reference_mode
;
21745 gcc_assert (REG_P (XEXP (x
, 0)));
21747 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
21748 asm_fprintf (stream
, "[%r, #%s%d]!",
21749 REGNO (XEXP (x
, 0)),
21750 GET_CODE (x
) == PRE_DEC
? "-" : "",
21751 GET_MODE_SIZE (output_memory_reference_mode
));
21753 asm_fprintf (stream
, "[%r], #%s%d",
21754 REGNO (XEXP (x
, 0)),
21755 GET_CODE (x
) == POST_DEC
? "-" : "",
21756 GET_MODE_SIZE (output_memory_reference_mode
));
21758 else if (GET_CODE (x
) == PRE_MODIFY
)
21760 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
21761 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21762 asm_fprintf (stream
, "#%wd]!",
21763 INTVAL (XEXP (XEXP (x
, 1), 1)));
21765 asm_fprintf (stream
, "%r]!",
21766 REGNO (XEXP (XEXP (x
, 1), 1)));
21768 else if (GET_CODE (x
) == POST_MODIFY
)
21770 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
21771 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21772 asm_fprintf (stream
, "#%wd",
21773 INTVAL (XEXP (XEXP (x
, 1), 1)));
21775 asm_fprintf (stream
, "%r",
21776 REGNO (XEXP (XEXP (x
, 1), 1)));
21778 else output_addr_const (stream
, x
);
21783 asm_fprintf (stream
, "[%r]", REGNO (x
));
21784 else if (GET_CODE (x
) == POST_INC
)
21785 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
21786 else if (GET_CODE (x
) == PLUS
)
21788 gcc_assert (REG_P (XEXP (x
, 0)));
21789 if (CONST_INT_P (XEXP (x
, 1)))
21790 asm_fprintf (stream
, "[%r, #%wd]",
21791 REGNO (XEXP (x
, 0)),
21792 INTVAL (XEXP (x
, 1)));
21794 asm_fprintf (stream
, "[%r, %r]",
21795 REGNO (XEXP (x
, 0)),
21796 REGNO (XEXP (x
, 1)));
21799 output_addr_const (stream
, x
);
21803 /* Target hook for indicating whether a punctuation character for
21804 TARGET_PRINT_OPERAND is valid. */
21806 arm_print_operand_punct_valid_p (unsigned char code
)
21808 return (code
== '@' || code
== '|' || code
== '.'
21809 || code
== '(' || code
== ')' || code
== '#'
21810 || (TARGET_32BIT
&& (code
== '?'))
21811 || (TARGET_THUMB2
&& (code
== '!'))
21812 || (TARGET_THUMB
&& (code
== '_')));
21815 /* Target hook for assembling integer objects. The ARM version needs to
21816 handle word-sized values specially. */
21818 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
21820 enum machine_mode mode
;
21822 if (size
== UNITS_PER_WORD
&& aligned_p
)
21824 fputs ("\t.word\t", asm_out_file
);
21825 output_addr_const (asm_out_file
, x
);
21827 /* Mark symbols as position independent. We only do this in the
21828 .text segment, not in the .data segment. */
21829 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
21830 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
21832 /* See legitimize_pic_address for an explanation of the
21833 TARGET_VXWORKS_RTP check. */
21834 if (!arm_pic_data_is_text_relative
21835 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
21836 fputs ("(GOT)", asm_out_file
);
21838 fputs ("(GOTOFF)", asm_out_file
);
21840 fputc ('\n', asm_out_file
);
21844 mode
= GET_MODE (x
);
21846 if (arm_vector_mode_supported_p (mode
))
21850 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
21852 units
= CONST_VECTOR_NUNITS (x
);
21853 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
21855 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
21856 for (i
= 0; i
< units
; i
++)
21858 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21860 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
21863 for (i
= 0; i
< units
; i
++)
21865 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21866 REAL_VALUE_TYPE rval
;
21868 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
21871 (rval
, GET_MODE_INNER (mode
),
21872 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
21878 return default_assemble_integer (x
, size
, aligned_p
);
21882 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
21886 if (!TARGET_AAPCS_BASED
)
21889 default_named_section_asm_out_constructor
21890 : default_named_section_asm_out_destructor
) (symbol
, priority
);
21894 /* Put these in the .init_array section, using a special relocation. */
21895 if (priority
!= DEFAULT_INIT_PRIORITY
)
21898 sprintf (buf
, "%s.%.5u",
21899 is_ctor
? ".init_array" : ".fini_array",
21901 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
21908 switch_to_section (s
);
21909 assemble_align (POINTER_SIZE
);
21910 fputs ("\t.word\t", asm_out_file
);
21911 output_addr_const (asm_out_file
, symbol
);
21912 fputs ("(target1)\n", asm_out_file
);
21915 /* Add a function to the list of static constructors. */
21918 arm_elf_asm_constructor (rtx symbol
, int priority
)
21920 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
21923 /* Add a function to the list of static destructors. */
21926 arm_elf_asm_destructor (rtx symbol
, int priority
)
21928 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
21931 /* A finite state machine takes care of noticing whether or not instructions
21932 can be conditionally executed, and thus decrease execution time and code
21933 size by deleting branch instructions. The fsm is controlled by
21934 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21936 /* The state of the fsm controlling condition codes are:
21937 0: normal, do nothing special
21938 1: make ASM_OUTPUT_OPCODE not output this instruction
21939 2: make ASM_OUTPUT_OPCODE not output this instruction
21940 3: make instructions conditional
21941 4: make instructions conditional
21943 State transitions (state->state by whom under condition):
21944 0 -> 1 final_prescan_insn if the `target' is a label
21945 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21946 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21947 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21948 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21949 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21950 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21951 (the target insn is arm_target_insn).
21953 If the jump clobbers the conditions then we use states 2 and 4.
21955 A similar thing can be done with conditional return insns.
21957 XXX In case the `target' is an unconditional branch, this conditionalising
21958 of the instructions always reduces code size, but not always execution
21959 time. But then, I want to reduce the code size to somewhere near what
21960 /bin/cc produces. */
21962 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21963 instructions. When a COND_EXEC instruction is seen the subsequent
21964 instructions are scanned so that multiple conditional instructions can be
21965 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21966 specify the length and true/false mask for the IT block. These will be
21967 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
21969 /* Returns the index of the ARM condition code string in
21970 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21971 COMPARISON should be an rtx like `(eq (...) (...))'. */
21974 maybe_get_arm_condition_code (rtx comparison
)
21976 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
21977 enum arm_cond_code code
;
21978 enum rtx_code comp_code
= GET_CODE (comparison
);
21980 if (GET_MODE_CLASS (mode
) != MODE_CC
)
21981 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
21982 XEXP (comparison
, 1));
21986 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
21987 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
21988 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
21989 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
21990 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
21991 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
21992 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
21993 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
21994 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
21995 case CC_DLTUmode
: code
= ARM_CC
;
21998 if (comp_code
== EQ
)
21999 return ARM_INVERSE_CONDITION_CODE (code
);
22000 if (comp_code
== NE
)
22007 case NE
: return ARM_NE
;
22008 case EQ
: return ARM_EQ
;
22009 case GE
: return ARM_PL
;
22010 case LT
: return ARM_MI
;
22011 default: return ARM_NV
;
22017 case NE
: return ARM_NE
;
22018 case EQ
: return ARM_EQ
;
22019 default: return ARM_NV
;
22025 case NE
: return ARM_MI
;
22026 case EQ
: return ARM_PL
;
22027 default: return ARM_NV
;
22032 /* We can handle all cases except UNEQ and LTGT. */
22035 case GE
: return ARM_GE
;
22036 case GT
: return ARM_GT
;
22037 case LE
: return ARM_LS
;
22038 case LT
: return ARM_MI
;
22039 case NE
: return ARM_NE
;
22040 case EQ
: return ARM_EQ
;
22041 case ORDERED
: return ARM_VC
;
22042 case UNORDERED
: return ARM_VS
;
22043 case UNLT
: return ARM_LT
;
22044 case UNLE
: return ARM_LE
;
22045 case UNGT
: return ARM_HI
;
22046 case UNGE
: return ARM_PL
;
22047 /* UNEQ and LTGT do not have a representation. */
22048 case UNEQ
: /* Fall through. */
22049 case LTGT
: /* Fall through. */
22050 default: return ARM_NV
;
22056 case NE
: return ARM_NE
;
22057 case EQ
: return ARM_EQ
;
22058 case GE
: return ARM_LE
;
22059 case GT
: return ARM_LT
;
22060 case LE
: return ARM_GE
;
22061 case LT
: return ARM_GT
;
22062 case GEU
: return ARM_LS
;
22063 case GTU
: return ARM_CC
;
22064 case LEU
: return ARM_CS
;
22065 case LTU
: return ARM_HI
;
22066 default: return ARM_NV
;
22072 case LTU
: return ARM_CS
;
22073 case GEU
: return ARM_CC
;
22074 default: return ARM_NV
;
22080 case NE
: return ARM_NE
;
22081 case EQ
: return ARM_EQ
;
22082 case GEU
: return ARM_CS
;
22083 case GTU
: return ARM_HI
;
22084 case LEU
: return ARM_LS
;
22085 case LTU
: return ARM_CC
;
22086 default: return ARM_NV
;
22092 case GE
: return ARM_GE
;
22093 case LT
: return ARM_LT
;
22094 case GEU
: return ARM_CS
;
22095 case LTU
: return ARM_CC
;
22096 default: return ARM_NV
;
22102 case NE
: return ARM_NE
;
22103 case EQ
: return ARM_EQ
;
22104 case GE
: return ARM_GE
;
22105 case GT
: return ARM_GT
;
22106 case LE
: return ARM_LE
;
22107 case LT
: return ARM_LT
;
22108 case GEU
: return ARM_CS
;
22109 case GTU
: return ARM_HI
;
22110 case LEU
: return ARM_LS
;
22111 case LTU
: return ARM_CC
;
22112 default: return ARM_NV
;
22115 default: gcc_unreachable ();
22119 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22120 static enum arm_cond_code
22121 get_arm_condition_code (rtx comparison
)
22123 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22124 gcc_assert (code
!= ARM_NV
);
22128 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22131 thumb2_final_prescan_insn (rtx insn
)
22133 rtx first_insn
= insn
;
22134 rtx body
= PATTERN (insn
);
22136 enum arm_cond_code code
;
22141 /* max_insns_skipped in the tune was already taken into account in the
22142 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22143 just emit the IT blocks as we can. It does not make sense to split
22145 max
= MAX_INSN_PER_IT_BLOCK
;
22147 /* Remove the previous insn from the count of insns to be output. */
22148 if (arm_condexec_count
)
22149 arm_condexec_count
--;
22151 /* Nothing to do if we are already inside a conditional block. */
22152 if (arm_condexec_count
)
22155 if (GET_CODE (body
) != COND_EXEC
)
22158 /* Conditional jumps are implemented directly. */
22162 predicate
= COND_EXEC_TEST (body
);
22163 arm_current_cc
= get_arm_condition_code (predicate
);
22165 n
= get_attr_ce_count (insn
);
22166 arm_condexec_count
= 1;
22167 arm_condexec_mask
= (1 << n
) - 1;
22168 arm_condexec_masklen
= n
;
22169 /* See if subsequent instructions can be combined into the same block. */
22172 insn
= next_nonnote_insn (insn
);
22174 /* Jumping into the middle of an IT block is illegal, so a label or
22175 barrier terminates the block. */
22176 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22179 body
= PATTERN (insn
);
22180 /* USE and CLOBBER aren't really insns, so just skip them. */
22181 if (GET_CODE (body
) == USE
22182 || GET_CODE (body
) == CLOBBER
)
22185 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22186 if (GET_CODE (body
) != COND_EXEC
)
22188 /* Maximum number of conditionally executed instructions in a block. */
22189 n
= get_attr_ce_count (insn
);
22190 if (arm_condexec_masklen
+ n
> max
)
22193 predicate
= COND_EXEC_TEST (body
);
22194 code
= get_arm_condition_code (predicate
);
22195 mask
= (1 << n
) - 1;
22196 if (arm_current_cc
== code
)
22197 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22198 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22201 arm_condexec_count
++;
22202 arm_condexec_masklen
+= n
;
22204 /* A jump must be the last instruction in a conditional block. */
22208 /* Restore recog_data (getting the attributes of other insns can
22209 destroy this array, but final.c assumes that it remains intact
22210 across this call). */
22211 extract_constrain_insn_cached (first_insn
);
22215 arm_final_prescan_insn (rtx insn
)
22217 /* BODY will hold the body of INSN. */
22218 rtx body
= PATTERN (insn
);
22220 /* This will be 1 if trying to repeat the trick, and things need to be
22221 reversed if it appears to fail. */
22224 /* If we start with a return insn, we only succeed if we find another one. */
22225 int seeking_return
= 0;
22226 enum rtx_code return_code
= UNKNOWN
;
22228 /* START_INSN will hold the insn from where we start looking. This is the
22229 first insn after the following code_label if REVERSE is true. */
22230 rtx start_insn
= insn
;
22232 /* If in state 4, check if the target branch is reached, in order to
22233 change back to state 0. */
22234 if (arm_ccfsm_state
== 4)
22236 if (insn
== arm_target_insn
)
22238 arm_target_insn
= NULL
;
22239 arm_ccfsm_state
= 0;
22244 /* If in state 3, it is possible to repeat the trick, if this insn is an
22245 unconditional branch to a label, and immediately following this branch
22246 is the previous target label which is only used once, and the label this
22247 branch jumps to is not too far off. */
22248 if (arm_ccfsm_state
== 3)
22250 if (simplejump_p (insn
))
22252 start_insn
= next_nonnote_insn (start_insn
);
22253 if (BARRIER_P (start_insn
))
22255 /* XXX Isn't this always a barrier? */
22256 start_insn
= next_nonnote_insn (start_insn
);
22258 if (LABEL_P (start_insn
)
22259 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22260 && LABEL_NUSES (start_insn
) == 1)
22265 else if (ANY_RETURN_P (body
))
22267 start_insn
= next_nonnote_insn (start_insn
);
22268 if (BARRIER_P (start_insn
))
22269 start_insn
= next_nonnote_insn (start_insn
);
22270 if (LABEL_P (start_insn
)
22271 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22272 && LABEL_NUSES (start_insn
) == 1)
22275 seeking_return
= 1;
22276 return_code
= GET_CODE (body
);
22285 gcc_assert (!arm_ccfsm_state
|| reverse
);
22286 if (!JUMP_P (insn
))
22289 /* This jump might be paralleled with a clobber of the condition codes
22290 the jump should always come first */
22291 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22292 body
= XVECEXP (body
, 0, 0);
22295 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22296 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22299 int fail
= FALSE
, succeed
= FALSE
;
22300 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22301 int then_not_else
= TRUE
;
22302 rtx this_insn
= start_insn
, label
= 0;
22304 /* Register the insn jumped to. */
22307 if (!seeking_return
)
22308 label
= XEXP (SET_SRC (body
), 0);
22310 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22311 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22312 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22314 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22315 then_not_else
= FALSE
;
22317 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22319 seeking_return
= 1;
22320 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22322 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22324 seeking_return
= 1;
22325 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22326 then_not_else
= FALSE
;
22329 gcc_unreachable ();
22331 /* See how many insns this branch skips, and what kind of insns. If all
22332 insns are okay, and the label or unconditional branch to the same
22333 label is not too far away, succeed. */
22334 for (insns_skipped
= 0;
22335 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22339 this_insn
= next_nonnote_insn (this_insn
);
22343 switch (GET_CODE (this_insn
))
22346 /* Succeed if it is the target label, otherwise fail since
22347 control falls in from somewhere else. */
22348 if (this_insn
== label
)
22350 arm_ccfsm_state
= 1;
22358 /* Succeed if the following insn is the target label.
22360 If return insns are used then the last insn in a function
22361 will be a barrier. */
22362 this_insn
= next_nonnote_insn (this_insn
);
22363 if (this_insn
&& this_insn
== label
)
22365 arm_ccfsm_state
= 1;
22373 /* The AAPCS says that conditional calls should not be
22374 used since they make interworking inefficient (the
22375 linker can't transform BL<cond> into BLX). That's
22376 only a problem if the machine has BLX. */
22383 /* Succeed if the following insn is the target label, or
22384 if the following two insns are a barrier and the
22386 this_insn
= next_nonnote_insn (this_insn
);
22387 if (this_insn
&& BARRIER_P (this_insn
))
22388 this_insn
= next_nonnote_insn (this_insn
);
22390 if (this_insn
&& this_insn
== label
22391 && insns_skipped
< max_insns_skipped
)
22393 arm_ccfsm_state
= 1;
22401 /* If this is an unconditional branch to the same label, succeed.
22402 If it is to another label, do nothing. If it is conditional,
22404 /* XXX Probably, the tests for SET and the PC are
22407 scanbody
= PATTERN (this_insn
);
22408 if (GET_CODE (scanbody
) == SET
22409 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22411 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22412 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22414 arm_ccfsm_state
= 2;
22417 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22420 /* Fail if a conditional return is undesirable (e.g. on a
22421 StrongARM), but still allow this if optimizing for size. */
22422 else if (GET_CODE (scanbody
) == return_code
22423 && !use_return_insn (TRUE
, NULL
)
22426 else if (GET_CODE (scanbody
) == return_code
)
22428 arm_ccfsm_state
= 2;
22431 else if (GET_CODE (scanbody
) == PARALLEL
)
22433 switch (get_attr_conds (this_insn
))
22443 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22448 /* Instructions using or affecting the condition codes make it
22450 scanbody
= PATTERN (this_insn
);
22451 if (!(GET_CODE (scanbody
) == SET
22452 || GET_CODE (scanbody
) == PARALLEL
)
22453 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
22463 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22464 arm_target_label
= CODE_LABEL_NUMBER (label
);
22467 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22469 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22471 this_insn
= next_nonnote_insn (this_insn
);
22472 gcc_assert (!this_insn
22473 || (!BARRIER_P (this_insn
)
22474 && !LABEL_P (this_insn
)));
22478 /* Oh, dear! we ran off the end.. give up. */
22479 extract_constrain_insn_cached (insn
);
22480 arm_ccfsm_state
= 0;
22481 arm_target_insn
= NULL
;
22484 arm_target_insn
= this_insn
;
22487 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22490 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22492 if (reverse
|| then_not_else
)
22493 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
22496 /* Restore recog_data (getting the attributes of other insns can
22497 destroy this array, but final.c assumes that it remains intact
22498 across this call. */
22499 extract_constrain_insn_cached (insn
);
22503 /* Output IT instructions. */
22505 thumb2_asm_output_opcode (FILE * stream
)
22510 if (arm_condexec_mask
)
22512 for (n
= 0; n
< arm_condexec_masklen
; n
++)
22513 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
22515 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
22516 arm_condition_codes
[arm_current_cc
]);
22517 arm_condexec_mask
= 0;
22521 /* Returns true if REGNO is a valid register
22522 for holding a quantity of type MODE. */
22524 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
22526 if (GET_MODE_CLASS (mode
) == MODE_CC
)
22527 return (regno
== CC_REGNUM
22528 || (TARGET_HARD_FLOAT
&& TARGET_VFP
22529 && regno
== VFPCC_REGNUM
));
22532 /* For the Thumb we only allow values bigger than SImode in
22533 registers 0 - 6, so that there is always a second low
22534 register available to hold the upper part of the value.
22535 We probably we ought to ensure that the register is the
22536 start of an even numbered register pair. */
22537 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
22539 if (TARGET_HARD_FLOAT
&& TARGET_VFP
22540 && IS_VFP_REGNUM (regno
))
22542 if (mode
== SFmode
|| mode
== SImode
)
22543 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22545 if (mode
== DFmode
)
22546 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
22548 /* VFP registers can hold HFmode values, but there is no point in
22549 putting them there unless we have hardware conversion insns. */
22550 if (mode
== HFmode
)
22551 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
22554 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
22555 || (VALID_NEON_QREG_MODE (mode
)
22556 && NEON_REGNO_OK_FOR_QUAD (regno
))
22557 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
22558 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
22559 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
22560 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
22561 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
22566 if (TARGET_REALLY_IWMMXT
)
22568 if (IS_IWMMXT_GR_REGNUM (regno
))
22569 return mode
== SImode
;
22571 if (IS_IWMMXT_REGNUM (regno
))
22572 return VALID_IWMMXT_REG_MODE (mode
);
22575 /* We allow almost any value to be stored in the general registers.
22576 Restrict doubleword quantities to even register pairs so that we can
22577 use ldrd. Do not allow very large Neon structure opaque modes in
22578 general registers; they would use too many. */
22579 if (regno
<= LAST_ARM_REGNUM
)
22580 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
22581 && ARM_NUM_REGS (mode
) <= 4;
22583 if (regno
== FRAME_POINTER_REGNUM
22584 || regno
== ARG_POINTER_REGNUM
)
22585 /* We only allow integers in the fake hard registers. */
22586 return GET_MODE_CLASS (mode
) == MODE_INT
;
22591 /* Implement MODES_TIEABLE_P. */
22594 arm_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
22596 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
22599 /* We specifically want to allow elements of "structure" modes to
22600 be tieable to the structure. This more general condition allows
22601 other rarer situations too. */
22603 && (VALID_NEON_DREG_MODE (mode1
)
22604 || VALID_NEON_QREG_MODE (mode1
)
22605 || VALID_NEON_STRUCT_MODE (mode1
))
22606 && (VALID_NEON_DREG_MODE (mode2
)
22607 || VALID_NEON_QREG_MODE (mode2
)
22608 || VALID_NEON_STRUCT_MODE (mode2
)))
22614 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22615 not used in arm mode. */
22618 arm_regno_class (int regno
)
22622 if (regno
== STACK_POINTER_REGNUM
)
22624 if (regno
== CC_REGNUM
)
22631 if (TARGET_THUMB2
&& regno
< 8)
22634 if ( regno
<= LAST_ARM_REGNUM
22635 || regno
== FRAME_POINTER_REGNUM
22636 || regno
== ARG_POINTER_REGNUM
)
22637 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
22639 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
22640 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
22642 if (IS_VFP_REGNUM (regno
))
22644 if (regno
<= D7_VFP_REGNUM
)
22645 return VFP_D0_D7_REGS
;
22646 else if (regno
<= LAST_LO_VFP_REGNUM
)
22647 return VFP_LO_REGS
;
22649 return VFP_HI_REGS
;
22652 if (IS_IWMMXT_REGNUM (regno
))
22653 return IWMMXT_REGS
;
22655 if (IS_IWMMXT_GR_REGNUM (regno
))
22656 return IWMMXT_GR_REGS
;
22661 /* Handle a special case when computing the offset
22662 of an argument from the frame pointer. */
22664 arm_debugger_arg_offset (int value
, rtx addr
)
22668 /* We are only interested if dbxout_parms() failed to compute the offset. */
22672 /* We can only cope with the case where the address is held in a register. */
22676 /* If we are using the frame pointer to point at the argument, then
22677 an offset of 0 is correct. */
22678 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
22681 /* If we are using the stack pointer to point at the
22682 argument, then an offset of 0 is correct. */
22683 /* ??? Check this is consistent with thumb2 frame layout. */
22684 if ((TARGET_THUMB
|| !frame_pointer_needed
)
22685 && REGNO (addr
) == SP_REGNUM
)
22688 /* Oh dear. The argument is pointed to by a register rather
22689 than being held in a register, or being stored at a known
22690 offset from the frame pointer. Since GDB only understands
22691 those two kinds of argument we must translate the address
22692 held in the register into an offset from the frame pointer.
22693 We do this by searching through the insns for the function
22694 looking to see where this register gets its value. If the
22695 register is initialized from the frame pointer plus an offset
22696 then we are in luck and we can continue, otherwise we give up.
22698 This code is exercised by producing debugging information
22699 for a function with arguments like this:
22701 double func (double a, double b, int c, double d) {return d;}
22703 Without this code the stab for parameter 'd' will be set to
22704 an offset of 0 from the frame pointer, rather than 8. */
22706 /* The if() statement says:
22708 If the insn is a normal instruction
22709 and if the insn is setting the value in a register
22710 and if the register being set is the register holding the address of the argument
22711 and if the address is computing by an addition
22712 that involves adding to a register
22713 which is the frame pointer
22718 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
22720 if ( NONJUMP_INSN_P (insn
)
22721 && GET_CODE (PATTERN (insn
)) == SET
22722 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
22723 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
22724 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
22725 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22726 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
22729 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
22738 warning (0, "unable to compute real location of stacked parameter");
22739 value
= 8; /* XXX magic hack */
22760 T_MAX
/* Size of enum. Keep last. */
22761 } neon_builtin_type_mode
;
#define TYPE_MODE_BIT(X) (1 << (X))

/* Bit masks of the "key" vector modes that live in D (64-bit) and
   Q (128-bit) NEON registers respectively.  */
#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)	\
		 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI)	\
		 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI)	\
		 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF)	\
		 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))

/* Map the lower-case mode names used in arm_neon_builtins.def onto the
   corresponding neon_builtin_type_mode enumerators.
   NOTE(review): the di_UP / ti_UP / ei_UP / oi_UP lines were dropped by
   the extraction and restored from context — confirm upstream.  */
#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v4hf_UP  T_V4HF
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP
22823 NEON_LOADSTRUCTLANE
,
22825 NEON_STORESTRUCTLANE
,
22834 const neon_itype itype
;
22835 const neon_builtin_type_mode mode
;
22836 const enum insn_code code
;
22837 unsigned int fcode
;
22838 } neon_builtin_datum
;
#define CF(N,X) CODE_FOR_neon_##N##X

/* VARn (T, N, A..) expands to n initializers for neon_builtin_data:
   one {name, itype, key mode, insn code, fcode} entry per key mode of
   builtin N with itype T.  Each VARn chains onto VAR(n-1).
   NOTE(review): VAR2's first expansion line was dropped by the
   extraction and restored from the VAR3..VAR10 pattern — confirm.  */
#define VAR1(T, N, A) \
  {#N, NEON_##T, UP (A), CF (N, A), 0}
#define VAR2(T, N, A, B) \
  VAR1 (T, N, A), \
  {#N, NEON_##T, UP (B), CF (N, B), 0}
#define VAR3(T, N, A, B, C) \
  VAR2 (T, N, A, B), \
  {#N, NEON_##T, UP (C), CF (N, C), 0}
#define VAR4(T, N, A, B, C, D) \
  VAR3 (T, N, A, B, C), \
  {#N, NEON_##T, UP (D), CF (N, D), 0}
#define VAR5(T, N, A, B, C, D, E) \
  VAR4 (T, N, A, B, C, D), \
  {#N, NEON_##T, UP (E), CF (N, E), 0}
#define VAR6(T, N, A, B, C, D, E, F) \
  VAR5 (T, N, A, B, C, D, E), \
  {#N, NEON_##T, UP (F), CF (N, F), 0}
#define VAR7(T, N, A, B, C, D, E, F, G) \
  VAR6 (T, N, A, B, C, D, E, F), \
  {#N, NEON_##T, UP (G), CF (N, G), 0}
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, A, B, C, D, E, F, G), \
  {#N, NEON_##T, UP (H), CF (N, H), 0}
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
  {#N, NEON_##T, UP (I), CF (N, I), 0}
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
  {#N, NEON_##T, UP (J), CF (N, J), 0}
22872 /* The NEON builtin data can be found in arm_neon_builtins.def.
22873 The mode entries in the following table correspond to the "key" type of the
22874 instruction variant, i.e. equivalent to that which would be specified after
22875 the assembler mnemonic, which usually refers to the last vector operand.
22876 (Signed/unsigned/polynomial types are not differentiated between though, and
22877 are all mapped onto the same mode for a given element size.) The modes
22878 listed per instruction should be the same as those defined for that
22879 instruction's pattern in neon.md. */
22881 static neon_builtin_datum neon_builtin_data
[] =
22883 #include "arm_neon_builtins.def"
22898 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22899 #define VAR1(T, N, A) \
22901 #define VAR2(T, N, A, B) \
22904 #define VAR3(T, N, A, B, C) \
22905 VAR2 (T, N, A, B), \
22907 #define VAR4(T, N, A, B, C, D) \
22908 VAR3 (T, N, A, B, C), \
22910 #define VAR5(T, N, A, B, C, D, E) \
22911 VAR4 (T, N, A, B, C, D), \
22913 #define VAR6(T, N, A, B, C, D, E, F) \
22914 VAR5 (T, N, A, B, C, D, E), \
22916 #define VAR7(T, N, A, B, C, D, E, F, G) \
22917 VAR6 (T, N, A, B, C, D, E, F), \
22919 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22920 VAR7 (T, N, A, B, C, D, E, F, G), \
22922 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22923 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22925 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22926 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22930 ARM_BUILTIN_GETWCGR0
,
22931 ARM_BUILTIN_GETWCGR1
,
22932 ARM_BUILTIN_GETWCGR2
,
22933 ARM_BUILTIN_GETWCGR3
,
22935 ARM_BUILTIN_SETWCGR0
,
22936 ARM_BUILTIN_SETWCGR1
,
22937 ARM_BUILTIN_SETWCGR2
,
22938 ARM_BUILTIN_SETWCGR3
,
22942 ARM_BUILTIN_WAVG2BR
,
22943 ARM_BUILTIN_WAVG2HR
,
22944 ARM_BUILTIN_WAVG2B
,
22945 ARM_BUILTIN_WAVG2H
,
22952 ARM_BUILTIN_WMACSZ
,
22954 ARM_BUILTIN_WMACUZ
,
22957 ARM_BUILTIN_WSADBZ
,
22959 ARM_BUILTIN_WSADHZ
,
22961 ARM_BUILTIN_WALIGNI
,
22962 ARM_BUILTIN_WALIGNR0
,
22963 ARM_BUILTIN_WALIGNR1
,
22964 ARM_BUILTIN_WALIGNR2
,
22965 ARM_BUILTIN_WALIGNR3
,
22968 ARM_BUILTIN_TMIAPH
,
22969 ARM_BUILTIN_TMIABB
,
22970 ARM_BUILTIN_TMIABT
,
22971 ARM_BUILTIN_TMIATB
,
22972 ARM_BUILTIN_TMIATT
,
22974 ARM_BUILTIN_TMOVMSKB
,
22975 ARM_BUILTIN_TMOVMSKH
,
22976 ARM_BUILTIN_TMOVMSKW
,
22978 ARM_BUILTIN_TBCSTB
,
22979 ARM_BUILTIN_TBCSTH
,
22980 ARM_BUILTIN_TBCSTW
,
22982 ARM_BUILTIN_WMADDS
,
22983 ARM_BUILTIN_WMADDU
,
22985 ARM_BUILTIN_WPACKHSS
,
22986 ARM_BUILTIN_WPACKWSS
,
22987 ARM_BUILTIN_WPACKDSS
,
22988 ARM_BUILTIN_WPACKHUS
,
22989 ARM_BUILTIN_WPACKWUS
,
22990 ARM_BUILTIN_WPACKDUS
,
22995 ARM_BUILTIN_WADDSSB
,
22996 ARM_BUILTIN_WADDSSH
,
22997 ARM_BUILTIN_WADDSSW
,
22998 ARM_BUILTIN_WADDUSB
,
22999 ARM_BUILTIN_WADDUSH
,
23000 ARM_BUILTIN_WADDUSW
,
23004 ARM_BUILTIN_WSUBSSB
,
23005 ARM_BUILTIN_WSUBSSH
,
23006 ARM_BUILTIN_WSUBSSW
,
23007 ARM_BUILTIN_WSUBUSB
,
23008 ARM_BUILTIN_WSUBUSH
,
23009 ARM_BUILTIN_WSUBUSW
,
23016 ARM_BUILTIN_WCMPEQB
,
23017 ARM_BUILTIN_WCMPEQH
,
23018 ARM_BUILTIN_WCMPEQW
,
23019 ARM_BUILTIN_WCMPGTUB
,
23020 ARM_BUILTIN_WCMPGTUH
,
23021 ARM_BUILTIN_WCMPGTUW
,
23022 ARM_BUILTIN_WCMPGTSB
,
23023 ARM_BUILTIN_WCMPGTSH
,
23024 ARM_BUILTIN_WCMPGTSW
,
23026 ARM_BUILTIN_TEXTRMSB
,
23027 ARM_BUILTIN_TEXTRMSH
,
23028 ARM_BUILTIN_TEXTRMSW
,
23029 ARM_BUILTIN_TEXTRMUB
,
23030 ARM_BUILTIN_TEXTRMUH
,
23031 ARM_BUILTIN_TEXTRMUW
,
23032 ARM_BUILTIN_TINSRB
,
23033 ARM_BUILTIN_TINSRH
,
23034 ARM_BUILTIN_TINSRW
,
23036 ARM_BUILTIN_WMAXSW
,
23037 ARM_BUILTIN_WMAXSH
,
23038 ARM_BUILTIN_WMAXSB
,
23039 ARM_BUILTIN_WMAXUW
,
23040 ARM_BUILTIN_WMAXUH
,
23041 ARM_BUILTIN_WMAXUB
,
23042 ARM_BUILTIN_WMINSW
,
23043 ARM_BUILTIN_WMINSH
,
23044 ARM_BUILTIN_WMINSB
,
23045 ARM_BUILTIN_WMINUW
,
23046 ARM_BUILTIN_WMINUH
,
23047 ARM_BUILTIN_WMINUB
,
23049 ARM_BUILTIN_WMULUM
,
23050 ARM_BUILTIN_WMULSM
,
23051 ARM_BUILTIN_WMULUL
,
23053 ARM_BUILTIN_PSADBH
,
23054 ARM_BUILTIN_WSHUFH
,
23068 ARM_BUILTIN_WSLLHI
,
23069 ARM_BUILTIN_WSLLWI
,
23070 ARM_BUILTIN_WSLLDI
,
23071 ARM_BUILTIN_WSRAHI
,
23072 ARM_BUILTIN_WSRAWI
,
23073 ARM_BUILTIN_WSRADI
,
23074 ARM_BUILTIN_WSRLHI
,
23075 ARM_BUILTIN_WSRLWI
,
23076 ARM_BUILTIN_WSRLDI
,
23077 ARM_BUILTIN_WRORHI
,
23078 ARM_BUILTIN_WRORWI
,
23079 ARM_BUILTIN_WRORDI
,
23081 ARM_BUILTIN_WUNPCKIHB
,
23082 ARM_BUILTIN_WUNPCKIHH
,
23083 ARM_BUILTIN_WUNPCKIHW
,
23084 ARM_BUILTIN_WUNPCKILB
,
23085 ARM_BUILTIN_WUNPCKILH
,
23086 ARM_BUILTIN_WUNPCKILW
,
23088 ARM_BUILTIN_WUNPCKEHSB
,
23089 ARM_BUILTIN_WUNPCKEHSH
,
23090 ARM_BUILTIN_WUNPCKEHSW
,
23091 ARM_BUILTIN_WUNPCKEHUB
,
23092 ARM_BUILTIN_WUNPCKEHUH
,
23093 ARM_BUILTIN_WUNPCKEHUW
,
23094 ARM_BUILTIN_WUNPCKELSB
,
23095 ARM_BUILTIN_WUNPCKELSH
,
23096 ARM_BUILTIN_WUNPCKELSW
,
23097 ARM_BUILTIN_WUNPCKELUB
,
23098 ARM_BUILTIN_WUNPCKELUH
,
23099 ARM_BUILTIN_WUNPCKELUW
,
23105 ARM_BUILTIN_WADDSUBHX
,
23106 ARM_BUILTIN_WSUBADDHX
,
23108 ARM_BUILTIN_WABSDIFFB
,
23109 ARM_BUILTIN_WABSDIFFH
,
23110 ARM_BUILTIN_WABSDIFFW
,
23112 ARM_BUILTIN_WADDCH
,
23113 ARM_BUILTIN_WADDCW
,
23116 ARM_BUILTIN_WAVG4R
,
23118 ARM_BUILTIN_WMADDSX
,
23119 ARM_BUILTIN_WMADDUX
,
23121 ARM_BUILTIN_WMADDSN
,
23122 ARM_BUILTIN_WMADDUN
,
23124 ARM_BUILTIN_WMULWSM
,
23125 ARM_BUILTIN_WMULWUM
,
23127 ARM_BUILTIN_WMULWSMR
,
23128 ARM_BUILTIN_WMULWUMR
,
23130 ARM_BUILTIN_WMULWL
,
23132 ARM_BUILTIN_WMULSMR
,
23133 ARM_BUILTIN_WMULUMR
,
23135 ARM_BUILTIN_WQMULM
,
23136 ARM_BUILTIN_WQMULMR
,
23138 ARM_BUILTIN_WQMULWM
,
23139 ARM_BUILTIN_WQMULWMR
,
23141 ARM_BUILTIN_WADDBHUSM
,
23142 ARM_BUILTIN_WADDBHUSL
,
23144 ARM_BUILTIN_WQMIABB
,
23145 ARM_BUILTIN_WQMIABT
,
23146 ARM_BUILTIN_WQMIATB
,
23147 ARM_BUILTIN_WQMIATT
,
23149 ARM_BUILTIN_WQMIABBN
,
23150 ARM_BUILTIN_WQMIABTN
,
23151 ARM_BUILTIN_WQMIATBN
,
23152 ARM_BUILTIN_WQMIATTN
,
23154 ARM_BUILTIN_WMIABB
,
23155 ARM_BUILTIN_WMIABT
,
23156 ARM_BUILTIN_WMIATB
,
23157 ARM_BUILTIN_WMIATT
,
23159 ARM_BUILTIN_WMIABBN
,
23160 ARM_BUILTIN_WMIABTN
,
23161 ARM_BUILTIN_WMIATBN
,
23162 ARM_BUILTIN_WMIATTN
,
23164 ARM_BUILTIN_WMIAWBB
,
23165 ARM_BUILTIN_WMIAWBT
,
23166 ARM_BUILTIN_WMIAWTB
,
23167 ARM_BUILTIN_WMIAWTT
,
23169 ARM_BUILTIN_WMIAWBBN
,
23170 ARM_BUILTIN_WMIAWBTN
,
23171 ARM_BUILTIN_WMIAWTBN
,
23172 ARM_BUILTIN_WMIAWTTN
,
23174 ARM_BUILTIN_WMERGE
,
23176 ARM_BUILTIN_CRC32B
,
23177 ARM_BUILTIN_CRC32H
,
23178 ARM_BUILTIN_CRC32W
,
23179 ARM_BUILTIN_CRC32CB
,
23180 ARM_BUILTIN_CRC32CH
,
23181 ARM_BUILTIN_CRC32CW
,
23187 #define CRYPTO1(L, U, M1, M2) \
23188 ARM_BUILTIN_CRYPTO_##U,
23189 #define CRYPTO2(L, U, M1, M2, M3) \
23190 ARM_BUILTIN_CRYPTO_##U,
23191 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23192 ARM_BUILTIN_CRYPTO_##U,
23194 #include "crypto.def"
23200 #include "arm_neon_builtins.def"
23205 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23219 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
23221 #define NUM_DREG_TYPES 5
23222 #define NUM_QREG_TYPES 6
23225 arm_init_neon_builtins (void)
23227 unsigned int i
, fcode
;
23230 tree neon_intQI_type_node
;
23231 tree neon_intHI_type_node
;
23232 tree neon_floatHF_type_node
;
23233 tree neon_polyQI_type_node
;
23234 tree neon_polyHI_type_node
;
23235 tree neon_intSI_type_node
;
23236 tree neon_intDI_type_node
;
23237 tree neon_intUTI_type_node
;
23238 tree neon_float_type_node
;
23240 tree intQI_pointer_node
;
23241 tree intHI_pointer_node
;
23242 tree intSI_pointer_node
;
23243 tree intDI_pointer_node
;
23244 tree float_pointer_node
;
23246 tree const_intQI_node
;
23247 tree const_intHI_node
;
23248 tree const_intSI_node
;
23249 tree const_intDI_node
;
23250 tree const_float_node
;
23252 tree const_intQI_pointer_node
;
23253 tree const_intHI_pointer_node
;
23254 tree const_intSI_pointer_node
;
23255 tree const_intDI_pointer_node
;
23256 tree const_float_pointer_node
;
23258 tree V8QI_type_node
;
23259 tree V4HI_type_node
;
23260 tree V4HF_type_node
;
23261 tree V2SI_type_node
;
23262 tree V2SF_type_node
;
23263 tree V16QI_type_node
;
23264 tree V8HI_type_node
;
23265 tree V4SI_type_node
;
23266 tree V4SF_type_node
;
23267 tree V2DI_type_node
;
23269 tree intUQI_type_node
;
23270 tree intUHI_type_node
;
23271 tree intUSI_type_node
;
23272 tree intUDI_type_node
;
23274 tree intEI_type_node
;
23275 tree intOI_type_node
;
23276 tree intCI_type_node
;
23277 tree intXI_type_node
;
23279 tree V8QI_pointer_node
;
23280 tree V4HI_pointer_node
;
23281 tree V2SI_pointer_node
;
23282 tree V2SF_pointer_node
;
23283 tree V16QI_pointer_node
;
23284 tree V8HI_pointer_node
;
23285 tree V4SI_pointer_node
;
23286 tree V4SF_pointer_node
;
23287 tree V2DI_pointer_node
;
23289 tree void_ftype_pv8qi_v8qi_v8qi
;
23290 tree void_ftype_pv4hi_v4hi_v4hi
;
23291 tree void_ftype_pv2si_v2si_v2si
;
23292 tree void_ftype_pv2sf_v2sf_v2sf
;
23293 tree void_ftype_pdi_di_di
;
23294 tree void_ftype_pv16qi_v16qi_v16qi
;
23295 tree void_ftype_pv8hi_v8hi_v8hi
;
23296 tree void_ftype_pv4si_v4si_v4si
;
23297 tree void_ftype_pv4sf_v4sf_v4sf
;
23298 tree void_ftype_pv2di_v2di_v2di
;
23300 tree reinterp_ftype_dreg
[NUM_DREG_TYPES
][NUM_DREG_TYPES
];
23301 tree reinterp_ftype_qreg
[NUM_QREG_TYPES
][NUM_QREG_TYPES
];
23302 tree dreg_types
[NUM_DREG_TYPES
], qreg_types
[NUM_QREG_TYPES
];
23304 /* Create distinguished type nodes for NEON vector element types,
23305 and pointers to values of such types, so we can detect them later. */
23306 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23307 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23308 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23309 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23310 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
23311 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
23312 neon_float_type_node
= make_node (REAL_TYPE
);
23313 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
23314 layout_type (neon_float_type_node
);
23315 neon_floatHF_type_node
= make_node (REAL_TYPE
);
23316 TYPE_PRECISION (neon_floatHF_type_node
) = GET_MODE_PRECISION (HFmode
);
23317 layout_type (neon_floatHF_type_node
);
23319 /* Define typedefs which exactly correspond to the modes we are basing vector
23320 types on. If you change these names you'll need to change
23321 the table used by arm_mangle_type too. */
23322 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
23323 "__builtin_neon_qi");
23324 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
23325 "__builtin_neon_hi");
23326 (*lang_hooks
.types
.register_builtin_type
) (neon_floatHF_type_node
,
23327 "__builtin_neon_hf");
23328 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
23329 "__builtin_neon_si");
23330 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
23331 "__builtin_neon_sf");
23332 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
23333 "__builtin_neon_di");
23334 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
23335 "__builtin_neon_poly8");
23336 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
23337 "__builtin_neon_poly16");
23339 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
23340 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
23341 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
23342 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
23343 float_pointer_node
= build_pointer_type (neon_float_type_node
);
23345 /* Next create constant-qualified versions of the above types. */
23346 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
23348 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
23350 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
23352 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
23354 const_float_node
= build_qualified_type (neon_float_type_node
,
23357 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
23358 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
23359 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
23360 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
23361 const_float_pointer_node
= build_pointer_type (const_float_node
);
23363 /* Now create vector types based on our NEON element types. */
23364 /* 64-bit vectors. */
23366 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
23368 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
23370 build_vector_type_for_mode (neon_floatHF_type_node
, V4HFmode
);
23372 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
23374 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
23375 /* 128-bit vectors. */
23377 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
23379 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
23381 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
23383 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
23385 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
23387 /* Unsigned integer types for various mode sizes. */
23388 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
23389 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
23390 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
23391 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
23392 neon_intUTI_type_node
= make_unsigned_type (GET_MODE_PRECISION (TImode
));
23395 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
23396 "__builtin_neon_uqi");
23397 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
23398 "__builtin_neon_uhi");
23399 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
23400 "__builtin_neon_usi");
23401 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23402 "__builtin_neon_udi");
23403 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23404 "__builtin_neon_poly64");
23405 (*lang_hooks
.types
.register_builtin_type
) (neon_intUTI_type_node
,
23406 "__builtin_neon_poly128");
23408 /* Opaque integer types for structures of vectors. */
23409 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
23410 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
23411 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
23412 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
23414 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
23415 "__builtin_neon_ti");
23416 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
23417 "__builtin_neon_ei");
23418 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
23419 "__builtin_neon_oi");
23420 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
23421 "__builtin_neon_ci");
23422 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
23423 "__builtin_neon_xi");
23425 /* Pointers to vector types. */
23426 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
23427 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
23428 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
23429 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
23430 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
23431 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
23432 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
23433 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
23434 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
23436 /* Operations which return results as pairs. */
23437 void_ftype_pv8qi_v8qi_v8qi
=
23438 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
23439 V8QI_type_node
, NULL
);
23440 void_ftype_pv4hi_v4hi_v4hi
=
23441 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
23442 V4HI_type_node
, NULL
);
23443 void_ftype_pv2si_v2si_v2si
=
23444 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
23445 V2SI_type_node
, NULL
);
23446 void_ftype_pv2sf_v2sf_v2sf
=
23447 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
23448 V2SF_type_node
, NULL
);
23449 void_ftype_pdi_di_di
=
23450 build_function_type_list (void_type_node
, intDI_pointer_node
,
23451 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
23452 void_ftype_pv16qi_v16qi_v16qi
=
23453 build_function_type_list (void_type_node
, V16QI_pointer_node
,
23454 V16QI_type_node
, V16QI_type_node
, NULL
);
23455 void_ftype_pv8hi_v8hi_v8hi
=
23456 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
23457 V8HI_type_node
, NULL
);
23458 void_ftype_pv4si_v4si_v4si
=
23459 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
23460 V4SI_type_node
, NULL
);
23461 void_ftype_pv4sf_v4sf_v4sf
=
23462 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
23463 V4SF_type_node
, NULL
);
23464 void_ftype_pv2di_v2di_v2di
=
23465 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
23466 V2DI_type_node
, NULL
);
23468 if (TARGET_CRYPTO
&& TARGET_HARD_FLOAT
)
23470 tree V4USI_type_node
=
23471 build_vector_type_for_mode (intUSI_type_node
, V4SImode
);
23473 tree V16UQI_type_node
=
23474 build_vector_type_for_mode (intUQI_type_node
, V16QImode
);
23476 tree v16uqi_ftype_v16uqi
23477 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
, NULL_TREE
);
23479 tree v16uqi_ftype_v16uqi_v16uqi
23480 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
,
23481 V16UQI_type_node
, NULL_TREE
);
23483 tree v4usi_ftype_v4usi
23484 = build_function_type_list (V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23486 tree v4usi_ftype_v4usi_v4usi
23487 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23488 V4USI_type_node
, NULL_TREE
);
23490 tree v4usi_ftype_v4usi_v4usi_v4usi
23491 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23492 V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23494 tree uti_ftype_udi_udi
23495 = build_function_type_list (neon_intUTI_type_node
, intUDI_type_node
,
23496 intUDI_type_node
, NULL_TREE
);
23509 ARM_BUILTIN_CRYPTO_##U
23511 "__builtin_arm_crypto_"#L
23512 #define FT1(R, A) \
23514 #define FT2(R, A1, A2) \
23515 R##_ftype_##A1##_##A2
23516 #define FT3(R, A1, A2, A3) \
23517 R##_ftype_##A1##_##A2##_##A3
23518 #define CRYPTO1(L, U, R, A) \
23519 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23520 C (U), BUILT_IN_MD, \
23522 #define CRYPTO2(L, U, R, A1, A2) \
23523 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23524 C (U), BUILT_IN_MD, \
23527 #define CRYPTO3(L, U, R, A1, A2, A3) \
23528 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23529 C (U), BUILT_IN_MD, \
23531 #include "crypto.def"
23542 dreg_types
[0] = V8QI_type_node
;
23543 dreg_types
[1] = V4HI_type_node
;
23544 dreg_types
[2] = V2SI_type_node
;
23545 dreg_types
[3] = V2SF_type_node
;
23546 dreg_types
[4] = neon_intDI_type_node
;
23548 qreg_types
[0] = V16QI_type_node
;
23549 qreg_types
[1] = V8HI_type_node
;
23550 qreg_types
[2] = V4SI_type_node
;
23551 qreg_types
[3] = V4SF_type_node
;
23552 qreg_types
[4] = V2DI_type_node
;
23553 qreg_types
[5] = neon_intUTI_type_node
;
23555 for (i
= 0; i
< NUM_QREG_TYPES
; i
++)
23558 for (j
= 0; j
< NUM_QREG_TYPES
; j
++)
23560 if (i
< NUM_DREG_TYPES
&& j
< NUM_DREG_TYPES
)
23561 reinterp_ftype_dreg
[i
][j
]
23562 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
23564 reinterp_ftype_qreg
[i
][j
]
23565 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
23569 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
23570 i
< ARRAY_SIZE (neon_builtin_data
);
23573 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
23575 const char* const modenames
[] = {
23576 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23577 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23582 int is_load
= 0, is_store
= 0;
23584 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
23591 case NEON_LOAD1LANE
:
23592 case NEON_LOADSTRUCT
:
23593 case NEON_LOADSTRUCTLANE
:
23595 /* Fall through. */
23597 case NEON_STORE1LANE
:
23598 case NEON_STORESTRUCT
:
23599 case NEON_STORESTRUCTLANE
:
23602 /* Fall through. */
23606 case NEON_LOGICBINOP
:
23607 case NEON_SHIFTINSERT
:
23614 case NEON_SHIFTIMM
:
23615 case NEON_SHIFTACC
:
23621 case NEON_LANEMULL
:
23622 case NEON_LANEMULH
:
23624 case NEON_SCALARMUL
:
23625 case NEON_SCALARMULL
:
23626 case NEON_SCALARMULH
:
23627 case NEON_SCALARMAC
:
23633 tree return_type
= void_type_node
, args
= void_list_node
;
23635 /* Build a function type directly from the insn_data for
23636 this builtin. The build_function_type() function takes
23637 care of removing duplicates for us. */
23638 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
23642 if (is_load
&& k
== 1)
23644 /* Neon load patterns always have the memory
23645 operand in the operand 1 position. */
23646 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23647 == neon_struct_operand
);
23653 eltype
= const_intQI_pointer_node
;
23658 eltype
= const_intHI_pointer_node
;
23663 eltype
= const_intSI_pointer_node
;
23668 eltype
= const_float_pointer_node
;
23673 eltype
= const_intDI_pointer_node
;
23676 default: gcc_unreachable ();
23679 else if (is_store
&& k
== 0)
23681 /* Similarly, Neon store patterns use operand 0 as
23682 the memory location to store to. */
23683 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23684 == neon_struct_operand
);
23690 eltype
= intQI_pointer_node
;
23695 eltype
= intHI_pointer_node
;
23700 eltype
= intSI_pointer_node
;
23705 eltype
= float_pointer_node
;
23710 eltype
= intDI_pointer_node
;
23713 default: gcc_unreachable ();
23718 switch (insn_data
[d
->code
].operand
[k
].mode
)
23720 case VOIDmode
: eltype
= void_type_node
; break;
23722 case QImode
: eltype
= neon_intQI_type_node
; break;
23723 case HImode
: eltype
= neon_intHI_type_node
; break;
23724 case SImode
: eltype
= neon_intSI_type_node
; break;
23725 case SFmode
: eltype
= neon_float_type_node
; break;
23726 case DImode
: eltype
= neon_intDI_type_node
; break;
23727 case TImode
: eltype
= intTI_type_node
; break;
23728 case EImode
: eltype
= intEI_type_node
; break;
23729 case OImode
: eltype
= intOI_type_node
; break;
23730 case CImode
: eltype
= intCI_type_node
; break;
23731 case XImode
: eltype
= intXI_type_node
; break;
23732 /* 64-bit vectors. */
23733 case V8QImode
: eltype
= V8QI_type_node
; break;
23734 case V4HImode
: eltype
= V4HI_type_node
; break;
23735 case V2SImode
: eltype
= V2SI_type_node
; break;
23736 case V2SFmode
: eltype
= V2SF_type_node
; break;
23737 /* 128-bit vectors. */
23738 case V16QImode
: eltype
= V16QI_type_node
; break;
23739 case V8HImode
: eltype
= V8HI_type_node
; break;
23740 case V4SImode
: eltype
= V4SI_type_node
; break;
23741 case V4SFmode
: eltype
= V4SF_type_node
; break;
23742 case V2DImode
: eltype
= V2DI_type_node
; break;
23743 default: gcc_unreachable ();
23747 if (k
== 0 && !is_store
)
23748 return_type
= eltype
;
23750 args
= tree_cons (NULL_TREE
, eltype
, args
);
23753 ftype
= build_function_type (return_type
, args
);
23757 case NEON_RESULTPAIR
:
23759 switch (insn_data
[d
->code
].operand
[1].mode
)
23761 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
23762 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
23763 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
23764 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
23765 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
23766 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
23767 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
23768 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
23769 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
23770 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
23771 default: gcc_unreachable ();
23776 case NEON_REINTERP
:
23778 /* We iterate over NUM_DREG_TYPES doubleword types,
23779 then NUM_QREG_TYPES quadword types.
23780 V4HF is not a type used in reinterpret, so we translate
23781 d->mode to the correct index in reinterp_ftype_dreg. */
23783 = GET_MODE_SIZE (insn_data
[d
->code
].operand
[0].mode
) > 8;
23784 int rhs
= (d
->mode
- ((!qreg_p
&& (d
->mode
> T_V4HF
)) ? 1 : 0))
23786 switch (insn_data
[d
->code
].operand
[0].mode
)
23788 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
23789 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
23790 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
23791 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
23792 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
23793 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
23794 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
23795 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
23796 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
23797 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
23798 case TImode
: ftype
= reinterp_ftype_qreg
[5][rhs
]; break;
23799 default: gcc_unreachable ();
23803 case NEON_FLOAT_WIDEN
:
23805 tree eltype
= NULL_TREE
;
23806 tree return_type
= NULL_TREE
;
23808 switch (insn_data
[d
->code
].operand
[1].mode
)
23811 eltype
= V4HF_type_node
;
23812 return_type
= V4SF_type_node
;
23814 default: gcc_unreachable ();
23816 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
23819 case NEON_FLOAT_NARROW
:
23821 tree eltype
= NULL_TREE
;
23822 tree return_type
= NULL_TREE
;
23824 switch (insn_data
[d
->code
].operand
[1].mode
)
23827 eltype
= V4SF_type_node
;
23828 return_type
= V4HF_type_node
;
23830 default: gcc_unreachable ();
23832 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
23836 gcc_unreachable ();
23839 gcc_assert (ftype
!= NULL
);
23841 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
23843 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
23845 arm_builtin_decls
[fcode
] = decl
;
23849 #undef NUM_DREG_TYPES
23850 #undef NUM_QREG_TYPES
23852 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23855 if ((MASK) & insn_flags) \
23858 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23859 BUILT_IN_MD, NULL, NULL_TREE); \
23860 arm_builtin_decls[CODE] = bdecl; \
23865 struct builtin_description
23867 const unsigned int mask
;
23868 const enum insn_code icode
;
23869 const char * const name
;
23870 const enum arm_builtins code
;
23871 const enum rtx_code comparison
;
23872 const unsigned int flag
;
23875 static const struct builtin_description bdesc_2arg
[] =
23877 #define IWMMXT_BUILTIN(code, string, builtin) \
23878 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23879 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23881 #define IWMMXT2_BUILTIN(code, string, builtin) \
23882 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23883 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23885 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
23886 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
23887 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
23888 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
23889 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
23890 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
23891 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
23892 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
23893 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
23894 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
23895 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
23896 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
23897 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
23898 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
23899 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
23900 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
23901 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
23902 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
23903 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
23904 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
23905 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
23906 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
23907 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
23908 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
23909 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
23910 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
23911 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
23912 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
23913 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
23914 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
23915 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
23916 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
23917 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
23918 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
23919 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
23920 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
23921 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
23922 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
23923 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
23924 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
23925 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
23926 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
23927 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
23928 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
23929 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
23930 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
23931 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
23932 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
23933 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
23934 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
23935 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
23936 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
23937 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
23938 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
23939 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
23940 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
23941 IWMMXT2_BUILTIN (iwmmxt_waddsubhx
, "waddsubhx", WADDSUBHX
)
23942 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx
, "wsubaddhx", WSUBADDHX
)
23943 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb
, "wabsdiffb", WABSDIFFB
)
23944 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh
, "wabsdiffh", WABSDIFFH
)
23945 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw
, "wabsdiffw", WABSDIFFW
)
23946 IWMMXT2_BUILTIN (iwmmxt_avg4
, "wavg4", WAVG4
)
23947 IWMMXT2_BUILTIN (iwmmxt_avg4r
, "wavg4r", WAVG4R
)
23948 IWMMXT2_BUILTIN (iwmmxt_wmulwsm
, "wmulwsm", WMULWSM
)
23949 IWMMXT2_BUILTIN (iwmmxt_wmulwum
, "wmulwum", WMULWUM
)
23950 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr
, "wmulwsmr", WMULWSMR
)
23951 IWMMXT2_BUILTIN (iwmmxt_wmulwumr
, "wmulwumr", WMULWUMR
)
23952 IWMMXT2_BUILTIN (iwmmxt_wmulwl
, "wmulwl", WMULWL
)
23953 IWMMXT2_BUILTIN (iwmmxt_wmulsmr
, "wmulsmr", WMULSMR
)
23954 IWMMXT2_BUILTIN (iwmmxt_wmulumr
, "wmulumr", WMULUMR
)
23955 IWMMXT2_BUILTIN (iwmmxt_wqmulm
, "wqmulm", WQMULM
)
23956 IWMMXT2_BUILTIN (iwmmxt_wqmulmr
, "wqmulmr", WQMULMR
)
23957 IWMMXT2_BUILTIN (iwmmxt_wqmulwm
, "wqmulwm", WQMULWM
)
23958 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr
, "wqmulwmr", WQMULWMR
)
23959 IWMMXT_BUILTIN (iwmmxt_walignr0
, "walignr0", WALIGNR0
)
23960 IWMMXT_BUILTIN (iwmmxt_walignr1
, "walignr1", WALIGNR1
)
23961 IWMMXT_BUILTIN (iwmmxt_walignr2
, "walignr2", WALIGNR2
)
23962 IWMMXT_BUILTIN (iwmmxt_walignr3
, "walignr3", WALIGNR3
)
23964 #define IWMMXT_BUILTIN2(code, builtin) \
23965 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23967 #define IWMMXT2_BUILTIN2(code, builtin) \
23968 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23970 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm
, WADDBHUSM
)
23971 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl
, WADDBHUSL
)
23972 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
23973 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
23974 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
23975 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
23976 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
23977 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
23978 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
23979 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
23981 #define CRC32_BUILTIN(L, U) \
23982 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
23984 CRC32_BUILTIN (crc32b
, CRC32B
)
23985 CRC32_BUILTIN (crc32h
, CRC32H
)
23986 CRC32_BUILTIN (crc32w
, CRC32W
)
23987 CRC32_BUILTIN (crc32cb
, CRC32CB
)
23988 CRC32_BUILTIN (crc32ch
, CRC32CH
)
23989 CRC32_BUILTIN (crc32cw
, CRC32CW
)
23990 #undef CRC32_BUILTIN
23993 #define CRYPTO_BUILTIN(L, U) \
23994 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
23999 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24000 #define CRYPTO1(L, U, R, A)
24001 #define CRYPTO3(L, U, R, A1, A2, A3)
24002 #include "crypto.def"
24009 static const struct builtin_description bdesc_1arg
[] =
24011 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
24012 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
24013 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
24014 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
24015 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
24016 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
24017 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
24018 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
24019 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
24020 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
24021 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
24022 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
24023 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
24024 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
24025 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
24026 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
24027 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
24028 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
24029 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3
, "wabsb", WABSB
)
24030 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3
, "wabsh", WABSH
)
24031 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3
, "wabsw", WABSW
)
24032 IWMMXT_BUILTIN (tbcstv8qi
, "tbcstb", TBCSTB
)
24033 IWMMXT_BUILTIN (tbcstv4hi
, "tbcsth", TBCSTH
)
24034 IWMMXT_BUILTIN (tbcstv2si
, "tbcstw", TBCSTW
)
24036 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24037 #define CRYPTO2(L, U, R, A1, A2)
24038 #define CRYPTO3(L, U, R, A1, A2, A3)
24039 #include "crypto.def"
24045 static const struct builtin_description bdesc_3arg
[] =
24047 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24048 #define CRYPTO1(L, U, R, A)
24049 #define CRYPTO2(L, U, R, A1, A2)
24050 #include "crypto.def"
24055 #undef CRYPTO_BUILTIN
24057 /* Set up all the iWMMXt builtins. This is not called if
24058 TARGET_IWMMXT is zero. */
24061 arm_init_iwmmxt_builtins (void)
24063 const struct builtin_description
* d
;
24066 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
24067 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
24068 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
24070 tree v8qi_ftype_v8qi_v8qi_int
24071 = build_function_type_list (V8QI_type_node
,
24072 V8QI_type_node
, V8QI_type_node
,
24073 integer_type_node
, NULL_TREE
);
24074 tree v4hi_ftype_v4hi_int
24075 = build_function_type_list (V4HI_type_node
,
24076 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24077 tree v2si_ftype_v2si_int
24078 = build_function_type_list (V2SI_type_node
,
24079 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24080 tree v2si_ftype_di_di
24081 = build_function_type_list (V2SI_type_node
,
24082 long_long_integer_type_node
,
24083 long_long_integer_type_node
,
24085 tree di_ftype_di_int
24086 = build_function_type_list (long_long_integer_type_node
,
24087 long_long_integer_type_node
,
24088 integer_type_node
, NULL_TREE
);
24089 tree di_ftype_di_int_int
24090 = build_function_type_list (long_long_integer_type_node
,
24091 long_long_integer_type_node
,
24093 integer_type_node
, NULL_TREE
);
24094 tree int_ftype_v8qi
24095 = build_function_type_list (integer_type_node
,
24096 V8QI_type_node
, NULL_TREE
);
24097 tree int_ftype_v4hi
24098 = build_function_type_list (integer_type_node
,
24099 V4HI_type_node
, NULL_TREE
);
24100 tree int_ftype_v2si
24101 = build_function_type_list (integer_type_node
,
24102 V2SI_type_node
, NULL_TREE
);
24103 tree int_ftype_v8qi_int
24104 = build_function_type_list (integer_type_node
,
24105 V8QI_type_node
, integer_type_node
, NULL_TREE
);
24106 tree int_ftype_v4hi_int
24107 = build_function_type_list (integer_type_node
,
24108 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24109 tree int_ftype_v2si_int
24110 = build_function_type_list (integer_type_node
,
24111 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24112 tree v8qi_ftype_v8qi_int_int
24113 = build_function_type_list (V8QI_type_node
,
24114 V8QI_type_node
, integer_type_node
,
24115 integer_type_node
, NULL_TREE
);
24116 tree v4hi_ftype_v4hi_int_int
24117 = build_function_type_list (V4HI_type_node
,
24118 V4HI_type_node
, integer_type_node
,
24119 integer_type_node
, NULL_TREE
);
24120 tree v2si_ftype_v2si_int_int
24121 = build_function_type_list (V2SI_type_node
,
24122 V2SI_type_node
, integer_type_node
,
24123 integer_type_node
, NULL_TREE
);
24124 /* Miscellaneous. */
24125 tree v8qi_ftype_v4hi_v4hi
24126 = build_function_type_list (V8QI_type_node
,
24127 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24128 tree v4hi_ftype_v2si_v2si
24129 = build_function_type_list (V4HI_type_node
,
24130 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24131 tree v8qi_ftype_v4hi_v8qi
24132 = build_function_type_list (V8QI_type_node
,
24133 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
24134 tree v2si_ftype_v4hi_v4hi
24135 = build_function_type_list (V2SI_type_node
,
24136 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24137 tree v2si_ftype_v8qi_v8qi
24138 = build_function_type_list (V2SI_type_node
,
24139 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24140 tree v4hi_ftype_v4hi_di
24141 = build_function_type_list (V4HI_type_node
,
24142 V4HI_type_node
, long_long_integer_type_node
,
24144 tree v2si_ftype_v2si_di
24145 = build_function_type_list (V2SI_type_node
,
24146 V2SI_type_node
, long_long_integer_type_node
,
24149 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
24150 tree int_ftype_void
24151 = build_function_type_list (integer_type_node
, NULL_TREE
);
24153 = build_function_type_list (long_long_integer_type_node
,
24154 V8QI_type_node
, NULL_TREE
);
24156 = build_function_type_list (long_long_integer_type_node
,
24157 V4HI_type_node
, NULL_TREE
);
24159 = build_function_type_list (long_long_integer_type_node
,
24160 V2SI_type_node
, NULL_TREE
);
24161 tree v2si_ftype_v4hi
24162 = build_function_type_list (V2SI_type_node
,
24163 V4HI_type_node
, NULL_TREE
);
24164 tree v4hi_ftype_v8qi
24165 = build_function_type_list (V4HI_type_node
,
24166 V8QI_type_node
, NULL_TREE
);
24167 tree v8qi_ftype_v8qi
24168 = build_function_type_list (V8QI_type_node
,
24169 V8QI_type_node
, NULL_TREE
);
24170 tree v4hi_ftype_v4hi
24171 = build_function_type_list (V4HI_type_node
,
24172 V4HI_type_node
, NULL_TREE
);
24173 tree v2si_ftype_v2si
24174 = build_function_type_list (V2SI_type_node
,
24175 V2SI_type_node
, NULL_TREE
);
24177 tree di_ftype_di_v4hi_v4hi
24178 = build_function_type_list (long_long_unsigned_type_node
,
24179 long_long_unsigned_type_node
,
24180 V4HI_type_node
, V4HI_type_node
,
24183 tree di_ftype_v4hi_v4hi
24184 = build_function_type_list (long_long_unsigned_type_node
,
24185 V4HI_type_node
,V4HI_type_node
,
24188 tree v2si_ftype_v2si_v4hi_v4hi
24189 = build_function_type_list (V2SI_type_node
,
24190 V2SI_type_node
, V4HI_type_node
,
24191 V4HI_type_node
, NULL_TREE
);
24193 tree v2si_ftype_v2si_v8qi_v8qi
24194 = build_function_type_list (V2SI_type_node
,
24195 V2SI_type_node
, V8QI_type_node
,
24196 V8QI_type_node
, NULL_TREE
);
24198 tree di_ftype_di_v2si_v2si
24199 = build_function_type_list (long_long_unsigned_type_node
,
24200 long_long_unsigned_type_node
,
24201 V2SI_type_node
, V2SI_type_node
,
24204 tree di_ftype_di_di_int
24205 = build_function_type_list (long_long_unsigned_type_node
,
24206 long_long_unsigned_type_node
,
24207 long_long_unsigned_type_node
,
24208 integer_type_node
, NULL_TREE
);
24210 tree void_ftype_int
24211 = build_function_type_list (void_type_node
,
24212 integer_type_node
, NULL_TREE
);
24214 tree v8qi_ftype_char
24215 = build_function_type_list (V8QI_type_node
,
24216 signed_char_type_node
, NULL_TREE
);
24218 tree v4hi_ftype_short
24219 = build_function_type_list (V4HI_type_node
,
24220 short_integer_type_node
, NULL_TREE
);
24222 tree v2si_ftype_int
24223 = build_function_type_list (V2SI_type_node
,
24224 integer_type_node
, NULL_TREE
);
24226 /* Normal vector binops. */
24227 tree v8qi_ftype_v8qi_v8qi
24228 = build_function_type_list (V8QI_type_node
,
24229 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24230 tree v4hi_ftype_v4hi_v4hi
24231 = build_function_type_list (V4HI_type_node
,
24232 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
24233 tree v2si_ftype_v2si_v2si
24234 = build_function_type_list (V2SI_type_node
,
24235 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24236 tree di_ftype_di_di
24237 = build_function_type_list (long_long_unsigned_type_node
,
24238 long_long_unsigned_type_node
,
24239 long_long_unsigned_type_node
,
24242 /* Add all builtins that are more or less simple operations on two
24244 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
24246 /* Use one of the operands; the target can have a different mode for
24247 mask-generating compares. */
24248 enum machine_mode mode
;
24251 if (d
->name
== 0 || !(d
->mask
== FL_IWMMXT
|| d
->mask
== FL_IWMMXT2
))
24254 mode
= insn_data
[d
->icode
].operand
[1].mode
;
24259 type
= v8qi_ftype_v8qi_v8qi
;
24262 type
= v4hi_ftype_v4hi_v4hi
;
24265 type
= v2si_ftype_v2si_v2si
;
24268 type
= di_ftype_di_di
;
24272 gcc_unreachable ();
24275 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
24278 /* Add the remaining MMX insns with somewhat more complicated types. */
24279 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24280 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24281 ARM_BUILTIN_ ## CODE)
24283 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24284 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24285 ARM_BUILTIN_ ## CODE)
24287 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
24288 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
24289 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
24290 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
24291 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
24292 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
24293 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
24294 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
24295 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
24297 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
24298 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
24299 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
24300 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
24301 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
24302 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
24304 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
24305 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
24306 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
24307 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
24308 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
24309 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
24311 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
24312 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
24313 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
24314 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
24315 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
24316 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
24318 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
24319 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
24320 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
24321 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
24322 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
24323 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
24325 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
24327 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
24328 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
24329 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
24330 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
24331 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
24332 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
24333 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
24334 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
24335 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
24336 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
24338 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
24339 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
24340 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
24341 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
24342 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
24343 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
24344 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
24345 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
24346 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
24348 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
24349 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
24350 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
24352 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
24353 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
24354 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
24356 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
24357 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
24359 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
24360 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
24361 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
24362 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
24363 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
24364 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
24366 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
24367 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
24368 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
24369 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
24370 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
24371 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
24372 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
24373 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
24374 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
24375 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
24376 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
24377 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
24379 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
24380 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
24381 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
24382 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
24384 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
24385 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
24386 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
24387 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
24388 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
24389 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
24390 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
24392 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
24393 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
24394 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
24396 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
24397 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
24398 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
24399 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
24401 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
24402 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
24403 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
24404 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
24406 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
24407 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
24408 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
24409 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
24411 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
24412 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
24413 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
24414 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
24416 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
24417 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
24418 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
24419 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
24421 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
24422 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
24423 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
24424 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
24426 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
24428 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
24429 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
24430 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
24432 #undef iwmmx_mbuiltin
24433 #undef iwmmx2_mbuiltin
24437 arm_init_fp16_builtins (void)
24439 tree fp16_type
= make_node (REAL_TYPE
);
24440 TYPE_PRECISION (fp16_type
) = 16;
24441 layout_type (fp16_type
);
24442 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
24446 arm_init_crc32_builtins ()
24448 tree si_ftype_si_qi
24449 = build_function_type_list (unsigned_intSI_type_node
,
24450 unsigned_intSI_type_node
,
24451 unsigned_intQI_type_node
, NULL_TREE
);
24452 tree si_ftype_si_hi
24453 = build_function_type_list (unsigned_intSI_type_node
,
24454 unsigned_intSI_type_node
,
24455 unsigned_intHI_type_node
, NULL_TREE
);
24456 tree si_ftype_si_si
24457 = build_function_type_list (unsigned_intSI_type_node
,
24458 unsigned_intSI_type_node
,
24459 unsigned_intSI_type_node
, NULL_TREE
);
24461 arm_builtin_decls
[ARM_BUILTIN_CRC32B
]
24462 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi
,
24463 ARM_BUILTIN_CRC32B
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24464 arm_builtin_decls
[ARM_BUILTIN_CRC32H
]
24465 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi
,
24466 ARM_BUILTIN_CRC32H
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24467 arm_builtin_decls
[ARM_BUILTIN_CRC32W
]
24468 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si
,
24469 ARM_BUILTIN_CRC32W
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24470 arm_builtin_decls
[ARM_BUILTIN_CRC32CB
]
24471 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi
,
24472 ARM_BUILTIN_CRC32CB
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24473 arm_builtin_decls
[ARM_BUILTIN_CRC32CH
]
24474 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi
,
24475 ARM_BUILTIN_CRC32CH
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24476 arm_builtin_decls
[ARM_BUILTIN_CRC32CW
]
24477 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si
,
24478 ARM_BUILTIN_CRC32CW
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24482 arm_init_builtins (void)
24484 if (TARGET_REALLY_IWMMXT
)
24485 arm_init_iwmmxt_builtins ();
24488 arm_init_neon_builtins ();
24490 if (arm_fp16_format
)
24491 arm_init_fp16_builtins ();
24494 arm_init_crc32_builtins ();
24497 /* Return the ARM builtin for CODE. */
24500 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
24502 if (code
>= ARM_BUILTIN_MAX
)
24503 return error_mark_node
;
24505 return arm_builtin_decls
[code
];
24508 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24510 static const char *
24511 arm_invalid_parameter_type (const_tree t
)
24513 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24514 return N_("function parameters cannot have __fp16 type");
24518 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24520 static const char *
24521 arm_invalid_return_type (const_tree t
)
24523 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24524 return N_("functions cannot return __fp16 type");
24528 /* Implement TARGET_PROMOTED_TYPE. */
24531 arm_promoted_type (const_tree t
)
24533 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24534 return float_type_node
;
24538 /* Implement TARGET_CONVERT_TO_TYPE.
24539 Specifically, this hook implements the peculiarity of the ARM
24540 half-precision floating-point C semantics that requires conversions between
24541 __fp16 to or from double to do an intermediate conversion to float. */
24544 arm_convert_to_type (tree type
, tree expr
)
24546 tree fromtype
= TREE_TYPE (expr
);
24547 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
24549 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
24550 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
24551 return convert (type
, convert (float_type_node
, expr
));
24555 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24556 This simply adds HFmode as a supported mode; even though we don't
24557 implement arithmetic on this type directly, it's supported by
24558 optabs conversions, much the way the double-word arithmetic is
24559 special-cased in the default hook. */
24562 arm_scalar_mode_supported_p (enum machine_mode mode
)
24564 if (mode
== HFmode
)
24565 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
24566 else if (ALL_FIXED_POINT_MODE_P (mode
))
24569 return default_scalar_mode_supported_p (mode
);
24572 /* Errors in the source file can cause expand_expr to return const0_rtx
24573 where we expect a vector. To avoid crashing, use one of the vector
24574 clear instructions. */
24577 safe_vector_operand (rtx x
, enum machine_mode mode
)
24579 if (x
!= const0_rtx
)
24581 x
= gen_reg_rtx (mode
);
24583 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
24584 : gen_rtx_SUBREG (DImode
, x
, 0)));
24588 /* Function to expand ternary builtins. */
24590 arm_expand_ternop_builtin (enum insn_code icode
,
24591 tree exp
, rtx target
)
24594 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24595 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24596 tree arg2
= CALL_EXPR_ARG (exp
, 2);
24598 rtx op0
= expand_normal (arg0
);
24599 rtx op1
= expand_normal (arg1
);
24600 rtx op2
= expand_normal (arg2
);
24601 rtx op3
= NULL_RTX
;
24603 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24604 lane operand depending on endianness. */
24605 bool builtin_sha1cpm_p
= false;
24607 if (insn_data
[icode
].n_operands
== 5)
24609 gcc_assert (icode
== CODE_FOR_crypto_sha1c
24610 || icode
== CODE_FOR_crypto_sha1p
24611 || icode
== CODE_FOR_crypto_sha1m
);
24612 builtin_sha1cpm_p
= true;
24614 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24615 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24616 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24617 enum machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
24620 if (VECTOR_MODE_P (mode0
))
24621 op0
= safe_vector_operand (op0
, mode0
);
24622 if (VECTOR_MODE_P (mode1
))
24623 op1
= safe_vector_operand (op1
, mode1
);
24624 if (VECTOR_MODE_P (mode2
))
24625 op2
= safe_vector_operand (op2
, mode2
);
24628 || GET_MODE (target
) != tmode
24629 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24630 target
= gen_reg_rtx (tmode
);
24632 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
24633 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
)
24634 && (GET_MODE (op2
) == mode2
|| GET_MODE (op2
) == VOIDmode
));
24636 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24637 op0
= copy_to_mode_reg (mode0
, op0
);
24638 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24639 op1
= copy_to_mode_reg (mode1
, op1
);
24640 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
24641 op2
= copy_to_mode_reg (mode2
, op2
);
24642 if (builtin_sha1cpm_p
)
24643 op3
= GEN_INT (TARGET_BIG_END
? 1 : 0);
24645 if (builtin_sha1cpm_p
)
24646 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
24648 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
24655 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24658 arm_expand_binop_builtin (enum insn_code icode
,
24659 tree exp
, rtx target
)
24662 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24663 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24664 rtx op0
= expand_normal (arg0
);
24665 rtx op1
= expand_normal (arg1
);
24666 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24667 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24668 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24670 if (VECTOR_MODE_P (mode0
))
24671 op0
= safe_vector_operand (op0
, mode0
);
24672 if (VECTOR_MODE_P (mode1
))
24673 op1
= safe_vector_operand (op1
, mode1
);
24676 || GET_MODE (target
) != tmode
24677 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24678 target
= gen_reg_rtx (tmode
);
24680 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
24681 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
24683 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24684 op0
= copy_to_mode_reg (mode0
, op0
);
24685 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24686 op1
= copy_to_mode_reg (mode1
, op1
);
24688 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24695 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24698 arm_expand_unop_builtin (enum insn_code icode
,
24699 tree exp
, rtx target
, int do_load
)
24702 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24703 rtx op0
= expand_normal (arg0
);
24704 rtx op1
= NULL_RTX
;
24705 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24706 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24707 bool builtin_sha1h_p
= false;
24709 if (insn_data
[icode
].n_operands
== 3)
24711 gcc_assert (icode
== CODE_FOR_crypto_sha1h
);
24712 builtin_sha1h_p
= true;
24716 || GET_MODE (target
) != tmode
24717 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24718 target
= gen_reg_rtx (tmode
);
24720 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
24723 if (VECTOR_MODE_P (mode0
))
24724 op0
= safe_vector_operand (op0
, mode0
);
24726 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24727 op0
= copy_to_mode_reg (mode0
, op0
);
24729 if (builtin_sha1h_p
)
24730 op1
= GEN_INT (TARGET_BIG_END
? 1 : 0);
24732 if (builtin_sha1h_p
)
24733 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24735 pat
= GEN_FCN (icode
) (target
, op0
);
/* Kinds of argument that an expanded Neon builtin can take, consumed by
   arm_expand_neon_args.  The variadic argument list of that function is
   a sequence of these values terminated by NEON_ARG_STOP.  */
typedef enum {
  NEON_ARG_COPY_TO_REG,	/* Value operand: force into a register.  */
  NEON_ARG_CONSTANT,	/* Operand must satisfy an immediate predicate.  */
  NEON_ARG_MEMORY,	/* Pointer operand: dereference to a MEM.  */
  NEON_ARG_STOP		/* Sentinel terminating the argument list.  */
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
24751 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24752 and return an expression for the accessed memory.
24754 The intrinsic function operates on a block of registers that has
24755 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24756 function references the memory at EXP of type TYPE and in mode
24757 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24761 neon_dereference_pointer (tree exp
, tree type
, enum machine_mode mem_mode
,
24762 enum machine_mode reg_mode
,
24763 neon_builtin_type_mode type_mode
)
24765 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
24766 tree elem_type
, upper_bound
, array_type
;
24768 /* Work out the size of the register block in bytes. */
24769 reg_size
= GET_MODE_SIZE (reg_mode
);
24771 /* Work out the size of each vector in bytes. */
24772 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
24773 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
24775 /* Work out how many vectors there are. */
24776 gcc_assert (reg_size
% vector_size
== 0);
24777 nvectors
= reg_size
/ vector_size
;
24779 /* Work out the type of each element. */
24780 gcc_assert (POINTER_TYPE_P (type
));
24781 elem_type
= TREE_TYPE (type
);
24783 /* Work out how many elements are being loaded or stored.
24784 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24785 and memory elements; anything else implies a lane load or store. */
24786 if (mem_mode
== reg_mode
)
24787 nelems
= vector_size
* nvectors
/ int_size_in_bytes (elem_type
);
24791 /* Create a type that describes the full access. */
24792 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
24793 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
24795 /* Dereference EXP using that type. */
24796 return fold_build2 (MEM_REF
, array_type
, exp
,
24797 build_int_cst (build_pointer_type (array_type
), 0));
24800 /* Expand a Neon builtin. */
24802 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
24803 neon_builtin_type_mode type_mode
,
24804 tree exp
, int fcode
, ...)
24808 tree arg
[NEON_MAX_BUILTIN_ARGS
];
24809 rtx op
[NEON_MAX_BUILTIN_ARGS
];
24812 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24813 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
24814 enum machine_mode other_mode
;
24820 || GET_MODE (target
) != tmode
24821 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
24822 target
= gen_reg_rtx (tmode
);
24824 va_start (ap
, fcode
);
24826 formals
= TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls
[fcode
]));
24830 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
24832 if (thisarg
== NEON_ARG_STOP
)
24836 opno
= argc
+ have_retval
;
24837 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
24838 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
24839 arg_type
= TREE_VALUE (formals
);
24840 if (thisarg
== NEON_ARG_MEMORY
)
24842 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
24843 arg
[argc
] = neon_dereference_pointer (arg
[argc
], arg_type
,
24844 mode
[argc
], other_mode
,
24848 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
24850 op
[argc
] = expand_expr (arg
[argc
], NULL_RTX
, VOIDmode
,
24851 (thisarg
== NEON_ARG_MEMORY
24852 ? EXPAND_MEMORY
: EXPAND_NORMAL
));
24856 case NEON_ARG_COPY_TO_REG
:
24857 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24858 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24859 (op
[argc
], mode
[argc
]))
24860 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
24863 case NEON_ARG_CONSTANT
:
24864 /* FIXME: This error message is somewhat unhelpful. */
24865 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24866 (op
[argc
], mode
[argc
]))
24867 error ("argument must be a constant");
24870 case NEON_ARG_MEMORY
:
24871 /* Check if expand failed. */
24872 if (op
[argc
] == const0_rtx
)
24874 gcc_assert (MEM_P (op
[argc
]));
24875 PUT_MODE (op
[argc
], mode
[argc
]);
24876 /* ??? arm_neon.h uses the same built-in functions for signed
24877 and unsigned accesses, casting where necessary. This isn't
24879 set_mem_alias_set (op
[argc
], 0);
24880 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24881 (op
[argc
], mode
[argc
]))
24882 op
[argc
] = (replace_equiv_address
24883 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
24886 case NEON_ARG_STOP
:
24887 gcc_unreachable ();
24891 formals
= TREE_CHAIN (formals
);
24901 pat
= GEN_FCN (icode
) (target
, op
[0]);
24905 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
24909 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
24913 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
24917 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
24921 gcc_unreachable ();
24927 pat
= GEN_FCN (icode
) (op
[0]);
24931 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
24935 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
24939 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
24943 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
24947 gcc_unreachable ();
24958 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24959 constants defined per-instruction or per instruction-variant. Instead, the
24960 required info is looked up in the table neon_builtin_data. */
24962 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
24964 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
24965 neon_itype itype
= d
->itype
;
24966 enum insn_code icode
= d
->code
;
24967 neon_builtin_type_mode type_mode
= d
->mode
;
24974 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24975 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24979 case NEON_SCALARMUL
:
24980 case NEON_SCALARMULL
:
24981 case NEON_SCALARMULH
:
24982 case NEON_SHIFTINSERT
:
24983 case NEON_LOGICBINOP
:
24984 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24985 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
24989 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24990 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
24991 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24995 case NEON_SHIFTIMM
:
24996 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24997 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
25001 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25002 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25007 case NEON_FLOAT_WIDEN
:
25008 case NEON_FLOAT_NARROW
:
25009 case NEON_REINTERP
:
25010 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25011 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25015 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25016 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25018 case NEON_RESULTPAIR
:
25019 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25020 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25024 case NEON_LANEMULL
:
25025 case NEON_LANEMULH
:
25026 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25027 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25028 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25031 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25032 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25033 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25035 case NEON_SHIFTACC
:
25036 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25037 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25038 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25040 case NEON_SCALARMAC
:
25041 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25042 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25043 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25047 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25048 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25052 case NEON_LOADSTRUCT
:
25053 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25054 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
25056 case NEON_LOAD1LANE
:
25057 case NEON_LOADSTRUCTLANE
:
25058 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25059 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25063 case NEON_STORESTRUCT
:
25064 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25065 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25067 case NEON_STORE1LANE
:
25068 case NEON_STORESTRUCTLANE
:
25069 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25070 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25074 gcc_unreachable ();
25077 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25079 neon_reinterpret (rtx dest
, rtx src
)
25081 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
25084 /* Emit code to place a Neon pair result in memory locations (with equal
25087 neon_emit_pair_result_insn (enum machine_mode mode
,
25088 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
25091 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
25092 rtx tmp1
= gen_reg_rtx (mode
);
25093 rtx tmp2
= gen_reg_rtx (mode
);
25095 emit_insn (intfn (tmp1
, op1
, op2
, tmp2
));
25097 emit_move_insn (mem
, tmp1
);
25098 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
25099 emit_move_insn (mem
, tmp2
);
25102 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25103 not to early-clobber SRC registers in the process.
25105 We assume that the operands described by SRC and DEST represent a
25106 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25107 number of components into which the copy has been decomposed. */
25109 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
25113 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
25114 || REGNO (operands
[0]) < REGNO (operands
[1]))
25116 for (i
= 0; i
< count
; i
++)
25118 operands
[2 * i
] = dest
[i
];
25119 operands
[2 * i
+ 1] = src
[i
];
25124 for (i
= 0; i
< count
; i
++)
25126 operands
[2 * i
] = dest
[count
- i
- 1];
25127 operands
[2 * i
+ 1] = src
[count
- i
- 1];
25132 /* Split operands into moves from op[1] + op[2] into op[0]. */
25135 neon_split_vcombine (rtx operands
[3])
25137 unsigned int dest
= REGNO (operands
[0]);
25138 unsigned int src1
= REGNO (operands
[1]);
25139 unsigned int src2
= REGNO (operands
[2]);
25140 enum machine_mode halfmode
= GET_MODE (operands
[1]);
25141 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
25142 rtx destlo
, desthi
;
25144 if (src1
== dest
&& src2
== dest
+ halfregs
)
25146 /* No-op move. Can't split to nothing; emit something. */
25147 emit_note (NOTE_INSN_DELETED
);
25151 /* Preserve register attributes for variable tracking. */
25152 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
25153 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
25154 GET_MODE_SIZE (halfmode
));
25156 /* Special case of reversed high/low parts. Use VSWP. */
25157 if (src2
== dest
&& src1
== dest
+ halfregs
)
25159 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
25160 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
25161 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
25165 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
25167 /* Try to avoid unnecessary moves if part of the result
25168 is in the right place already. */
25170 emit_move_insn (destlo
, operands
[1]);
25171 if (src2
!= dest
+ halfregs
)
25172 emit_move_insn (desthi
, operands
[2]);
25176 if (src2
!= dest
+ halfregs
)
25177 emit_move_insn (desthi
, operands
[2]);
25179 emit_move_insn (destlo
, operands
[1]);
25183 /* Expand an expression EXP that calls a built-in function,
25184 with result going to TARGET if that's convenient
25185 (and in mode MODE if that's convenient).
25186 SUBTARGET may be used as the target for computing one of EXP's operands.
25187 IGNORE is nonzero if the value is to be ignored. */
25190 arm_expand_builtin (tree exp
,
25192 rtx subtarget ATTRIBUTE_UNUSED
,
25193 enum machine_mode mode ATTRIBUTE_UNUSED
,
25194 int ignore ATTRIBUTE_UNUSED
)
25196 const struct builtin_description
* d
;
25197 enum insn_code icode
;
25198 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
25206 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
25208 enum machine_mode tmode
;
25209 enum machine_mode mode0
;
25210 enum machine_mode mode1
;
25211 enum machine_mode mode2
;
25217 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
25218 return arm_expand_neon_builtin (fcode
, exp
, target
);
25222 case ARM_BUILTIN_TEXTRMSB
:
25223 case ARM_BUILTIN_TEXTRMUB
:
25224 case ARM_BUILTIN_TEXTRMSH
:
25225 case ARM_BUILTIN_TEXTRMUH
:
25226 case ARM_BUILTIN_TEXTRMSW
:
25227 case ARM_BUILTIN_TEXTRMUW
:
25228 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
25229 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
25230 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
25231 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
25232 : CODE_FOR_iwmmxt_textrmw
);
25234 arg0
= CALL_EXPR_ARG (exp
, 0);
25235 arg1
= CALL_EXPR_ARG (exp
, 1);
25236 op0
= expand_normal (arg0
);
25237 op1
= expand_normal (arg1
);
25238 tmode
= insn_data
[icode
].operand
[0].mode
;
25239 mode0
= insn_data
[icode
].operand
[1].mode
;
25240 mode1
= insn_data
[icode
].operand
[2].mode
;
25242 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25243 op0
= copy_to_mode_reg (mode0
, op0
);
25244 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25246 /* @@@ better error message */
25247 error ("selector must be an immediate");
25248 return gen_reg_rtx (tmode
);
25251 opint
= INTVAL (op1
);
25252 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
25254 if (opint
> 7 || opint
< 0)
25255 error ("the range of selector should be in 0 to 7");
25257 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
25259 if (opint
> 3 || opint
< 0)
25260 error ("the range of selector should be in 0 to 3");
25262 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25264 if (opint
> 1 || opint
< 0)
25265 error ("the range of selector should be in 0 to 1");
25269 || GET_MODE (target
) != tmode
25270 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25271 target
= gen_reg_rtx (tmode
);
25272 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25278 case ARM_BUILTIN_WALIGNI
:
25279 /* If op2 is immediate, call walighi, else call walighr. */
25280 arg0
= CALL_EXPR_ARG (exp
, 0);
25281 arg1
= CALL_EXPR_ARG (exp
, 1);
25282 arg2
= CALL_EXPR_ARG (exp
, 2);
25283 op0
= expand_normal (arg0
);
25284 op1
= expand_normal (arg1
);
25285 op2
= expand_normal (arg2
);
25286 if (CONST_INT_P (op2
))
25288 icode
= CODE_FOR_iwmmxt_waligni
;
25289 tmode
= insn_data
[icode
].operand
[0].mode
;
25290 mode0
= insn_data
[icode
].operand
[1].mode
;
25291 mode1
= insn_data
[icode
].operand
[2].mode
;
25292 mode2
= insn_data
[icode
].operand
[3].mode
;
25293 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25294 op0
= copy_to_mode_reg (mode0
, op0
);
25295 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25296 op1
= copy_to_mode_reg (mode1
, op1
);
25297 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
25298 selector
= INTVAL (op2
);
25299 if (selector
> 7 || selector
< 0)
25300 error ("the range of selector should be in 0 to 7");
25304 icode
= CODE_FOR_iwmmxt_walignr
;
25305 tmode
= insn_data
[icode
].operand
[0].mode
;
25306 mode0
= insn_data
[icode
].operand
[1].mode
;
25307 mode1
= insn_data
[icode
].operand
[2].mode
;
25308 mode2
= insn_data
[icode
].operand
[3].mode
;
25309 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25310 op0
= copy_to_mode_reg (mode0
, op0
);
25311 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25312 op1
= copy_to_mode_reg (mode1
, op1
);
25313 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25314 op2
= copy_to_mode_reg (mode2
, op2
);
25317 || GET_MODE (target
) != tmode
25318 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25319 target
= gen_reg_rtx (tmode
);
25320 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25326 case ARM_BUILTIN_TINSRB
:
25327 case ARM_BUILTIN_TINSRH
:
25328 case ARM_BUILTIN_TINSRW
:
25329 case ARM_BUILTIN_WMERGE
:
25330 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
25331 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
25332 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
25333 : CODE_FOR_iwmmxt_tinsrw
);
25334 arg0
= CALL_EXPR_ARG (exp
, 0);
25335 arg1
= CALL_EXPR_ARG (exp
, 1);
25336 arg2
= CALL_EXPR_ARG (exp
, 2);
25337 op0
= expand_normal (arg0
);
25338 op1
= expand_normal (arg1
);
25339 op2
= expand_normal (arg2
);
25340 tmode
= insn_data
[icode
].operand
[0].mode
;
25341 mode0
= insn_data
[icode
].operand
[1].mode
;
25342 mode1
= insn_data
[icode
].operand
[2].mode
;
25343 mode2
= insn_data
[icode
].operand
[3].mode
;
25345 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25346 op0
= copy_to_mode_reg (mode0
, op0
);
25347 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25348 op1
= copy_to_mode_reg (mode1
, op1
);
25349 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25351 error ("selector must be an immediate");
25354 if (icode
== CODE_FOR_iwmmxt_wmerge
)
25356 selector
= INTVAL (op2
);
25357 if (selector
> 7 || selector
< 0)
25358 error ("the range of selector should be in 0 to 7");
25360 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
25361 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
25362 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
25365 selector
= INTVAL (op2
);
25366 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
25367 error ("the range of selector should be in 0 to 7");
25368 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
25369 error ("the range of selector should be in 0 to 3");
25370 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
25371 error ("the range of selector should be in 0 to 1");
25373 op2
= GEN_INT (mask
);
25376 || GET_MODE (target
) != tmode
25377 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25378 target
= gen_reg_rtx (tmode
);
25379 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25385 case ARM_BUILTIN_SETWCGR0
:
25386 case ARM_BUILTIN_SETWCGR1
:
25387 case ARM_BUILTIN_SETWCGR2
:
25388 case ARM_BUILTIN_SETWCGR3
:
25389 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
25390 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
25391 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
25392 : CODE_FOR_iwmmxt_setwcgr3
);
25393 arg0
= CALL_EXPR_ARG (exp
, 0);
25394 op0
= expand_normal (arg0
);
25395 mode0
= insn_data
[icode
].operand
[0].mode
;
25396 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
25397 op0
= copy_to_mode_reg (mode0
, op0
);
25398 pat
= GEN_FCN (icode
) (op0
);
25404 case ARM_BUILTIN_GETWCGR0
:
25405 case ARM_BUILTIN_GETWCGR1
:
25406 case ARM_BUILTIN_GETWCGR2
:
25407 case ARM_BUILTIN_GETWCGR3
:
25408 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
25409 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
25410 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
25411 : CODE_FOR_iwmmxt_getwcgr3
);
25412 tmode
= insn_data
[icode
].operand
[0].mode
;
25414 || GET_MODE (target
) != tmode
25415 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25416 target
= gen_reg_rtx (tmode
);
25417 pat
= GEN_FCN (icode
) (target
);
25423 case ARM_BUILTIN_WSHUFH
:
25424 icode
= CODE_FOR_iwmmxt_wshufh
;
25425 arg0
= CALL_EXPR_ARG (exp
, 0);
25426 arg1
= CALL_EXPR_ARG (exp
, 1);
25427 op0
= expand_normal (arg0
);
25428 op1
= expand_normal (arg1
);
25429 tmode
= insn_data
[icode
].operand
[0].mode
;
25430 mode1
= insn_data
[icode
].operand
[1].mode
;
25431 mode2
= insn_data
[icode
].operand
[2].mode
;
25433 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
25434 op0
= copy_to_mode_reg (mode1
, op0
);
25435 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
25437 error ("mask must be an immediate");
25440 selector
= INTVAL (op1
);
25441 if (selector
< 0 || selector
> 255)
25442 error ("the range of mask should be in 0 to 255");
25444 || GET_MODE (target
) != tmode
25445 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25446 target
= gen_reg_rtx (tmode
);
25447 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25453 case ARM_BUILTIN_WMADDS
:
25454 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
25455 case ARM_BUILTIN_WMADDSX
:
25456 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
25457 case ARM_BUILTIN_WMADDSN
:
25458 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
25459 case ARM_BUILTIN_WMADDU
:
25460 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
25461 case ARM_BUILTIN_WMADDUX
:
25462 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
25463 case ARM_BUILTIN_WMADDUN
:
25464 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
25465 case ARM_BUILTIN_WSADBZ
:
25466 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
25467 case ARM_BUILTIN_WSADHZ
:
25468 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
25470 /* Several three-argument builtins. */
25471 case ARM_BUILTIN_WMACS
:
25472 case ARM_BUILTIN_WMACU
:
25473 case ARM_BUILTIN_TMIA
:
25474 case ARM_BUILTIN_TMIAPH
:
25475 case ARM_BUILTIN_TMIATT
:
25476 case ARM_BUILTIN_TMIATB
:
25477 case ARM_BUILTIN_TMIABT
:
25478 case ARM_BUILTIN_TMIABB
:
25479 case ARM_BUILTIN_WQMIABB
:
25480 case ARM_BUILTIN_WQMIABT
:
25481 case ARM_BUILTIN_WQMIATB
:
25482 case ARM_BUILTIN_WQMIATT
:
25483 case ARM_BUILTIN_WQMIABBN
:
25484 case ARM_BUILTIN_WQMIABTN
:
25485 case ARM_BUILTIN_WQMIATBN
:
25486 case ARM_BUILTIN_WQMIATTN
:
25487 case ARM_BUILTIN_WMIABB
:
25488 case ARM_BUILTIN_WMIABT
:
25489 case ARM_BUILTIN_WMIATB
:
25490 case ARM_BUILTIN_WMIATT
:
25491 case ARM_BUILTIN_WMIABBN
:
25492 case ARM_BUILTIN_WMIABTN
:
25493 case ARM_BUILTIN_WMIATBN
:
25494 case ARM_BUILTIN_WMIATTN
:
25495 case ARM_BUILTIN_WMIAWBB
:
25496 case ARM_BUILTIN_WMIAWBT
:
25497 case ARM_BUILTIN_WMIAWTB
:
25498 case ARM_BUILTIN_WMIAWTT
:
25499 case ARM_BUILTIN_WMIAWBBN
:
25500 case ARM_BUILTIN_WMIAWBTN
:
25501 case ARM_BUILTIN_WMIAWTBN
:
25502 case ARM_BUILTIN_WMIAWTTN
:
25503 case ARM_BUILTIN_WSADB
:
25504 case ARM_BUILTIN_WSADH
:
25505 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
25506 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
25507 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
25508 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
25509 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
25510 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
25511 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
25512 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
25513 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
25514 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
25515 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
25516 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
25517 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
25518 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
25519 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
25520 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
25521 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
25522 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
25523 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
25524 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
25525 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
25526 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
25527 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
25528 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
25529 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
25530 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
25531 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
25532 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
25533 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
25534 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
25535 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
25536 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
25537 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
25538 : CODE_FOR_iwmmxt_wsadh
);
25539 arg0
= CALL_EXPR_ARG (exp
, 0);
25540 arg1
= CALL_EXPR_ARG (exp
, 1);
25541 arg2
= CALL_EXPR_ARG (exp
, 2);
25542 op0
= expand_normal (arg0
);
25543 op1
= expand_normal (arg1
);
25544 op2
= expand_normal (arg2
);
25545 tmode
= insn_data
[icode
].operand
[0].mode
;
25546 mode0
= insn_data
[icode
].operand
[1].mode
;
25547 mode1
= insn_data
[icode
].operand
[2].mode
;
25548 mode2
= insn_data
[icode
].operand
[3].mode
;
25550 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25551 op0
= copy_to_mode_reg (mode0
, op0
);
25552 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25553 op1
= copy_to_mode_reg (mode1
, op1
);
25554 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25555 op2
= copy_to_mode_reg (mode2
, op2
);
25557 || GET_MODE (target
) != tmode
25558 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25559 target
= gen_reg_rtx (tmode
);
25560 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25566 case ARM_BUILTIN_WZERO
:
25567 target
= gen_reg_rtx (DImode
);
25568 emit_insn (gen_iwmmxt_clrdi (target
));
25571 case ARM_BUILTIN_WSRLHI
:
25572 case ARM_BUILTIN_WSRLWI
:
25573 case ARM_BUILTIN_WSRLDI
:
25574 case ARM_BUILTIN_WSLLHI
:
25575 case ARM_BUILTIN_WSLLWI
:
25576 case ARM_BUILTIN_WSLLDI
:
25577 case ARM_BUILTIN_WSRAHI
:
25578 case ARM_BUILTIN_WSRAWI
:
25579 case ARM_BUILTIN_WSRADI
:
25580 case ARM_BUILTIN_WRORHI
:
25581 case ARM_BUILTIN_WRORWI
:
25582 case ARM_BUILTIN_WRORDI
:
25583 case ARM_BUILTIN_WSRLH
:
25584 case ARM_BUILTIN_WSRLW
:
25585 case ARM_BUILTIN_WSRLD
:
25586 case ARM_BUILTIN_WSLLH
:
25587 case ARM_BUILTIN_WSLLW
:
25588 case ARM_BUILTIN_WSLLD
:
25589 case ARM_BUILTIN_WSRAH
:
25590 case ARM_BUILTIN_WSRAW
:
25591 case ARM_BUILTIN_WSRAD
:
25592 case ARM_BUILTIN_WRORH
:
25593 case ARM_BUILTIN_WRORW
:
25594 case ARM_BUILTIN_WRORD
:
25595 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
25596 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
25597 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
25598 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
25599 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
25600 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
25601 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
25602 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
25603 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
25604 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
25605 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
25606 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
25607 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
25608 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
25609 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
25610 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
25611 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
25612 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
25613 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
25614 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
25615 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
25616 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
25617 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
25618 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
25619 : CODE_FOR_nothing
);
25620 arg1
= CALL_EXPR_ARG (exp
, 1);
25621 op1
= expand_normal (arg1
);
25622 if (GET_MODE (op1
) == VOIDmode
)
25624 imm
= INTVAL (op1
);
25625 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
25626 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
25627 && (imm
< 0 || imm
> 32))
25629 if (fcode
== ARM_BUILTIN_WRORHI
)
25630 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25631 else if (fcode
== ARM_BUILTIN_WRORWI
)
25632 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25633 else if (fcode
== ARM_BUILTIN_WRORH
)
25634 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25636 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25638 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
25639 && (imm
< 0 || imm
> 64))
25641 if (fcode
== ARM_BUILTIN_WRORDI
)
25642 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25644 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25648 if (fcode
== ARM_BUILTIN_WSRLHI
)
25649 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25650 else if (fcode
== ARM_BUILTIN_WSRLWI
)
25651 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25652 else if (fcode
== ARM_BUILTIN_WSRLDI
)
25653 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25654 else if (fcode
== ARM_BUILTIN_WSLLHI
)
25655 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25656 else if (fcode
== ARM_BUILTIN_WSLLWI
)
25657 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25658 else if (fcode
== ARM_BUILTIN_WSLLDI
)
25659 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25660 else if (fcode
== ARM_BUILTIN_WSRAHI
)
25661 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25662 else if (fcode
== ARM_BUILTIN_WSRAWI
)
25663 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25664 else if (fcode
== ARM_BUILTIN_WSRADI
)
25665 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25666 else if (fcode
== ARM_BUILTIN_WSRLH
)
25667 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25668 else if (fcode
== ARM_BUILTIN_WSRLW
)
25669 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25670 else if (fcode
== ARM_BUILTIN_WSRLD
)
25671 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25672 else if (fcode
== ARM_BUILTIN_WSLLH
)
25673 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25674 else if (fcode
== ARM_BUILTIN_WSLLW
)
25675 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25676 else if (fcode
== ARM_BUILTIN_WSLLD
)
25677 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25678 else if (fcode
== ARM_BUILTIN_WSRAH
)
25679 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25680 else if (fcode
== ARM_BUILTIN_WSRAW
)
25681 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25683 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25686 return arm_expand_binop_builtin (icode
, exp
, target
);
25692 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
25693 if (d
->code
== (const enum arm_builtins
) fcode
)
25694 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
25696 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
25697 if (d
->code
== (const enum arm_builtins
) fcode
)
25698 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
25700 for (i
= 0, d
= bdesc_3arg
; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
25701 if (d
->code
== (const enum arm_builtins
) fcode
)
25702 return arm_expand_ternop_builtin (d
->icode
, exp
, target
);
25704 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.
   Thin wrapper around ctz_hwi: count-trailing-zeros of the mask.
   MASK must be nonzero for the result to be meaningful (ctz of 0 is
   undefined for hardware count-trailing-zero instructions).  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
/* NOTE(review): this block appears to be a garbled source-browser
   extraction: identifiers are split across physical lines, the original
   file's line numbers (25717...25778) are fused into the text, and
   structural lines (braces, `if (i == 0)`, `par[i] = tmp;`, the final
   return) seem to be missing.  Recover the pristine file before
   compiling; the comments below annotate the surviving fragments.  */
25717 /* Like emit_multi_reg_push, but allowing for a different set of
25718 registers to be described as saved. MASK is the set of registers
25719 to be saved; REAL_REGS is the set of registers to be described as
25720 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25723 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
25725 unsigned long regno
;
25726 rtx par
[10], tmp
, reg
, insn
;
25729 /* Build the parallel of the registers actually being stored. */
/* mask &= mask - 1 clears the lowest set bit each iteration, so the
   loop visits exactly the registers present in MASK, lowest first.  */
25730 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
25732 regno
= ctz_hwi (mask
);
25733 reg
= gen_rtx_REG (SImode
, regno
);
/* First element carries the UNSPEC_PUSH_MULT marker; the remaining
   registers are wrapped in USE rtxes (presumably selected by a missing
   `if (i == 0) ... else ...` -- TODO confirm against pristine source).  */
25736 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
25738 tmp
= gen_rtx_USE (VOIDmode
, reg
);
/* Turn par[0] into the SET of a pre-modified stack slot: SP is
   decremented by 4 bytes per pushed register.  */
25743 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
25744 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
25745 tmp
= gen_frame_mem (BLKmode
, tmp
);
25746 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
25749 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
25750 insn
= emit_insn (tmp
);
25752 /* Always build the stack adjustment note for unwind info. */
25753 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
25754 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
25757 /* Build the parallel of the registers recorded as saved for unwind. */
25758 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
25760 regno
= ctz_hwi (real_regs
);
25761 reg
= gen_rtx_REG (SImode
, regno
);
/* Each described register is recorded at its post-push stack slot.  */
25763 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
25764 tmp
= gen_frame_mem (SImode
, tmp
);
25765 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
25766 RTX_FRAME_RELATED_P (tmp
) = 1;
/* Attach the unwinder's view of the push via REG_FRAME_RELATED_EXPR.  */
25774 RTX_FRAME_RELATED_P (par
[0]) = 1;
25775 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
25778 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
/* NOTE(review): garbled extraction -- identifiers split across lines,
   original line numbers fused into the text, and several statements
   (the pop-register emission inside the loop, braces) missing.  Recover
   the pristine file before compiling.  */
25783 /* Emit code to push or pop registers to or from the stack. F is the
25784 assembly file. MASK is the registers to pop. */
25786 thumb_pop (FILE *f
, unsigned long mask
)
25789 int lo_mask
= mask
& 0xFF;
25790 int pushed_words
= 0;
/* Popping only the PC with no low registers is handled entirely by
   thumb_exit, which emits the interworking-safe return sequence.  */
25794 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
25796 /* Special case. Do not generate a POP PC statement here, do it in
25798 thumb_exit (f
, -1);
25802 fprintf (f
, "\tpop\t{");
25804 /* Look at the low registers first. */
25805 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
25809 asm_fprintf (f
, "%r", regno
);
/* More low registers remain after this one, so a separator follows
   (separator-printing line apparently lost to extraction).  */
25811 if ((lo_mask
& ~1) != 0)
25818 if (mask
& (1 << PC_REGNUM
))
25820 /* Catch popping the PC. */
25821 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
25822 || crtl
->calls_eh_return
)
25824 /* The PC is never popped directly, instead
25825 it is popped into r3 and then BX is used. */
25826 fprintf (f
, "}\n");
25828 thumb_exit (f
, -1);
25837 asm_fprintf (f
, "%r", PC_REGNUM
);
25841 fprintf (f
, "}\n");
/* NOTE(review): garbled extraction -- identifiers split across lines,
   original line numbers fused into the text, and many statements
   (declarations, `else` branches, early `return`s, braces) missing.
   Recover the pristine file before compiling; annotations below apply
   to the surviving fragments.  */
25844 /* Generate code to return from a thumb function.
25845 If 'reg_containing_return_addr' is -1, then the return address is
25846 actually on the stack, at the stack pointer. */
25848 thumb_exit (FILE *f
, int reg_containing_return_addr
)
25850 unsigned regs_available_for_popping
;
25851 unsigned regs_to_pop
;
25853 unsigned available
;
25857 int restore_a4
= FALSE
;
25859 /* Compute the registers we need to pop. */
/* Return address on the stack means LR must be reloaded from it.  */
25863 if (reg_containing_return_addr
== -1)
25865 regs_to_pop
|= 1 << LR_REGNUM
;
25869 if (TARGET_BACKTRACE
)
25871 /* Restore the (ARM) frame pointer and stack pointer. */
25872 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
25876 /* If there is nothing to pop then just emit the BX instruction and
25878 if (pops_needed
== 0)
25880 if (crtl
->calls_eh_return
)
25881 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
25883 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
25886 /* Otherwise if we are not supporting interworking and we have not created
25887 a backtrace structure and the function was not entered in ARM mode then
25888 just pop the return address straight into the PC. */
25889 else if (!TARGET_INTERWORK
25890 && !TARGET_BACKTRACE
25891 && !is_called_in_ARM_mode (current_function_decl
)
25892 && !crtl
->calls_eh_return
)
25894 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
25898 /* Find out how many of the (return) argument registers we can corrupt. */
25899 regs_available_for_popping
= 0;
25901 /* If returning via __builtin_eh_return, the bottom three registers
25902 all contain information needed for the return. */
25903 if (crtl
->calls_eh_return
)
25907 /* If we can deduce the registers used from the function's
25908 return value. This is more reliable than examining
25909 df_regs_ever_live_p () because that will be set if the register is
25910 ever used in the function, not just if the register is used
25911 to hold a return value. */
25913 if (crtl
->return_rtx
!= 0)
25914 mode
= GET_MODE (crtl
->return_rtx
);
25916 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
25918 size
= GET_MODE_SIZE (mode
);
25922 /* In a void function we can use any argument register.
25923 In a function that returns a structure on the stack
25924 we can use the second and third argument registers. */
25925 if (mode
== VOIDmode
)
25926 regs_available_for_popping
=
25927 (1 << ARG_REGISTER (1))
25928 | (1 << ARG_REGISTER (2))
25929 | (1 << ARG_REGISTER (3));
25931 regs_available_for_popping
=
25932 (1 << ARG_REGISTER (2))
25933 | (1 << ARG_REGISTER (3));
25935 else if (size
<= 4)
25936 regs_available_for_popping
=
25937 (1 << ARG_REGISTER (2))
25938 | (1 << ARG_REGISTER (3));
25939 else if (size
<= 8)
25940 regs_available_for_popping
=
25941 (1 << ARG_REGISTER (3));
25944 /* Match registers to be popped with registers into which we pop them. */
/* `x & -x` isolates the lowest set bit; clearing it in both masks pairs
   one needed register with one scratch register per iteration.  */
25945 for (available
= regs_available_for_popping
,
25946 required
= regs_to_pop
;
25947 required
!= 0 && available
!= 0;
25948 available
&= ~(available
& - available
),
25949 required
&= ~(required
& - required
))
25952 /* If we have any popping registers left over, remove them. */
25954 regs_available_for_popping
&= ~available
;
25956 /* Otherwise if we need another popping register we can use
25957 the fourth argument register. */
25958 else if (pops_needed
)
25960 /* If we have not found any free argument registers and
25961 reg a4 contains the return address, we must move it. */
25962 if (regs_available_for_popping
== 0
25963 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
25965 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
25966 reg_containing_return_addr
= LR_REGNUM
;
25968 else if (size
> 12)
25970 /* Register a4 is being used to hold part of the return value,
25971 but we have dire need of a free, low register. */
25974 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
25977 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
25979 /* The fourth argument register is available. */
25980 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
25986 /* Pop as many registers as we can. */
25987 thumb_pop (f
, regs_available_for_popping
);
25989 /* Process the registers we popped. */
25990 if (reg_containing_return_addr
== -1)
25992 /* The return address was popped into the lowest numbered register. */
25993 regs_to_pop
&= ~(1 << LR_REGNUM
);
25995 reg_containing_return_addr
=
25996 number_of_first_bit_set (regs_available_for_popping
);
25998 /* Remove this register for the mask of available registers, so that
25999 the return address will not be corrupted by further pops. */
26000 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
26003 /* If we popped other registers then handle them here. */
26004 if (regs_available_for_popping
)
26008 /* Work out which register currently contains the frame pointer. */
26009 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26011 /* Move it into the correct place. */
26012 asm_fprintf (f
, "\tmov\t%r, %r\n",
26013 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
26015 /* (Temporarily) remove it from the mask of popped registers. */
26016 regs_available_for_popping
&= ~(1 << frame_pointer
);
26017 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
26019 if (regs_available_for_popping
)
26023 /* We popped the stack pointer as well,
26024 find the register that contains it. */
26025 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26027 /* Move it into the stack register. */
26028 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
26030 /* At this point we have popped all necessary registers, so
26031 do not worry about restoring regs_available_for_popping
26032 to its correct value:
26034 assert (pops_needed == 0)
26035 assert (regs_available_for_popping == (1 << frame_pointer))
26036 assert (regs_to_pop == (1 << STACK_POINTER)) */
26040 /* Since we have just moved the popped value into the frame
26041 pointer, the popping register is available for reuse, and
26042 we know that we still have the stack pointer left to pop. */
26043 regs_available_for_popping
|= (1 << frame_pointer
);
26047 /* If we still have registers left on the stack, but we no longer have
26048 any registers into which we can pop them, then we must move the return
26049 address into the link register and make available the register that
26051 if (regs_available_for_popping
== 0 && pops_needed
> 0)
26053 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
26055 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
26056 reg_containing_return_addr
);
26058 reg_containing_return_addr
= LR_REGNUM
;
26061 /* If we have registers left on the stack then pop some more.
26062 We know that at most we will want to pop FP and SP. */
26063 if (pops_needed
> 0)
26068 thumb_pop (f
, regs_available_for_popping
);
26070 /* We have popped either FP or SP.
26071 Move whichever one it is into the correct register. */
26072 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26073 move_to
= number_of_first_bit_set (regs_to_pop
);
26075 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
26077 regs_to_pop
&= ~(1 << move_to
);
26082 /* If we still have not popped everything then we must have only
26083 had one register available to us and we are now popping the SP. */
26084 if (pops_needed
> 0)
26088 thumb_pop (f
, regs_available_for_popping
);
26090 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26092 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
26094 assert (regs_to_pop == (1 << STACK_POINTER))
26095 assert (pops_needed == 1)
26099 /* If necessary restore the a4 register. */
26102 if (reg_containing_return_addr
!= LR_REGNUM
)
26104 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26105 reg_containing_return_addr
= LR_REGNUM
;
26108 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
26111 if (crtl
->calls_eh_return
)
26112 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26114 /* Return to caller. */
26115 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
/* NOTE(review): garbled extraction -- identifiers split across lines,
   original line numbers fused into the text, braces and early returns
   missing.  Recover the pristine file before compiling.  */
26118 /* Scan INSN just before assembler is output for it.
26119 For Thumb-1, we track the status of the condition codes; this
26120 information is used in the cbranchsi4_insn pattern. */
26122 thumb1_final_prescan_insn (rtx insn
)
26124 if (flag_print_asm_name
)
26125 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
26126 INSN_ADDRESSES (INSN_UID (insn
)));
26127 /* Don't overwrite the previous setter when we get to a cbranch. */
26128 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
26130 enum attr_conds conds
;
/* Forget the cached CC-setting insn if this insn clobbers either
   operand the recorded comparison depends on.  */
26132 if (cfun
->machine
->thumb1_cc_insn
)
26134 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
26135 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
26138 conds
= get_attr_conds (insn
);
26139 if (conds
== CONDS_SET
)
26141 rtx set
= single_set (insn
);
/* Record this insn as the current setter of the condition codes:
   conceptually a comparison of SET_DEST against zero.  */
26142 cfun
->machine
->thumb1_cc_insn
= insn
;
26143 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
26144 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
26145 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
26146 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
/* A subtract of zero sets the flags exactly like a compare, so the
   full CCmode (overflow valid) can be used.  */
26148 rtx src1
= XEXP (SET_SRC (set
), 1);
26149 if (src1
== const0_rtx
)
26150 cfun
->machine
->thumb1_cc_mode
= CCmode
;
26152 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
26154 /* Record the src register operand instead of dest because
26155 cprop_hardreg pass propagates src. */
26156 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
26159 else if (conds
!= CONDS_NOCOND
)
26160 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
26163 /* Check if unexpected far jump is used. */
/* A far jump after LR-save elimination would need LR pushed -- too
   late to do that now, so this is an internal consistency failure.  */
26164 if (cfun
->machine
->lr_save_eliminated
26165 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26166 internal_error("Unexpected thumb1 far jump");
/* NOTE(review): garbled extraction; the return statements and braces of
   this function are missing.  From the surviving fragments it looks
   like a predicate: nonzero when VAL (truncated to 32 bits) equals an
   8-bit constant shifted left by 0..24 bits -- TODO confirm against the
   pristine source before relying on this.  */
26170 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26172 unsigned HOST_WIDE_INT mask
= 0xff;
/* Only the low 32 bits are meaningful on Thumb.  */
26175 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26176 if (val
== 0) /* XXX */
/* 25 shift positions keep the 8-bit mask inside 32 bits (24 + 8).  */
26179 for (i
= 0; i
< 25; i
++)
26180 if ((val
& (mask
<< i
)) == val
)
/* NOTE(review): garbled extraction -- identifiers split across lines,
   line numbers fused into the text, early returns and braces missing.
   Recover the pristine file before compiling.  */
26186 /* Returns nonzero if the current function contains,
26187 or might contain a far jump. */
26189 thumb_far_jump_used_p (void)
26192 bool far_jump
= false;
26193 unsigned int func_size
= 0;
26195 /* This test is only important for leaf functions. */
26196 /* assert (!leaf_function_p ()); */
26198 /* If we have already decided that far jumps may be used,
26199 do not bother checking again, and always return true even if
26200 it turns out that they are not being used. Once we have made
26201 the decision that far jumps are present (and that hence the link
26202 register will be pushed onto the stack) we cannot go back on it. */
26203 if (cfun
->machine
->far_jump_used
)
26206 /* If this function is not being called from the prologue/epilogue
26207 generation code then it must be being called from the
26208 INITIAL_ELIMINATION_OFFSET macro. */
26209 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
26211 /* In this case we know that we are being asked about the elimination
26212 of the arg pointer register. If that register is not being used,
26213 then there are no arguments on the stack, and we do not have to
26214 worry that a far jump might force the prologue to push the link
26215 register, changing the stack offsets. In this case we can just
26216 return false, since the presence of far jumps in the function will
26217 not affect stack offsets.
26219 If the arg pointer is live (or if it was live, but has now been
26220 eliminated and so set to dead) then we do have to test to see if
26221 the function might contain a far jump. This test can lead to some
26222 false negatives, since before reload is completed, the length of
26223 branch instructions is not known, so gcc defaults to returning their
26224 longest length, which in turn sets the far jump attribute to true.
26226 A false negative will not result in bad code being generated, but it
26227 will result in a needless push and pop of the link register. We
26228 hope that this does not occur too often.
26230 If we need doubleword stack alignment this could affect the other
26231 elimination offsets so we can't risk getting it wrong. */
26232 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
26233 cfun
->machine
->arg_pointer_live
= 1;
26234 else if (!cfun
->machine
->arg_pointer_live
)
26238 /* Check to see if the function contains a branch
26239 insn with the far jump attribute set. */
26240 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
26242 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26246 func_size
+= get_attr_length (insn
);
26249 /* Attribute far_jump will always be true for thumb1 before
26250 shorten_branch pass. So checking far_jump attribute before
26251 shorten_branch isn't much useful.
26253 Following heuristic tries to estimate more accurately if a far jump
26254 may finally be used. The heuristic is very conservative as there is
26255 no chance to roll-back the decision of not to use far jump.
26257 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26258 2-byte insn is associated with a 4 byte constant pool. Using
26259 function size 2048/3 as the threshold is conservative enough. */
26262 if ((func_size
* 3) >= 2048)
26264 /* Record the fact that we have decided that
26265 the function does use far jumps. */
26266 cfun
->machine
->far_jump_used
= 1;
/* NOTE(review): garbled extraction; the `return` for the interworking
   branch and surrounding preprocessor conditionals appear to be
   missing.  Recover the pristine file before compiling.  */
26274 /* Return nonzero if FUNC must be entered in ARM mode. */
26276 is_called_in_ARM_mode (tree func
)
26278 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
26280 /* Ignore the problem about functions whose address is taken. */
26281 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
/* The "interfacearm" attribute explicitly marks a Thumb function as
   entered in ARM mode via a veneer.  */
26285 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
/* NOTE(review): garbled extraction -- identifiers split across lines,
   line numbers fused into the text, several condition/return lines and
   braces missing.  Recover the pristine file before compiling.  */
26291 /* Given the stack offsets and register mask in OFFSETS, decide how
26292 many additional registers to push instead of subtracting a constant
26293 from SP. For epilogues the principle is the same except we use pop.
26294 FOR_PROLOGUE indicates which we're generating. */
26296 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26298 HOST_WIDE_INT amount
;
26299 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26300 /* Extract a mask of the ones we can give to the Thumb's push/pop
26302 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26303 /* Then count how many other high registers will need to be pushed. */
26304 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26305 int n_free
, reg_base
, size
;
26307 if (!for_prologue
&& frame_pointer_needed
)
26308 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26310 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26312 /* If the stack frame size is 512 exactly, we can save one load
26313 instruction, which should make this a win even when optimizing
26315 if (!optimize_size
&& amount
!= 512)
26318 /* Can't do this if there are high registers to push. */
26319 if (high_regs_pushed
!= 0)
26322 /* Shouldn't do it in the prologue if no registers would normally
26323 be pushed at all. In the epilogue, also allow it if we'll have
26324 a pop insn for the PC. */
26327 || TARGET_BACKTRACE
26328 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26329 || TARGET_INTERWORK
26330 || crtl
->args
.pretend_args_size
!= 0))
26333 /* Don't do this if thumb_expand_prologue wants to emit instructions
26334 between the push and the stack frame allocation. */
26336 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26337 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
/* Scratch registers start just above those used for the return value.  */
26344 size
= arm_size_return_regs ();
26345 reg_base
= ARM_NUM_INTS (size
);
26346 live_regs_mask
>>= reg_base
;
/* Count consecutive dead (and, in the epilogue, call-used) low
   registers that can absorb part of the stack adjustment.  */
26349 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26350 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
26352 live_regs_mask
>>= 1;
26358 gcc_assert (amount
/ 4 * 4 == amount
);
/* Push only enough registers to bring the remaining adjustment under
   512, the largest immediate a Thumb-1 SP add/sub can encode.  */
26360 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26361 return (amount
- 508) / 4;
26362 if (amount
<= n_free
* 4)
/* NOTE(review): garbled extraction -- identifiers split across lines,
   line numbers fused into the text, and many statements (declarations,
   `sorry`/error call head, returns, braces) missing.  Recover the
   pristine file before compiling.  */
26367 /* The bits which aren't usefully expanded as rtl. */
26369 thumb1_unexpanded_epilogue (void)
26371 arm_stack_offsets
*offsets
;
26373 unsigned long live_regs_mask
= 0;
26374 int high_regs_pushed
= 0;
26376 int had_to_push_lr
;
26379 if (cfun
->machine
->return_used_this_function
!= 0)
26382 if (IS_NAKED (arm_current_func_type ()))
26385 offsets
= arm_get_frame_offsets ();
26386 live_regs_mask
= offsets
->saved_regs_mask
;
26387 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26389 /* If we can deduce the registers used from the function's return value.
26390 This is more reliable than examining df_regs_ever_live_p () because that
26391 will be set if the register is ever used in the function, not just if
26392 the register is used to hold a return value. */
26393 size
= arm_size_return_regs ();
26395 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26398 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26399 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26402 /* The prolog may have pushed some high registers to use as
26403 work registers. e.g. the testsuite file:
26404 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26405 compiles to produce:
26406 push {r4, r5, r6, r7, lr}
26410 as part of the prolog. We have to undo that pushing here. */
26412 if (high_regs_pushed
)
26414 unsigned long mask
= live_regs_mask
& 0xff;
26417 /* The available low registers depend on the size of the value we are
26425 /* Oh dear! We have no low registers into which we can pop
26428 ("no low registers available for popping high registers");
/* Find the lowest live high register (r8..r12) still to restore.  */
26430 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
26431 if (live_regs_mask
& (1 << next_hi_reg
))
26434 while (high_regs_pushed
)
26436 /* Find lo register(s) into which the high register(s) can
26438 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26440 if (mask
& (1 << regno
))
26441 high_regs_pushed
--;
26442 if (high_regs_pushed
== 0)
26446 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
26448 /* Pop the values into the low register(s). */
26449 thumb_pop (asm_out_file
, mask
);
26451 /* Move the value(s) into the high registers. */
26452 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26454 if (mask
& (1 << regno
))
26456 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
26459 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
26460 if (live_regs_mask
& (1 << next_hi_reg
))
26465 live_regs_mask
&= ~0x0f00;
26468 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
26469 live_regs_mask
&= 0xff;
26471 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
26473 /* Pop the return address into the PC. */
26474 if (had_to_push_lr
)
26475 live_regs_mask
|= 1 << PC_REGNUM
;
26477 /* Either no argument registers were pushed or a backtrace
26478 structure was created which includes an adjusted stack
26479 pointer, so just pop everything. */
26480 if (live_regs_mask
)
26481 thumb_pop (asm_out_file
, live_regs_mask
);
26483 /* We have either just popped the return address into the
26484 PC or it was kept in LR for the entire function.
26485 Note that thumb_pop has already called thumb_exit if the
26486 PC was in the list. */
26487 if (!had_to_push_lr
)
26488 thumb_exit (asm_out_file
, LR_REGNUM
);
26492 /* Pop everything but the return address. */
26493 if (live_regs_mask
)
26494 thumb_pop (asm_out_file
, live_regs_mask
);
26496 if (had_to_push_lr
)
26500 /* We have no free low regs, so save one. */
26501 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
26505 /* Get the return address into a temporary register. */
26506 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
26510 /* Move the return address to lr. */
26511 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
26513 /* Restore the low register. */
26514 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
26519 regno
= LAST_ARG_REGNUM
;
26524 /* Remove the argument registers that were pushed onto the stack. */
26525 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
26526 SP_REGNUM
, SP_REGNUM
,
26527 crtl
->args
.pretend_args_size
);
26529 thumb_exit (asm_out_file
, regno
);
/* NOTE(review): garbled extraction; the `return machine;` line and
   closing `#endif`/braces appear to be missing.  Recover the pristine
   file before compiling.  */
26535 /* Functions to save and restore machine-specific function data. */
26536 static struct machine_function
*
26537 arm_init_machine_status (void)
26539 struct machine_function
*machine
;
/* GC-allocated and zero-initialized per-function machine state.  */
26540 machine
= ggc_alloc_cleared_machine_function ();
/* The cleared allocation only gives ARM_FT_UNKNOWN for free when the
   enumerator is zero; otherwise set it explicitly.  */
26542 #if ARM_FT_UNKNOWN != 0
26543 machine
->func_type
= ARM_FT_UNKNOWN
;
/* NOTE(review): garbled extraction; the handling of COUNT != 0
   (presumably an early return of NULL_RTX -- TODO confirm) and braces
   are missing.  Recover the pristine file before compiling.  */
26548 /* Return an RTX indicating where the return address to the
26549 calling function can be found. */
26551 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
/* The current frame's return address lives in LR on entry; expose its
   initial value as a pseudo via the hard-reg-initial-val machinery.  */
26556 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
/* NOTE(review): garbled extraction; braces and any condition guarding
   the mark_reg_pointer call are missing.  Recover the pristine file
   before compiling.  */
26559 /* Do anything needed before RTL is emitted for each function. */
26561 arm_init_expanders (void)
26563 /* Arrange to initialize and mark the machine per-function status. */
26564 init_machine_status
= arm_init_machine_status
;
26566 /* This is to stop the combine pass optimizing away the alignment
26567 adjustment of va_arg. */
26568 /* ??? It is claimed that this should not be necessary. */
26570 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
26574 /* Like arm_compute_initial_elimination offset. Simpler because there
26575 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26576 to point at the base of the local variables after static stack
26577 space for a function has been allocated. */
26580 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
26582 arm_stack_offsets
*offsets
;
26584 offsets
= arm_get_frame_offsets ();
26588 case ARG_POINTER_REGNUM
:
26591 case STACK_POINTER_REGNUM
:
26592 return offsets
->outgoing_args
- offsets
->saved_args
;
26594 case FRAME_POINTER_REGNUM
:
26595 return offsets
->soft_frame
- offsets
->saved_args
;
26597 case ARM_HARD_FRAME_POINTER_REGNUM
:
26598 return offsets
->saved_regs
- offsets
->saved_args
;
26600 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26601 return offsets
->locals_base
- offsets
->saved_args
;
26604 gcc_unreachable ();
26608 case FRAME_POINTER_REGNUM
:
26611 case STACK_POINTER_REGNUM
:
26612 return offsets
->outgoing_args
- offsets
->soft_frame
;
26614 case ARM_HARD_FRAME_POINTER_REGNUM
:
26615 return offsets
->saved_regs
- offsets
->soft_frame
;
26617 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26618 return offsets
->locals_base
- offsets
->soft_frame
;
26621 gcc_unreachable ();
26626 gcc_unreachable ();
26630 /* Generate the function's prologue. */
26633 thumb1_expand_prologue (void)
26637 HOST_WIDE_INT amount
;
26638 arm_stack_offsets
*offsets
;
26639 unsigned long func_type
;
26641 unsigned long live_regs_mask
;
26642 unsigned long l_mask
;
26643 unsigned high_regs_pushed
= 0;
26645 func_type
= arm_current_func_type ();
26647 /* Naked functions don't have prologues. */
26648 if (IS_NAKED (func_type
))
26651 if (IS_INTERRUPT (func_type
))
26653 error ("interrupt Service Routines cannot be coded in Thumb mode");
26657 if (is_called_in_ARM_mode (current_function_decl
))
26658 emit_insn (gen_prologue_thumb1_interwork ());
26660 offsets
= arm_get_frame_offsets ();
26661 live_regs_mask
= offsets
->saved_regs_mask
;
26663 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26664 l_mask
= live_regs_mask
& 0x40ff;
26665 /* Then count how many other high registers will need to be pushed. */
26666 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26668 if (crtl
->args
.pretend_args_size
)
26670 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
26672 if (cfun
->machine
->uses_anonymous_args
)
26674 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
26675 unsigned long mask
;
26677 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
26678 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
26680 insn
= thumb1_emit_multi_reg_push (mask
, 0);
26684 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26685 stack_pointer_rtx
, x
));
26687 RTX_FRAME_RELATED_P (insn
) = 1;
26690 if (TARGET_BACKTRACE
)
26692 HOST_WIDE_INT offset
= 0;
26693 unsigned work_register
;
26694 rtx work_reg
, x
, arm_hfp_rtx
;
26696 /* We have been asked to create a stack backtrace structure.
26697 The code looks like this:
26701 0 sub SP, #16 Reserve space for 4 registers.
26702 2 push {R7} Push low registers.
26703 4 add R7, SP, #20 Get the stack pointer before the push.
26704 6 str R7, [SP, #8] Store the stack pointer
26705 (before reserving the space).
26706 8 mov R7, PC Get hold of the start of this code + 12.
26707 10 str R7, [SP, #16] Store it.
26708 12 mov R7, FP Get hold of the current frame pointer.
26709 14 str R7, [SP, #4] Store it.
26710 16 mov R7, LR Get hold of the current return address.
26711 18 str R7, [SP, #12] Store it.
26712 20 add R7, SP, #16 Point at the start of the
26713 backtrace structure.
26714 22 mov FP, R7 Put this value into the frame pointer. */
26716 work_register
= thumb_find_work_register (live_regs_mask
);
26717 work_reg
= gen_rtx_REG (SImode
, work_register
);
26718 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
26720 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26721 stack_pointer_rtx
, GEN_INT (-16)));
26722 RTX_FRAME_RELATED_P (insn
) = 1;
26726 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
26727 RTX_FRAME_RELATED_P (insn
) = 1;
26729 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
26732 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
26733 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
26735 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
26736 x
= gen_frame_mem (SImode
, x
);
26737 emit_move_insn (x
, work_reg
);
26739 /* Make sure that the instruction fetching the PC is in the right place
26740 to calculate "start of backtrace creation code + 12". */
26741 /* ??? The stores using the common WORK_REG ought to be enough to
26742 prevent the scheduler from doing anything weird. Failing that
26743 we could always move all of the following into an UNSPEC_VOLATILE. */
26746 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
26747 emit_move_insn (work_reg
, x
);
26749 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
26750 x
= gen_frame_mem (SImode
, x
);
26751 emit_move_insn (x
, work_reg
);
26753 emit_move_insn (work_reg
, arm_hfp_rtx
);
26755 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
26756 x
= gen_frame_mem (SImode
, x
);
26757 emit_move_insn (x
, work_reg
);
26761 emit_move_insn (work_reg
, arm_hfp_rtx
);
26763 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
26764 x
= gen_frame_mem (SImode
, x
);
26765 emit_move_insn (x
, work_reg
);
26767 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
26768 emit_move_insn (work_reg
, x
);
26770 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
26771 x
= gen_frame_mem (SImode
, x
);
26772 emit_move_insn (x
, work_reg
);
26775 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
26776 emit_move_insn (work_reg
, x
);
26778 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
26779 x
= gen_frame_mem (SImode
, x
);
26780 emit_move_insn (x
, work_reg
);
26782 x
= GEN_INT (offset
+ 12);
26783 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
26785 emit_move_insn (arm_hfp_rtx
, work_reg
);
26787 /* Optimization: If we are not pushing any low registers but we are going
26788 to push some high registers then delay our first push. This will just
26789 be a push of LR and we can combine it with the push of the first high
26791 else if ((l_mask
& 0xff) != 0
26792 || (high_regs_pushed
== 0 && l_mask
))
26794 unsigned long mask
= l_mask
;
26795 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
26796 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
26797 RTX_FRAME_RELATED_P (insn
) = 1;
26800 if (high_regs_pushed
)
26802 unsigned pushable_regs
;
26803 unsigned next_hi_reg
;
26804 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
26805 : crtl
->args
.info
.nregs
;
26806 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
26808 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
26809 if (live_regs_mask
& (1 << next_hi_reg
))
26812 /* Here we need to mask out registers used for passing arguments
26813 even if they can be pushed. This is to avoid using them to stash the high
26814 registers. Such kind of stash may clobber the use of arguments. */
26815 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
26817 if (pushable_regs
== 0)
26818 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
26820 while (high_regs_pushed
> 0)
26822 unsigned long real_regs_mask
= 0;
26824 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
26826 if (pushable_regs
& (1 << regno
))
26828 emit_move_insn (gen_rtx_REG (SImode
, regno
),
26829 gen_rtx_REG (SImode
, next_hi_reg
));
26831 high_regs_pushed
--;
26832 real_regs_mask
|= (1 << next_hi_reg
);
26834 if (high_regs_pushed
)
26836 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
26838 if (live_regs_mask
& (1 << next_hi_reg
))
26843 pushable_regs
&= ~((1 << regno
) - 1);
26849 /* If we had to find a work register and we have not yet
26850 saved the LR then add it to the list of regs to push. */
26851 if (l_mask
== (1 << LR_REGNUM
))
26853 pushable_regs
|= l_mask
;
26854 real_regs_mask
|= l_mask
;
26858 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
26859 RTX_FRAME_RELATED_P (insn
) = 1;
26863 /* Load the pic register before setting the frame pointer,
26864 so we can use r7 as a temporary work register. */
26865 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26866 arm_load_pic_register (live_regs_mask
);
26868 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
26869 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
26870 stack_pointer_rtx
);
26872 if (flag_stack_usage_info
)
26873 current_function_static_stack_size
26874 = offsets
->outgoing_args
- offsets
->saved_args
;
26876 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26877 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
26882 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
26883 GEN_INT (- amount
)));
26884 RTX_FRAME_RELATED_P (insn
) = 1;
26890 /* The stack decrement is too big for an immediate value in a single
26891 insn. In theory we could issue multiple subtracts, but after
26892 three of them it becomes more space efficient to place the full
26893 value in the constant pool and load into a register. (Also the
26894 ARM debugger really likes to see only one stack decrement per
26895 function). So instead we look for a scratch register into which
26896 we can load the decrement, and then we subtract this from the
26897 stack pointer. Unfortunately on the thumb the only available
26898 scratch registers are the argument registers, and we cannot use
26899 these as they may hold arguments to the function. Instead we
26900 attempt to locate a call preserved register which is used by this
26901 function. If we can find one, then we know that it will have
26902 been pushed at the start of the prologue and so we can corrupt
26904 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
26905 if (live_regs_mask
& (1 << regno
))
26908 gcc_assert(regno
<= LAST_LO_REGNUM
);
26910 reg
= gen_rtx_REG (SImode
, regno
);
26912 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
26914 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26915 stack_pointer_rtx
, reg
));
26917 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
26918 plus_constant (Pmode
, stack_pointer_rtx
,
26920 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
26921 RTX_FRAME_RELATED_P (insn
) = 1;
26925 if (frame_pointer_needed
)
26926 thumb_set_frame_pointer (offsets
);
26928 /* If we are profiling, make sure no instructions are scheduled before
26929 the call to mcount. Similarly if the user has requested no
26930 scheduling in the prolog. Similarly if we want non-call exceptions
26931 using the EABI unwinder, to prevent faulting instructions from being
26932 swapped with a stack adjustment. */
26933 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
26934 || (arm_except_unwind_info (&global_options
) == UI_TARGET
26935 && cfun
->can_throw_non_call_exceptions
))
26936 emit_insn (gen_blockage ());
26938 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
26939 if (live_regs_mask
& 0xff)
26940 cfun
->machine
->lr_save_eliminated
= 0;
26943 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
26944 POP instruction can be generated. LR should be replaced by PC. All
26945 the checks required are already done by USE_RETURN_INSN (). Hence,
26946 all we really need to check here is if single register is to be
26947 returned, or multiple register return. */
26949 thumb2_expand_return (bool simple_return
)
26952 unsigned long saved_regs_mask
;
26953 arm_stack_offsets
*offsets
;
26955 offsets
= arm_get_frame_offsets ();
26956 saved_regs_mask
= offsets
->saved_regs_mask
;
26958 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
26959 if (saved_regs_mask
& (1 << i
))
26962 if (!simple_return
&& saved_regs_mask
)
26966 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
26967 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
26968 rtx addr
= gen_rtx_MEM (SImode
,
26969 gen_rtx_POST_INC (SImode
,
26970 stack_pointer_rtx
));
26971 set_mem_alias_set (addr
, get_frame_alias_set ());
26972 XVECEXP (par
, 0, 0) = ret_rtx
;
26973 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
26974 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
26975 emit_jump_insn (par
);
26979 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
26980 saved_regs_mask
|= (1 << PC_REGNUM
);
26981 arm_emit_multi_reg_pop (saved_regs_mask
);
26986 emit_jump_insn (simple_return_rtx
);
26991 thumb1_expand_epilogue (void)
26993 HOST_WIDE_INT amount
;
26994 arm_stack_offsets
*offsets
;
26997 /* Naked functions don't have prologues. */
26998 if (IS_NAKED (arm_current_func_type ()))
27001 offsets
= arm_get_frame_offsets ();
27002 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27004 if (frame_pointer_needed
)
27006 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27007 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27009 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27011 gcc_assert (amount
>= 0);
27014 emit_insn (gen_blockage ());
27017 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27018 GEN_INT (amount
)));
27021 /* r3 is always free in the epilogue. */
27022 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27024 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27025 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27029 /* Emit a USE (stack_pointer_rtx), so that
27030 the stack adjustment will not be deleted. */
27031 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27033 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27034 emit_insn (gen_blockage ());
27036 /* Emit a clobber for each insn that will be restored in the epilogue,
27037 so that flow2 will get register lifetimes correct. */
27038 for (regno
= 0; regno
< 13; regno
++)
27039 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
27040 emit_clobber (gen_rtx_REG (SImode
, regno
));
27042 if (! df_regs_ever_live_p (LR_REGNUM
))
27043 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27046 /* Epilogue code for APCS frame. */
27048 arm_expand_epilogue_apcs_frame (bool really_return
)
27050 unsigned long func_type
;
27051 unsigned long saved_regs_mask
;
27054 int floats_from_frame
= 0;
27055 arm_stack_offsets
*offsets
;
27057 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27058 func_type
= arm_current_func_type ();
27060 /* Get frame offsets for ARM. */
27061 offsets
= arm_get_frame_offsets ();
27062 saved_regs_mask
= offsets
->saved_regs_mask
;
27064 /* Find the offset of the floating-point save area in the frame. */
27066 = (offsets
->saved_args
27067 + arm_compute_static_chain_stack_bytes ()
27070 /* Compute how many core registers saved and how far away the floats are. */
27071 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27072 if (saved_regs_mask
& (1 << i
))
27075 floats_from_frame
+= 4;
27078 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27082 /* The offset is from IP_REGNUM. */
27083 int saved_size
= arm_get_vfp_saved_size ();
27084 if (saved_size
> 0)
27086 floats_from_frame
+= saved_size
;
27087 emit_insn (gen_addsi3 (gen_rtx_REG (SImode
, IP_REGNUM
),
27088 hard_frame_pointer_rtx
,
27089 GEN_INT (-floats_from_frame
)));
27092 /* Generate VFP register multi-pop. */
27093 start_reg
= FIRST_VFP_REGNUM
;
27095 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27096 /* Look for a case where a reg does not need restoring. */
27097 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27098 && (!df_regs_ever_live_p (i
+ 1)
27099 || call_used_regs
[i
+ 1]))
27101 if (start_reg
!= i
)
27102 arm_emit_vfp_multi_reg_pop (start_reg
,
27103 (i
- start_reg
) / 2,
27104 gen_rtx_REG (SImode
,
27109 /* Restore the remaining regs that we have discovered (or possibly
27110 even all of them, if the conditional in the for loop never
27112 if (start_reg
!= i
)
27113 arm_emit_vfp_multi_reg_pop (start_reg
,
27114 (i
- start_reg
) / 2,
27115 gen_rtx_REG (SImode
, IP_REGNUM
));
27120 /* The frame pointer is guaranteed to be non-double-word aligned, as
27121 it is set to double-word-aligned old_stack_pointer - 4. */
27123 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27125 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27126 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27128 rtx addr
= gen_frame_mem (V2SImode
,
27129 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27131 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27132 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27133 gen_rtx_REG (V2SImode
, i
),
27139 /* saved_regs_mask should contain IP which contains old stack pointer
27140 at the time of activation creation. Since SP and IP are adjacent registers,
27141 we can restore the value directly into SP. */
27142 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27143 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27144 saved_regs_mask
|= (1 << SP_REGNUM
);
27146 /* There are two registers left in saved_regs_mask - LR and PC. We
27147 only need to restore LR (the return address), but to
27148 save time we can load it directly into PC, unless we need a
27149 special function exit sequence, or we are not really returning. */
27151 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27152 && !crtl
->calls_eh_return
)
27153 /* Delete LR from the register mask, so that LR on
27154 the stack is loaded into the PC in the register mask. */
27155 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27157 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27159 num_regs
= bit_count (saved_regs_mask
);
27160 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27162 emit_insn (gen_blockage ());
27163 /* Unwind the stack to just below the saved registers. */
27164 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27165 hard_frame_pointer_rtx
,
27166 GEN_INT (- 4 * num_regs
)));
27169 arm_emit_multi_reg_pop (saved_regs_mask
);
27171 if (IS_INTERRUPT (func_type
))
27173 /* Interrupt handlers will have pushed the
27174 IP onto the stack, so restore it now. */
27176 rtx addr
= gen_rtx_MEM (SImode
,
27177 gen_rtx_POST_INC (SImode
,
27178 stack_pointer_rtx
));
27179 set_mem_alias_set (addr
, get_frame_alias_set ());
27180 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27181 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27182 gen_rtx_REG (SImode
, IP_REGNUM
),
27186 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27189 if (crtl
->calls_eh_return
)
27190 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27192 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27194 if (IS_STACKALIGN (func_type
))
27195 /* Restore the original stack pointer. Before prologue, the stack was
27196 realigned and the original stack pointer saved in r0. For details,
27197 see comment in arm_expand_prologue. */
27198 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27200 emit_jump_insn (simple_return_rtx
);
27203 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27204 function is not a sibcall. */
27206 arm_expand_epilogue (bool really_return
)
27208 unsigned long func_type
;
27209 unsigned long saved_regs_mask
;
27213 arm_stack_offsets
*offsets
;
27215 func_type
= arm_current_func_type ();
27217 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27218 let output_return_instruction take care of instruction emission if any. */
27219 if (IS_NAKED (func_type
)
27220 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27223 emit_jump_insn (simple_return_rtx
);
27227 /* If we are throwing an exception, then we really must be doing a
27228 return, so we can't tail-call. */
27229 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27231 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27233 arm_expand_epilogue_apcs_frame (really_return
);
27237 /* Get frame offsets for ARM. */
27238 offsets
= arm_get_frame_offsets ();
27239 saved_regs_mask
= offsets
->saved_regs_mask
;
27240 num_regs
= bit_count (saved_regs_mask
);
27242 if (frame_pointer_needed
)
27245 /* Restore stack pointer if necessary. */
27248 /* In ARM mode, frame pointer points to first saved register.
27249 Restore stack pointer to last saved register. */
27250 amount
= offsets
->frame
- offsets
->saved_regs
;
27252 /* Force out any pending memory operations that reference stacked data
27253 before stack de-allocation occurs. */
27254 emit_insn (gen_blockage ());
27255 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27256 hard_frame_pointer_rtx
,
27257 GEN_INT (amount
)));
27258 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27260 hard_frame_pointer_rtx
);
27262 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27264 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27268 /* In Thumb-2 mode, the frame pointer points to the last saved
27270 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27273 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27274 hard_frame_pointer_rtx
,
27275 GEN_INT (amount
)));
27276 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27277 hard_frame_pointer_rtx
,
27278 hard_frame_pointer_rtx
);
27281 /* Force out any pending memory operations that reference stacked data
27282 before stack de-allocation occurs. */
27283 emit_insn (gen_blockage ());
27284 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27285 hard_frame_pointer_rtx
));
27286 arm_add_cfa_adjust_cfa_note (insn
, 0,
27288 hard_frame_pointer_rtx
);
27289 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27291 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27296 /* Pop off outgoing args and local frame to adjust stack pointer to
27297 last saved register. */
27298 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27302 /* Force out any pending memory operations that reference stacked data
27303 before stack de-allocation occurs. */
27304 emit_insn (gen_blockage ());
27305 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27307 GEN_INT (amount
)));
27308 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27309 stack_pointer_rtx
, stack_pointer_rtx
);
27310 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27312 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27316 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27318 /* Generate VFP register multi-pop. */
27319 int end_reg
= LAST_VFP_REGNUM
+ 1;
27321 /* Scan the registers in reverse order. We need to match
27322 any groupings made in the prologue and generate matching
27323 vldm operations. The need to match groups is because,
27324 unlike pop, vldm can only do consecutive regs. */
27325 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27326 /* Look for a case where a reg does not need restoring. */
27327 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27328 && (!df_regs_ever_live_p (i
+ 1)
27329 || call_used_regs
[i
+ 1]))
27331 /* Restore the regs discovered so far (from reg+2 to
27333 if (end_reg
> i
+ 2)
27334 arm_emit_vfp_multi_reg_pop (i
+ 2,
27335 (end_reg
- (i
+ 2)) / 2,
27336 stack_pointer_rtx
);
27340 /* Restore the remaining regs that we have discovered (or possibly
27341 even all of them, if the conditional in the for loop never
27343 if (end_reg
> i
+ 2)
27344 arm_emit_vfp_multi_reg_pop (i
+ 2,
27345 (end_reg
- (i
+ 2)) / 2,
27346 stack_pointer_rtx
);
27350 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27351 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27354 rtx addr
= gen_rtx_MEM (V2SImode
,
27355 gen_rtx_POST_INC (SImode
,
27356 stack_pointer_rtx
));
27357 set_mem_alias_set (addr
, get_frame_alias_set ());
27358 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27359 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27360 gen_rtx_REG (V2SImode
, i
),
27362 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27363 stack_pointer_rtx
, stack_pointer_rtx
);
27366 if (saved_regs_mask
)
27369 bool return_in_pc
= false;
27371 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27372 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27373 && !IS_STACKALIGN (func_type
)
27375 && crtl
->args
.pretend_args_size
== 0
27376 && saved_regs_mask
& (1 << LR_REGNUM
)
27377 && !crtl
->calls_eh_return
)
27379 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27380 saved_regs_mask
|= (1 << PC_REGNUM
);
27381 return_in_pc
= true;
27384 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
27386 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27387 if (saved_regs_mask
& (1 << i
))
27389 rtx addr
= gen_rtx_MEM (SImode
,
27390 gen_rtx_POST_INC (SImode
,
27391 stack_pointer_rtx
));
27392 set_mem_alias_set (addr
, get_frame_alias_set ());
27394 if (i
== PC_REGNUM
)
27396 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27397 XVECEXP (insn
, 0, 0) = ret_rtx
;
27398 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
27399 gen_rtx_REG (SImode
, i
),
27401 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
27402 insn
= emit_jump_insn (insn
);
27406 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
27408 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27409 gen_rtx_REG (SImode
, i
),
27411 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27413 stack_pointer_rtx
);
27420 && current_tune
->prefer_ldrd_strd
27421 && !optimize_function_for_size_p (cfun
))
27424 thumb2_emit_ldrd_pop (saved_regs_mask
);
27425 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
27426 arm_emit_ldrd_pop (saved_regs_mask
);
27428 arm_emit_multi_reg_pop (saved_regs_mask
);
27431 arm_emit_multi_reg_pop (saved_regs_mask
);
27434 if (return_in_pc
== true)
27438 if (crtl
->args
.pretend_args_size
)
27441 rtx dwarf
= NULL_RTX
;
27442 rtx tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27444 GEN_INT (crtl
->args
.pretend_args_size
)));
27446 RTX_FRAME_RELATED_P (tmp
) = 1;
27448 if (cfun
->machine
->uses_anonymous_args
)
27450 /* Restore pretend args. Refer arm_expand_prologue on how to save
27451 pretend_args in stack. */
27452 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
27453 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
27454 for (j
= 0, i
= 0; j
< num_regs
; i
++)
27455 if (saved_regs_mask
& (1 << i
))
27457 rtx reg
= gen_rtx_REG (SImode
, i
);
27458 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
27461 REG_NOTES (tmp
) = dwarf
;
27463 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
27464 stack_pointer_rtx
, stack_pointer_rtx
);
27467 if (!really_return
)
27470 if (crtl
->calls_eh_return
)
27471 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27473 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27475 if (IS_STACKALIGN (func_type
))
27476 /* Restore the original stack pointer. Before prologue, the stack was
27477 realigned and the original stack pointer saved in r0. For details,
27478 see comment in arm_expand_prologue. */
27479 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27481 emit_jump_insn (simple_return_rtx
);
27484 /* Implementation of insn prologue_thumb1_interwork. This is the first
27485 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27488 thumb1_output_interwork (void)
27491 FILE *f
= asm_out_file
;
27493 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
27494 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
27496 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
27498 /* Generate code sequence to switch us into Thumb mode. */
27499 /* The .code 32 directive has already been emitted by
27500 ASM_DECLARE_FUNCTION_NAME. */
27501 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
27502 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
27504 /* Generate a label, so that the debugger will notice the
27505 change in instruction sets. This label is also used by
27506 the assembler to bypass the ARM code when this function
27507 is called from a Thumb encoded function elsewhere in the
27508 same file. Hence the definition of STUB_NAME here must
27509 agree with the definition in gas/config/tc-arm.c. */
27511 #define STUB_NAME ".real_start_of"
27513 fprintf (f
, "\t.code\t16\n");
27515 if (arm_dllexport_name_p (name
))
27516 name
= arm_strip_name_encoding (name
);
27518 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
27519 fprintf (f
, "\t.thumb_func\n");
27520 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
27525 /* Handle the case of a double word load into a low register from
27526 a computed memory address. The computed address may involve a
27527 register which is overwritten by the load. */
27529 thumb_load_double_from_address (rtx
*operands
)
27537 gcc_assert (REG_P (operands
[0]));
27538 gcc_assert (MEM_P (operands
[1]));
27540 /* Get the memory address. */
27541 addr
= XEXP (operands
[1], 0);
27543 /* Work out how the memory address is computed. */
27544 switch (GET_CODE (addr
))
27547 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27549 if (REGNO (operands
[0]) == REGNO (addr
))
27551 output_asm_insn ("ldr\t%H0, %2", operands
);
27552 output_asm_insn ("ldr\t%0, %1", operands
);
27556 output_asm_insn ("ldr\t%0, %1", operands
);
27557 output_asm_insn ("ldr\t%H0, %2", operands
);
27562 /* Compute <address> + 4 for the high order load. */
27563 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27565 output_asm_insn ("ldr\t%0, %1", operands
);
27566 output_asm_insn ("ldr\t%H0, %2", operands
);
27570 arg1
= XEXP (addr
, 0);
27571 arg2
= XEXP (addr
, 1);
27573 if (CONSTANT_P (arg1
))
27574 base
= arg2
, offset
= arg1
;
27576 base
= arg1
, offset
= arg2
;
27578 gcc_assert (REG_P (base
));
27580 /* Catch the case of <address> = <reg> + <reg> */
27581 if (REG_P (offset
))
27583 int reg_offset
= REGNO (offset
);
27584 int reg_base
= REGNO (base
);
27585 int reg_dest
= REGNO (operands
[0]);
27587 /* Add the base and offset registers together into the
27588 higher destination register. */
27589 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
27590 reg_dest
+ 1, reg_base
, reg_offset
);
27592 /* Load the lower destination register from the address in
27593 the higher destination register. */
27594 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
27595 reg_dest
, reg_dest
+ 1);
27597 /* Load the higher destination register from its own address
27599 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
27600 reg_dest
+ 1, reg_dest
+ 1);
27604 /* Compute <address> + 4 for the high order load. */
27605 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27607 /* If the computed address is held in the low order register
27608 then load the high order register first, otherwise always
27609 load the low order register first. */
27610 if (REGNO (operands
[0]) == REGNO (base
))
27612 output_asm_insn ("ldr\t%H0, %2", operands
);
27613 output_asm_insn ("ldr\t%0, %1", operands
);
27617 output_asm_insn ("ldr\t%0, %1", operands
);
27618 output_asm_insn ("ldr\t%H0, %2", operands
);
27624 /* With no registers to worry about we can just load the value
27626 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27628 output_asm_insn ("ldr\t%H0, %2", operands
);
27629 output_asm_insn ("ldr\t%0, %1", operands
);
27633 gcc_unreachable ();
27640 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
27647 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27650 operands
[4] = operands
[5];
27653 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
27654 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
27658 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27661 operands
[4] = operands
[5];
27664 if (REGNO (operands
[5]) > REGNO (operands
[6]))
27667 operands
[5] = operands
[6];
27670 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27673 operands
[4] = operands
[5];
27677 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
27678 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
27682 gcc_unreachable ();
27688 /* Output a call-via instruction for thumb state. */
27690 thumb_call_via_reg (rtx reg
)
27692 int regno
= REGNO (reg
);
27695 gcc_assert (regno
< LR_REGNUM
);
27697 /* If we are in the normal text section we can use a single instance
27698 per compilation unit. If we are doing function sections, then we need
27699 an entry per section, since we can't rely on reachability. */
27700 if (in_section
== text_section
)
27702 thumb_call_reg_needed
= 1;
27704 if (thumb_call_via_label
[regno
] == NULL
)
27705 thumb_call_via_label
[regno
] = gen_label_rtx ();
27706 labelp
= thumb_call_via_label
+ regno
;
27710 if (cfun
->machine
->call_via
[regno
] == NULL
)
27711 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
27712 labelp
= cfun
->machine
->call_via
+ regno
;
27715 output_asm_insn ("bl\t%a0", labelp
);
27719 /* Routines for generating rtl. */
27721 thumb_expand_movmemqi (rtx
*operands
)
27723 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
27724 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
27725 HOST_WIDE_INT len
= INTVAL (operands
[2]);
27726 HOST_WIDE_INT offset
= 0;
27730 emit_insn (gen_movmem12b (out
, in
, out
, in
));
27736 emit_insn (gen_movmem8b (out
, in
, out
, in
));
27742 rtx reg
= gen_reg_rtx (SImode
);
27743 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
27744 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
27751 rtx reg
= gen_reg_rtx (HImode
);
27752 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
27753 plus_constant (Pmode
, in
,
27755 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
27764 rtx reg
= gen_reg_rtx (QImode
);
27765 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
27766 plus_constant (Pmode
, in
,
27768 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
27775 thumb_reload_out_hi (rtx
*operands
)
27777 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
27780 /* Handle reading a half-word from memory during reload. */
27782 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
27784 gcc_unreachable ();
27787 /* Return the length of a function name prefix
27788 that starts with the character 'c'. */
27790 arm_get_strip_length (int c
)
27794 ARM_NAME_ENCODING_LENGTHS
/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  /* Repeatedly skip over recognized encoding prefixes.  */
  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
27833 /* This function is used to emit an EABI tag and its associated value.
27834 We emit the numerical value of the tag in case the assembler does not
27835 support textual tags. (Eg gas prior to 2.20). If requested we include
27836 the tag name in a comment so that anyone reading the assembler output
27837 will know which tag is being set.
27839 This function is not static because arm-c.c needs it too. */
27842 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
27844 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
27845 if (flag_verbose_asm
|| flag_debug_asm
)
27846 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
27847 asm_fprintf (asm_out_file
, "\n");
27851 arm_file_start (void)
27855 if (TARGET_UNIFIED_ASM
)
27856 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
27860 const char *fpu_name
;
27861 if (arm_selected_arch
)
27863 const char* pos
= strchr (arm_selected_arch
->name
, '+');
27867 gcc_assert (strlen (arm_selected_arch
->name
)
27868 <= sizeof (buf
) / sizeof (*pos
));
27869 strncpy (buf
, arm_selected_arch
->name
,
27870 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
27871 buf
[pos
- arm_selected_arch
->name
] = '\0';
27872 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
27873 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
27876 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
27878 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
27879 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
27882 const char* truncated_name
27883 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
27884 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
27887 if (TARGET_SOFT_FLOAT
)
27889 fpu_name
= "softvfp";
27893 fpu_name
= arm_fpu_desc
->name
;
27894 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
27896 if (TARGET_HARD_FLOAT
)
27897 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27898 if (TARGET_HARD_FLOAT_ABI
)
27899 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27902 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
27904 /* Some of these attributes only apply when the corresponding features
27905 are used. However we don't have any easy way of figuring this out.
27906 Conservatively record the setting that would have been used. */
27908 if (flag_rounding_math
)
27909 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27911 if (!flag_unsafe_math_optimizations
)
27913 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27914 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27916 if (flag_signaling_nans
)
27917 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27919 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27920 flag_finite_math_only
? 1 : 3);
27922 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27923 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27924 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27925 flag_short_enums
? 1 : 2);
27927 /* Tag_ABI_optimization_goals. */
27930 else if (optimize
>= 2)
27936 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
27938 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27941 if (arm_fp16_format
)
27942 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27943 (int) arm_fp16_format
);
27945 if (arm_lang_output_object_attributes_hook
)
27946 arm_lang_output_object_attributes_hook();
27949 default_file_start ();
27953 arm_file_end (void)
27957 if (NEED_INDICATE_EXEC_STACK
)
27958 /* Add .note.GNU-stack. */
27959 file_end_indicate_exec_stack ();
27961 if (! thumb_call_reg_needed
)
27964 switch_to_section (text_section
);
27965 asm_fprintf (asm_out_file
, "\t.code 16\n");
27966 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
27968 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
27970 rtx label
= thumb_call_via_label
[regno
];
27974 targetm
.asm_out
.internal_label (asm_out_file
, "L",
27975 CODE_LABEL_NUMBER (label
));
27976 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
27982 /* Symbols in the text segment can be accessed without indirecting via the
27983 constant pool; it may take an extra binary operation, but this is still
27984 faster than indirecting via memory. Don't do this when not optimizing,
27985 since we won't be calculating al of the offsets necessary to do this
27989 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
27991 if (optimize
> 0 && TREE_CONSTANT (decl
))
27992 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
27994 default_encode_section_info (decl
, rtl
, first
);
27996 #endif /* !ARM_PE */
27999 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28001 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28002 && !strcmp (prefix
, "L"))
28004 arm_ccfsm_state
= 0;
28005 arm_target_insn
= NULL
;
28007 default_internal_label (stream
, prefix
, labelno
);
28010 /* Output code to add DELTA to the first argument, and then jump
28011 to FUNCTION. Used for C++ multiple inheritance. */
28013 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
28014 HOST_WIDE_INT delta
,
28015 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
28018 static int thunk_label
= 0;
28021 int mi_delta
= delta
;
28022 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
28024 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
28027 mi_delta
= - mi_delta
;
28029 final_start_function (emit_barrier (), file
, 1);
28033 int labelno
= thunk_label
++;
28034 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
28035 /* Thunks are entered in arm mode when avaiable. */
28036 if (TARGET_THUMB1_ONLY
)
28038 /* push r3 so we can use it as a temporary. */
28039 /* TODO: Omit this save if r3 is not used. */
28040 fputs ("\tpush {r3}\n", file
);
28041 fputs ("\tldr\tr3, ", file
);
28045 fputs ("\tldr\tr12, ", file
);
28047 assemble_name (file
, label
);
28048 fputc ('\n', file
);
28051 /* If we are generating PIC, the ldr instruction below loads
28052 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28053 the address of the add + 8, so we have:
28055 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28058 Note that we have "+ 1" because some versions of GNU ld
28059 don't set the low bit of the result for R_ARM_REL32
28060 relocations against thumb function symbols.
28061 On ARMv6M this is +4, not +8. */
28062 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
28063 assemble_name (file
, labelpc
);
28064 fputs (":\n", file
);
28065 if (TARGET_THUMB1_ONLY
)
28067 /* This is 2 insns after the start of the thunk, so we know it
28068 is 4-byte aligned. */
28069 fputs ("\tadd\tr3, pc, r3\n", file
);
28070 fputs ("\tmov r12, r3\n", file
);
28073 fputs ("\tadd\tr12, pc, r12\n", file
);
28075 else if (TARGET_THUMB1_ONLY
)
28076 fputs ("\tmov r12, r3\n", file
);
28078 if (TARGET_THUMB1_ONLY
)
28080 if (mi_delta
> 255)
28082 fputs ("\tldr\tr3, ", file
);
28083 assemble_name (file
, label
);
28084 fputs ("+4\n", file
);
28085 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
28086 mi_op
, this_regno
, this_regno
);
28088 else if (mi_delta
!= 0)
28090 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28091 mi_op
, this_regno
, this_regno
,
28097 /* TODO: Use movw/movt for large constants when available. */
28098 while (mi_delta
!= 0)
28100 if ((mi_delta
& (3 << shift
)) == 0)
28104 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28105 mi_op
, this_regno
, this_regno
,
28106 mi_delta
& (0xff << shift
));
28107 mi_delta
&= ~(0xff << shift
);
28114 if (TARGET_THUMB1_ONLY
)
28115 fputs ("\tpop\t{r3}\n", file
);
28117 fprintf (file
, "\tbx\tr12\n");
28118 ASM_OUTPUT_ALIGN (file
, 2);
28119 assemble_name (file
, label
);
28120 fputs (":\n", file
);
28123 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28124 rtx tem
= XEXP (DECL_RTL (function
), 0);
28125 tem
= plus_constant (GET_MODE (tem
), tem
, -7);
28126 tem
= gen_rtx_MINUS (GET_MODE (tem
),
28128 gen_rtx_SYMBOL_REF (Pmode
,
28129 ggc_strdup (labelpc
)));
28130 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
28133 /* Output ".word .LTHUNKn". */
28134 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
28136 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
28137 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
28141 fputs ("\tb\t", file
);
28142 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28143 if (NEED_PLT_RELOC
)
28144 fputs ("(PLT)", file
);
28145 fputc ('\n', file
);
28148 final_end_function ();
28152 arm_emit_vector_const (FILE *file
, rtx x
)
28155 const char * pattern
;
28157 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
28159 switch (GET_MODE (x
))
28161 case V2SImode
: pattern
= "%08x"; break;
28162 case V4HImode
: pattern
= "%04x"; break;
28163 case V8QImode
: pattern
= "%02x"; break;
28164 default: gcc_unreachable ();
28167 fprintf (file
, "0x");
28168 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
28172 element
= CONST_VECTOR_ELT (x
, i
);
28173 fprintf (file
, pattern
, INTVAL (element
));
28179 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28180 HFmode constant pool entries are actually loaded with ldr. */
28182 arm_emit_fp16_const (rtx c
)
28187 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
28188 bits
= real_to_target (NULL
, &r
, HFmode
);
28189 if (WORDS_BIG_ENDIAN
)
28190 assemble_zeros (2);
28191 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
28192 if (!WORDS_BIG_ENDIAN
)
28193 assemble_zeros (2);
28197 arm_output_load_gr (rtx
*operands
)
28204 if (!MEM_P (operands
[1])
28205 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
28206 || !REG_P (reg
= XEXP (sum
, 0))
28207 || !CONST_INT_P (offset
= XEXP (sum
, 1))
28208 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
28209 return "wldrw%?\t%0, %1";
28211 /* Fix up an out-of-range load of a GR register. */
28212 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
28213 wcgr
= operands
[0];
28215 output_asm_insn ("ldr%?\t%0, %1", operands
);
28217 operands
[0] = wcgr
;
28219 output_asm_insn ("tmcr%?\t%0, %1", operands
);
28220 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
28225 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28227 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28228 named arg and all anonymous args onto the stack.
28229 XXX I know the prologue shouldn't be pushing registers, but it is faster
28233 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
28234 enum machine_mode mode
,
28237 int second_time ATTRIBUTE_UNUSED
)
28239 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
28242 cfun
->machine
->uses_anonymous_args
= 1;
28243 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
28245 nregs
= pcum
->aapcs_ncrn
;
28246 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
28250 nregs
= pcum
->nregs
;
28252 if (nregs
< NUM_ARG_REGS
)
28253 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
28256 /* We can't rely on the caller doing the proper promotion when
28257 using APCS or ATPCS. */
28260 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
28262 return !TARGET_AAPCS_BASED
;
28265 static enum machine_mode
28266 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
28267 enum machine_mode mode
,
28268 int *punsignedp ATTRIBUTE_UNUSED
,
28269 const_tree fntype ATTRIBUTE_UNUSED
,
28270 int for_return ATTRIBUTE_UNUSED
)
28272 if (GET_MODE_CLASS (mode
) == MODE_INT
28273 && GET_MODE_SIZE (mode
) < 4)
28279 /* AAPCS based ABIs use short enums by default. */
28282 arm_default_short_enums (void)
28284 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
28288 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28291 arm_align_anon_bitfield (void)
28293 return TARGET_AAPCS_BASED
;
28297 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28300 arm_cxx_guard_type (void)
28302 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
28306 /* The EABI says test the least significant bit of a guard variable. */
28309 arm_cxx_guard_mask_bit (void)
28311 return TARGET_AAPCS_BASED
;
28315 /* The EABI specifies that all array cookies are 8 bytes long. */
28318 arm_get_cookie_size (tree type
)
28322 if (!TARGET_AAPCS_BASED
)
28323 return default_cxx_get_cookie_size (type
);
28325 size
= build_int_cst (sizetype
, 8);
28330 /* The EABI says that array cookies should also contain the element size. */
28333 arm_cookie_has_size (void)
28335 return TARGET_AAPCS_BASED
;
28339 /* The EABI says constructors and destructors should return a pointer to
28340 the object constructed/destroyed. */
28343 arm_cxx_cdtor_returns_this (void)
28345 return TARGET_AAPCS_BASED
;
28348 /* The EABI says that an inline function may never be the key
28352 arm_cxx_key_method_may_be_inline (void)
28354 return !TARGET_AAPCS_BASED
;
28358 arm_cxx_determine_class_data_visibility (tree decl
)
28360 if (!TARGET_AAPCS_BASED
28361 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
28364 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28365 is exported. However, on systems without dynamic vague linkage,
28366 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28367 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
28368 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
28370 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
28371 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
28375 arm_cxx_class_data_always_comdat (void)
28377 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28378 vague linkage if the class has no key function. */
28379 return !TARGET_AAPCS_BASED
;
28383 /* The EABI says __aeabi_atexit should be used to register static
28387 arm_cxx_use_aeabi_atexit (void)
28389 return TARGET_AAPCS_BASED
;
28394 arm_set_return_address (rtx source
, rtx scratch
)
28396 arm_stack_offsets
*offsets
;
28397 HOST_WIDE_INT delta
;
28399 unsigned long saved_regs
;
28401 offsets
= arm_get_frame_offsets ();
28402 saved_regs
= offsets
->saved_regs_mask
;
28404 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
28405 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28408 if (frame_pointer_needed
)
28409 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
28412 /* LR will be the first saved register. */
28413 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
28418 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
28419 GEN_INT (delta
& ~4095)));
28424 addr
= stack_pointer_rtx
;
28426 addr
= plus_constant (Pmode
, addr
, delta
);
28428 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28434 thumb_set_return_address (rtx source
, rtx scratch
)
28436 arm_stack_offsets
*offsets
;
28437 HOST_WIDE_INT delta
;
28438 HOST_WIDE_INT limit
;
28441 unsigned long mask
;
28445 offsets
= arm_get_frame_offsets ();
28446 mask
= offsets
->saved_regs_mask
;
28447 if (mask
& (1 << LR_REGNUM
))
28450 /* Find the saved regs. */
28451 if (frame_pointer_needed
)
28453 delta
= offsets
->soft_frame
- offsets
->saved_args
;
28454 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
28460 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
28463 /* Allow for the stack frame. */
28464 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
28466 /* The link register is always the first saved register. */
28469 /* Construct the address. */
28470 addr
= gen_rtx_REG (SImode
, reg
);
28473 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
28474 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
28478 addr
= plus_constant (Pmode
, addr
, delta
);
28480 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28483 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28486 /* Implements target hook vector_mode_supported_p. */
28488 arm_vector_mode_supported_p (enum machine_mode mode
)
28490 /* Neon also supports V2SImode, etc. listed in the clause below. */
28491 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
28492 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
28495 if ((TARGET_NEON
|| TARGET_IWMMXT
)
28496 && ((mode
== V2SImode
)
28497 || (mode
== V4HImode
)
28498 || (mode
== V8QImode
)))
28501 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
28502 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
28503 || mode
== V2HAmode
))
28509 /* Implements target hook array_mode_supported_p. */
28512 arm_array_mode_supported_p (enum machine_mode mode
,
28513 unsigned HOST_WIDE_INT nelems
)
28516 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
28517 && (nelems
>= 2 && nelems
<= 4))
28523 /* Use the option -mvectorize-with-neon-double to override the use of quardword
28524 registers when autovectorizing for Neon, at least until multiple vector
28525 widths are supported properly by the middle-end. */
28527 static enum machine_mode
28528 arm_preferred_simd_mode (enum machine_mode mode
)
28534 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
28536 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
28538 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
28540 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
28542 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
28549 if (TARGET_REALLY_IWMMXT
)
28565 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28567 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28568 using r0-r4 for function arguments, r7 for the stack frame and don't have
28569 enough left over to do doubleword arithmetic. For Thumb-2 all the
28570 potentially problematic instructions accept high registers so this is not
28571 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28572 that require many low registers. */
28574 arm_class_likely_spilled_p (reg_class_t rclass
)
28576 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
28577 || rclass
== CC_REG
)
28583 /* Implements target hook small_register_classes_for_mode_p. */
28585 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
28587 return TARGET_THUMB1
;
28590 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28591 ARM insns and therefore guarantee that the shift count is modulo 256.
28592 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28593 guarantee no particular behavior for out-of-range counts. */
28595 static unsigned HOST_WIDE_INT
28596 arm_shift_truncation_mask (enum machine_mode mode
)
28598 return mode
== SImode
? 255 : 0;
28602 /* Map internal gcc register numbers to DWARF2 register numbers. */
28605 arm_dbx_register_number (unsigned int regno
)
28610 if (IS_VFP_REGNUM (regno
))
28612 /* See comment in arm_dwarf_register_span. */
28613 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28614 return 64 + regno
- FIRST_VFP_REGNUM
;
28616 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
28619 if (IS_IWMMXT_GR_REGNUM (regno
))
28620 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
28622 if (IS_IWMMXT_REGNUM (regno
))
28623 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
28625 gcc_unreachable ();
28628 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28629 GCC models tham as 64 32-bit registers, so we need to describe this to
28630 the DWARF generation code. Other registers can use the default. */
28632 arm_dwarf_register_span (rtx rtl
)
28634 enum machine_mode mode
;
28640 regno
= REGNO (rtl
);
28641 if (!IS_VFP_REGNUM (regno
))
28644 /* XXX FIXME: The EABI defines two VFP register ranges:
28645 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28647 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28648 corresponding D register. Until GDB supports this, we shall use the
28649 legacy encodings. We also use these encodings for D0-D15 for
28650 compatibility with older debuggers. */
28651 mode
= GET_MODE (rtl
);
28652 if (GET_MODE_SIZE (mode
) < 8)
28655 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28657 nregs
= GET_MODE_SIZE (mode
) / 4;
28658 for (i
= 0; i
< nregs
; i
+= 2)
28659 if (TARGET_BIG_END
)
28661 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28662 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
28666 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
28667 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28672 nregs
= GET_MODE_SIZE (mode
) / 8;
28673 for (i
= 0; i
< nregs
; i
++)
28674 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
28677 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
28680 #if ARM_UNWIND_INFO
28681 /* Emit unwind directives for a store-multiple instruction or stack pointer
28682 push during alignment.
28683 These should only ever be generated by the function prologue code, so
28684 expect them to have a particular form. */
28687 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
28690 HOST_WIDE_INT offset
;
28691 HOST_WIDE_INT nregs
;
28697 e
= XVECEXP (p
, 0, 0);
28698 if (GET_CODE (e
) != SET
)
28701 /* First insn will adjust the stack pointer. */
28702 if (GET_CODE (e
) != SET
28703 || !REG_P (XEXP (e
, 0))
28704 || REGNO (XEXP (e
, 0)) != SP_REGNUM
28705 || GET_CODE (XEXP (e
, 1)) != PLUS
)
28708 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
28709 nregs
= XVECLEN (p
, 0) - 1;
28711 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
28714 /* The function prologue may also push pc, but not annotate it as it is
28715 never restored. We turn this into a stack pointer adjustment. */
28716 if (nregs
* 4 == offset
- 4)
28718 fprintf (asm_out_file
, "\t.pad #4\n");
28722 fprintf (asm_out_file
, "\t.save {");
28724 else if (IS_VFP_REGNUM (reg
))
28727 fprintf (asm_out_file
, "\t.vsave {");
28730 /* Unknown register type. */
28733 /* If the stack increment doesn't match the size of the saved registers,
28734 something has gone horribly wrong. */
28735 if (offset
!= nregs
* reg_size
)
28740 /* The remaining insns will describe the stores. */
28741 for (i
= 1; i
<= nregs
; i
++)
28743 /* Expect (set (mem <addr>) (reg)).
28744 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28745 e
= XVECEXP (p
, 0, i
);
28746 if (GET_CODE (e
) != SET
28747 || !MEM_P (XEXP (e
, 0))
28748 || !REG_P (XEXP (e
, 1)))
28751 reg
= REGNO (XEXP (e
, 1));
28756 fprintf (asm_out_file
, ", ");
28757 /* We can't use %r for vfp because we need to use the
28758 double precision register names. */
28759 if (IS_VFP_REGNUM (reg
))
28760 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
28762 asm_fprintf (asm_out_file
, "%r", reg
);
28764 #ifdef ENABLE_CHECKING
28765 /* Check that the addresses are consecutive. */
28766 e
= XEXP (XEXP (e
, 0), 0);
28767 if (GET_CODE (e
) == PLUS
)
28769 offset
+= reg_size
;
28770 if (!REG_P (XEXP (e
, 0))
28771 || REGNO (XEXP (e
, 0)) != SP_REGNUM
28772 || !CONST_INT_P (XEXP (e
, 1))
28773 || offset
!= INTVAL (XEXP (e
, 1)))
28778 || REGNO (e
) != SP_REGNUM
)
28782 fprintf (asm_out_file
, "}\n");
28785 /* Emit unwind directives for a SET. */
28788 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
28796 switch (GET_CODE (e0
))
28799 /* Pushing a single register. */
28800 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
28801 || !REG_P (XEXP (XEXP (e0
, 0), 0))
28802 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
28805 asm_fprintf (asm_out_file
, "\t.save ");
28806 if (IS_VFP_REGNUM (REGNO (e1
)))
28807 asm_fprintf(asm_out_file
, "{d%d}\n",
28808 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
28810 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
28814 if (REGNO (e0
) == SP_REGNUM
)
28816 /* A stack increment. */
28817 if (GET_CODE (e1
) != PLUS
28818 || !REG_P (XEXP (e1
, 0))
28819 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
28820 || !CONST_INT_P (XEXP (e1
, 1)))
28823 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
28824 -INTVAL (XEXP (e1
, 1)));
28826 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
28828 HOST_WIDE_INT offset
;
28830 if (GET_CODE (e1
) == PLUS
)
28832 if (!REG_P (XEXP (e1
, 0))
28833 || !CONST_INT_P (XEXP (e1
, 1)))
28835 reg
= REGNO (XEXP (e1
, 0));
28836 offset
= INTVAL (XEXP (e1
, 1));
28837 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
28838 HARD_FRAME_POINTER_REGNUM
, reg
,
28841 else if (REG_P (e1
))
28844 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
28845 HARD_FRAME_POINTER_REGNUM
, reg
);
28850 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
28852 /* Move from sp to reg. */
28853 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
28855 else if (GET_CODE (e1
) == PLUS
28856 && REG_P (XEXP (e1
, 0))
28857 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
28858 && CONST_INT_P (XEXP (e1
, 1)))
28860 /* Set reg to offset from sp. */
28861 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
28862 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
28874 /* Emit unwind directives for the given insn. */
28877 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
28880 bool handled_one
= false;
28882 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
28885 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
28886 && (TREE_NOTHROW (current_function_decl
)
28887 || crtl
->all_throwers_are_sibcalls
))
28890 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
28893 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
28895 switch (REG_NOTE_KIND (note
))
28897 case REG_FRAME_RELATED_EXPR
:
28898 pat
= XEXP (note
, 0);
28901 case REG_CFA_REGISTER
:
28902 pat
= XEXP (note
, 0);
28905 pat
= PATTERN (insn
);
28906 if (GET_CODE (pat
) == PARALLEL
)
28907 pat
= XVECEXP (pat
, 0, 0);
28910 /* Only emitted for IS_STACKALIGN re-alignment. */
28915 src
= SET_SRC (pat
);
28916 dest
= SET_DEST (pat
);
28918 gcc_assert (src
== stack_pointer_rtx
);
28919 reg
= REGNO (dest
);
28920 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28923 handled_one
= true;
28926 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
28927 to get correct dwarf information for shrink-wrap. We should not
28928 emit unwind information for it because these are used either for
28929 pretend arguments or notes to adjust sp and restore registers from
28931 case REG_CFA_ADJUST_CFA
:
28932 case REG_CFA_RESTORE
:
28935 case REG_CFA_DEF_CFA
:
28936 case REG_CFA_EXPRESSION
:
28937 case REG_CFA_OFFSET
:
28938 /* ??? Only handling here what we actually emit. */
28939 gcc_unreachable ();
28947 pat
= PATTERN (insn
);
28950 switch (GET_CODE (pat
))
28953 arm_unwind_emit_set (asm_out_file
, pat
);
28957 /* Store multiple. */
28958 arm_unwind_emit_sequence (asm_out_file
, pat
);
28967 /* Output a reference from a function exception table to the type_info
28968 object X. The EABI specifies that the symbol should be relocated by
28969 an R_ARM_TARGET2 relocation. */
28972 arm_output_ttype (rtx x
)
28974 fputs ("\t.word\t", asm_out_file
);
28975 output_addr_const (asm_out_file
, x
);
28976 /* Use special relocations for symbol references. */
28977 if (!CONST_INT_P (x
))
28978 fputs ("(TARGET2)", asm_out_file
);
28979 fputc ('\n', asm_out_file
);
28984 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28987 arm_asm_emit_except_personality (rtx personality
)
28989 fputs ("\t.personality\t", asm_out_file
);
28990 output_addr_const (asm_out_file
, personality
);
28991 fputc ('\n', asm_out_file
);
28994 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28997 arm_asm_init_sections (void)
28999 exception_section
= get_unnamed_section (0, output_section_asm_op
,
29002 #endif /* ARM_UNWIND_INFO */
29004 /* Output unwind directives for the start/end of a function. */
29007 arm_output_fn_unwind (FILE * f
, bool prologue
)
29009 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29013 fputs ("\t.fnstart\n", f
);
29016 /* If this function will never be unwound, then mark it as such.
29017 The came condition is used in arm_unwind_emit to suppress
29018 the frame annotations. */
29019 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29020 && (TREE_NOTHROW (current_function_decl
)
29021 || crtl
->all_throwers_are_sibcalls
))
29022 fputs("\t.cantunwind\n", f
);
29024 fputs ("\t.fnend\n", f
);
29029 arm_emit_tls_decoration (FILE *fp
, rtx x
)
29031 enum tls_reloc reloc
;
29034 val
= XVECEXP (x
, 0, 0);
29035 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
29037 output_addr_const (fp
, val
);
29042 fputs ("(tlsgd)", fp
);
29045 fputs ("(tlsldm)", fp
);
29048 fputs ("(tlsldo)", fp
);
29051 fputs ("(gottpoff)", fp
);
29054 fputs ("(tpoff)", fp
);
29057 fputs ("(tlsdesc)", fp
);
29060 gcc_unreachable ();
29069 fputs (" + (. - ", fp
);
29070 output_addr_const (fp
, XVECEXP (x
, 0, 2));
29071 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29072 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
29073 output_addr_const (fp
, XVECEXP (x
, 0, 3));
29083 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29086 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
29088 gcc_assert (size
== 4);
29089 fputs ("\t.word\t", file
);
29090 output_addr_const (file
, x
);
29091 fputs ("(tlsldo)", file
);
29094 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29097 arm_output_addr_const_extra (FILE *fp
, rtx x
)
29099 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
29100 return arm_emit_tls_decoration (fp
, x
);
29101 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
29104 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
29106 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
29107 assemble_name_raw (fp
, label
);
29111 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
29113 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
29117 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29121 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
29123 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29127 output_addr_const (fp
, XVECEXP (x
, 0, 1));
29131 else if (GET_CODE (x
) == CONST_VECTOR
)
29132 return arm_emit_vector_const (fp
, x
);
29137 /* Output assembly for a shift instruction.
29138 SET_FLAGS determines how the instruction modifies the condition codes.
29139 0 - Do not set condition codes.
29140 1 - Set condition codes.
29141 2 - Use smallest instruction. */
29143 arm_output_shift(rtx
* operands
, int set_flags
)
29146 static const char flag_chars
[3] = {'?', '.', '!'};
29151 c
= flag_chars
[set_flags
];
29152 if (TARGET_UNIFIED_ASM
)
29154 shift
= shift_op(operands
[3], &val
);
29158 operands
[2] = GEN_INT(val
);
29159 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
29162 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
29165 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
29166 output_asm_insn (pattern
, operands
);
29170 /* Output assembly for a WMMX immediate shift instruction. */
29172 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
29174 int shift
= INTVAL (operands
[2]);
29176 enum machine_mode opmode
= GET_MODE (operands
[0]);
29178 gcc_assert (shift
>= 0);
29180 /* If the shift value in the register versions is > 63 (for D qualifier),
29181 31 (for W qualifier) or 15 (for H qualifier). */
29182 if (((opmode
== V4HImode
) && (shift
> 15))
29183 || ((opmode
== V2SImode
) && (shift
> 31))
29184 || ((opmode
== DImode
) && (shift
> 63)))
29188 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29189 output_asm_insn (templ
, operands
);
29190 if (opmode
== DImode
)
29192 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
29193 output_asm_insn (templ
, operands
);
29198 /* The destination register will contain all zeros. */
29199 sprintf (templ
, "wzero\t%%0");
29200 output_asm_insn (templ
, operands
);
29205 if ((opmode
== DImode
) && (shift
> 32))
29207 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29208 output_asm_insn (templ
, operands
);
29209 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
29210 output_asm_insn (templ
, operands
);
29214 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
29215 output_asm_insn (templ
, operands
);
29220 /* Output assembly for a WMMX tinsr instruction. */
29222 arm_output_iwmmxt_tinsr (rtx
*operands
)
29224 int mask
= INTVAL (operands
[3]);
29227 int units
= mode_nunits
[GET_MODE (operands
[0])];
29228 gcc_assert ((mask
& (mask
- 1)) == 0);
29229 for (i
= 0; i
< units
; ++i
)
29231 if ((mask
& 0x01) == 1)
29237 gcc_assert (i
< units
);
29239 switch (GET_MODE (operands
[0]))
29242 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
29245 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
29248 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
29251 gcc_unreachable ();
29254 output_asm_insn (templ
, operands
);
29259 /* Output a Thumb-1 casesi dispatch sequence. */
29261 thumb1_output_casesi (rtx
*operands
)
29263 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[0]));
29265 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29267 switch (GET_MODE(diff_vec
))
29270 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29271 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29273 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29274 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29276 return "bl\t%___gnu_thumb1_case_si";
29278 gcc_unreachable ();
29282 /* Output a Thumb-2 casesi instruction. */
29284 thumb2_output_casesi (rtx
*operands
)
29286 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
29288 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29290 output_asm_insn ("cmp\t%0, %1", operands
);
29291 output_asm_insn ("bhi\t%l3", operands
);
29292 switch (GET_MODE(diff_vec
))
29295 return "tbb\t[%|pc, %0]";
29297 return "tbh\t[%|pc, %0, lsl #1]";
29301 output_asm_insn ("adr\t%4, %l2", operands
);
29302 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
29303 output_asm_insn ("add\t%4, %4, %5", operands
);
29308 output_asm_insn ("adr\t%4, %l2", operands
);
29309 return "ldr\t%|pc, [%4, %0, lsl #2]";
29312 gcc_unreachable ();
29316 /* Most ARM cores are single issue, but some newer ones can dual issue.
29317 The scheduler descriptions rely on this being correct. */
29319 arm_issue_rate (void)
29345 /* A table and a function to perform ARM-specific name mangling for
29346 NEON vector types in order to conform to the AAPCS (see "Procedure
29347 Call Standard for the ARM Architecture", Appendix A). To qualify
29348 for emission with the mangled names defined in that document, a
29349 vector type must not only be of the correct mode but also be
29350 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29353 enum machine_mode mode
;
29354 const char *element_type_name
;
29355 const char *aapcs_name
;
29356 } arm_mangle_map_entry
;
29358 static arm_mangle_map_entry arm_mangle_map
[] = {
29359 /* 64-bit containerized types. */
29360 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
29361 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29362 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
29363 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29364 { V4HFmode
, "__builtin_neon_hf", "18__simd64_float16_t" },
29365 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
29366 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
29367 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
29368 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29369 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29371 /* 128-bit containerized types. */
29372 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
29373 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29374 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
29375 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29376 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
29377 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
29378 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
29379 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29380 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29381 { VOIDmode
, NULL
, NULL
}
29385 arm_mangle_type (const_tree type
)
29387 arm_mangle_map_entry
*pos
= arm_mangle_map
;
29389 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29390 has to be managled as if it is in the "std" namespace. */
29391 if (TARGET_AAPCS_BASED
29392 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
29393 return "St9__va_list";
29395 /* Half-precision float. */
29396 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
29399 if (TREE_CODE (type
) != VECTOR_TYPE
)
29402 /* Check the mode of the vector type, and the name of the vector
29403 element type, against the table. */
29404 while (pos
->mode
!= VOIDmode
)
29406 tree elt_type
= TREE_TYPE (type
);
29408 if (pos
->mode
== TYPE_MODE (type
)
29409 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
29410 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
29411 pos
->element_type_name
))
29412 return pos
->aapcs_name
;
29417 /* Use the default mangling for unrecognized (possibly user-defined)
29422 /* Order of allocation of core registers for Thumb: this allocation is
29423 written over the corresponding initial entries of the array
29424 initialized with REG_ALLOC_ORDER. We allocate all low registers
29425 first. Saving and restoring a low register is usually cheaper than
29426 using a call-clobbered high register. */
29428 static const int thumb_core_reg_alloc_order
[] =
29430 3, 2, 1, 0, 4, 5, 6, 7,
29431 14, 12, 8, 9, 10, 11
29434 /* Adjust register allocation order when compiling for Thumb. */
29437 arm_order_regs_for_local_alloc (void)
29439 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
29440 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
29442 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
29443 sizeof (thumb_core_reg_alloc_order
));
29446 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29449 arm_frame_pointer_required (void)
29451 return (cfun
->has_nonlocal_label
29452 || SUBTARGET_FRAME_POINTER_REQUIRED
29453 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
29456 /* Only thumb1 can't support conditional execution, so return true if
29457 the target is not thumb1. */
29459 arm_have_conditional_execution (void)
29461 return !TARGET_THUMB1
;
29465 arm_builtin_vectorized_function (tree fndecl
, tree type_out
, tree type_in
)
29467 enum machine_mode in_mode
, out_mode
;
29470 if (TREE_CODE (type_out
) != VECTOR_TYPE
29471 || TREE_CODE (type_in
) != VECTOR_TYPE
29472 || !(TARGET_NEON
&& TARGET_FPU_ARMV8
&& flag_unsafe_math_optimizations
))
29475 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
29476 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
29477 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
29478 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
29480 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29481 decl of the vectorized builtin for the appropriate vector mode.
29482 NULL_TREE is returned if no such builtin is available. */
29483 #undef ARM_CHECK_BUILTIN_MODE
29484 #define ARM_CHECK_BUILTIN_MODE(C) \
29485 (out_mode == SFmode && out_n == C \
29486 && in_mode == SFmode && in_n == C)
29488 #undef ARM_FIND_VRINT_VARIANT
29489 #define ARM_FIND_VRINT_VARIANT(N) \
29490 (ARM_CHECK_BUILTIN_MODE (2) \
29491 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29492 : (ARM_CHECK_BUILTIN_MODE (4) \
29493 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29496 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
)
29498 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
29501 case BUILT_IN_FLOORF
:
29502 return ARM_FIND_VRINT_VARIANT (vrintm
);
29503 case BUILT_IN_CEILF
:
29504 return ARM_FIND_VRINT_VARIANT (vrintp
);
29505 case BUILT_IN_TRUNCF
:
29506 return ARM_FIND_VRINT_VARIANT (vrintz
);
29507 case BUILT_IN_ROUNDF
:
29508 return ARM_FIND_VRINT_VARIANT (vrinta
);
29515 #undef ARM_CHECK_BUILTIN_MODE
29516 #undef ARM_FIND_VRINT_VARIANT
29518 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29519 static HOST_WIDE_INT
29520 arm_vector_alignment (const_tree type
)
29522 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
29524 if (TARGET_AAPCS_BASED
)
29525 align
= MIN (align
, 64);
29530 static unsigned int
29531 arm_autovectorize_vector_sizes (void)
29533 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
29537 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
29539 /* Vectors which aren't in packed structures will not be less aligned than
29540 the natural alignment of their element type, so this is safe. */
29541 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
29544 return default_builtin_vector_alignment_reachable (type
, is_packed
);
29548 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
29549 const_tree type
, int misalignment
,
29552 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
29554 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
29559 /* If the misalignment is unknown, we should be able to handle the access
29560 so long as it is not to a member of a packed data structure. */
29561 if (misalignment
== -1)
29564 /* Return true if the misalignment is a multiple of the natural alignment
29565 of the vector's element type. This is probably always going to be
29566 true in practice, since we've already established that this isn't a
29568 return ((misalignment
% align
) == 0);
29571 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
29576 arm_conditional_register_usage (void)
29580 if (TARGET_THUMB1
&& optimize_size
)
29582 /* When optimizing for size on Thumb-1, it's better not
29583 to use the HI regs, because of the overhead of
29585 for (regno
= FIRST_HI_REGNUM
;
29586 regno
<= LAST_HI_REGNUM
; ++regno
)
29587 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
29590 /* The link register can be clobbered by any branch insn,
29591 but we have no way to track that at present, so mark
29592 it as unavailable. */
29594 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
29596 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
29598 /* VFPv3 registers are disabled when earlier VFP
29599 versions are selected due to the definition of
29600 LAST_VFP_REGNUM. */
29601 for (regno
= FIRST_VFP_REGNUM
;
29602 regno
<= LAST_VFP_REGNUM
; ++ regno
)
29604 fixed_regs
[regno
] = 0;
29605 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
29606 || regno
>= FIRST_VFP_REGNUM
+ 32;
29610 if (TARGET_REALLY_IWMMXT
)
29612 regno
= FIRST_IWMMXT_GR_REGNUM
;
29613 /* The 2002/10/09 revision of the XScale ABI has wCG0
29614 and wCG1 as call-preserved registers. The 2002/11/21
29615 revision changed this so that all wCG registers are
29616 scratch registers. */
29617 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
29618 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
29619 fixed_regs
[regno
] = 0;
29620 /* The XScale ABI has wR0 - wR9 as scratch registers,
29621 the rest as call-preserved registers. */
29622 for (regno
= FIRST_IWMMXT_REGNUM
;
29623 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
29625 fixed_regs
[regno
] = 0;
29626 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
29630 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
29632 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29633 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29635 else if (TARGET_APCS_STACK
)
29637 fixed_regs
[10] = 1;
29638 call_used_regs
[10] = 1;
29640 /* -mcaller-super-interworking reserves r11 for calls to
29641 _interwork_r11_call_via_rN(). Making the register global
29642 is an easy way of ensuring that it remains valid for all
29644 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
29645 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
29647 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29648 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29649 if (TARGET_CALLER_INTERWORKING
)
29650 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29652 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29656 arm_preferred_rename_class (reg_class_t rclass
)
29658 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29659 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
29660 and code size can be reduced. */
29661 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
29667 /* Compute the atrribute "length" of insn "*push_multi".
29668 So this function MUST be kept in sync with that insn pattern. */
29670 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
29672 int i
, regno
, hi_reg
;
29673 int num_saves
= XVECLEN (parallel_op
, 0);
29683 regno
= REGNO (first_op
);
29684 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
29685 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
29687 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
29688 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
29696 /* Compute the number of instructions emitted by output_move_double. */
29698 arm_count_output_move_double_insns (rtx
*operands
)
29702 /* output_move_double may modify the operands array, so call it
29703 here on a copy of the array. */
29704 ops
[0] = operands
[0];
29705 ops
[1] = operands
[1];
29706 output_move_double (ops
, false, &count
);
29711 vfp3_const_double_for_fract_bits (rtx operand
)
29713 REAL_VALUE_TYPE r0
;
29715 if (!CONST_DOUBLE_P (operand
))
29718 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
29719 if (exact_real_inverse (DFmode
, &r0
))
29721 if (exact_real_truncate (DFmode
, &r0
))
29723 HOST_WIDE_INT value
= real_to_integer (&r0
);
29724 value
= value
& 0xffffffff;
29725 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
29726 return int_log2 (value
);
29733 vfp3_const_double_for_bits (rtx operand
)
29735 REAL_VALUE_TYPE r0
;
29737 if (!CONST_DOUBLE_P (operand
))
29740 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
29741 if (exact_real_truncate (DFmode
, &r0
))
29743 HOST_WIDE_INT value
= real_to_integer (&r0
);
29744 value
= value
& 0xffffffff;
29745 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
29746 return int_log2 (value
);
29752 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29755 arm_pre_atomic_barrier (enum memmodel model
)
29757 if (need_atomic_barrier_p (model
, true))
29758 emit_insn (gen_memory_barrier ());
29762 arm_post_atomic_barrier (enum memmodel model
)
29764 if (need_atomic_barrier_p (model
, false))
29765 emit_insn (gen_memory_barrier ());
29768 /* Emit the load-exclusive and store-exclusive instructions.
29769 Use acquire and release versions if necessary. */
29772 arm_emit_load_exclusive (enum machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
29774 rtx (*gen
) (rtx
, rtx
);
29780 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
29781 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
29782 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
29783 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
29785 gcc_unreachable ();
29792 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
29793 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
29794 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
29795 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
29797 gcc_unreachable ();
29801 emit_insn (gen (rval
, mem
));
29805 arm_emit_store_exclusive (enum machine_mode mode
, rtx bval
, rtx rval
,
29808 rtx (*gen
) (rtx
, rtx
, rtx
);
29814 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
29815 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
29816 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
29817 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
29819 gcc_unreachable ();
29826 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
29827 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
29828 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
29829 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
29831 gcc_unreachable ();
29835 emit_insn (gen (bval
, rval
, mem
));
29838 /* Mark the previous jump instruction as unlikely. */
29841 emit_unlikely_jump (rtx insn
)
29843 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
29845 insn
= emit_jump_insn (insn
);
29846 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
29849 /* Expand a compare and swap pattern. */
29852 arm_expand_compare_and_swap (rtx operands
[])
29854 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
29855 enum machine_mode mode
;
29856 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
29858 bval
= operands
[0];
29859 rval
= operands
[1];
29861 oldval
= operands
[3];
29862 newval
= operands
[4];
29863 is_weak
= operands
[5];
29864 mod_s
= operands
[6];
29865 mod_f
= operands
[7];
29866 mode
= GET_MODE (mem
);
29868 /* Normally the succ memory model must be stronger than fail, but in the
29869 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29870 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29872 if (TARGET_HAVE_LDACQ
29873 && INTVAL (mod_f
) == MEMMODEL_ACQUIRE
29874 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
29875 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
29881 /* For narrow modes, we're going to perform the comparison in SImode,
29882 so do the zero-extension now. */
29883 rval
= gen_reg_rtx (SImode
);
29884 oldval
= convert_modes (SImode
, mode
, oldval
, true);
29888 /* Force the value into a register if needed. We waited until after
29889 the zero-extension above to do this properly. */
29890 if (!arm_add_operand (oldval
, SImode
))
29891 oldval
= force_reg (SImode
, oldval
);
29895 if (!cmpdi_operand (oldval
, mode
))
29896 oldval
= force_reg (mode
, oldval
);
29900 gcc_unreachable ();
29905 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
29906 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
29907 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
29908 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
29910 gcc_unreachable ();
29913 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
29915 if (mode
== QImode
|| mode
== HImode
)
29916 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
29918 /* In all cases, we arrange for success to be signaled by Z set.
29919 This arrangement allows for the boolean result to be used directly
29920 in a subsequent branch, post optimization. */
29921 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
29922 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
29923 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
29926 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29927 another memory store between the load-exclusive and store-exclusive can
29928 reset the monitor from Exclusive to Open state. This means we must wait
29929 until after reload to split the pattern, lest we get a register spill in
29930 the middle of the atomic sequence. */
29933 arm_split_compare_and_swap (rtx operands
[])
29935 rtx rval
, mem
, oldval
, newval
, scratch
;
29936 enum machine_mode mode
;
29937 enum memmodel mod_s
, mod_f
;
29939 rtx label1
, label2
, x
, cond
;
29941 rval
= operands
[0];
29943 oldval
= operands
[2];
29944 newval
= operands
[3];
29945 is_weak
= (operands
[4] != const0_rtx
);
29946 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
29947 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
29948 scratch
= operands
[7];
29949 mode
= GET_MODE (mem
);
29951 bool use_acquire
= TARGET_HAVE_LDACQ
29952 && !(mod_s
== MEMMODEL_RELAXED
29953 || mod_s
== MEMMODEL_CONSUME
29954 || mod_s
== MEMMODEL_RELEASE
);
29956 bool use_release
= TARGET_HAVE_LDACQ
29957 && !(mod_s
== MEMMODEL_RELAXED
29958 || mod_s
== MEMMODEL_CONSUME
29959 || mod_s
== MEMMODEL_ACQUIRE
);
29961 /* Checks whether a barrier is needed and emits one accordingly. */
29962 if (!(use_acquire
|| use_release
))
29963 arm_pre_atomic_barrier (mod_s
);
29968 label1
= gen_label_rtx ();
29969 emit_label (label1
);
29971 label2
= gen_label_rtx ();
29973 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
29975 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
29976 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
29977 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
29978 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
29979 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
29981 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
29983 /* Weak or strong, we want EQ to be true for success, so that we
29984 match the flags that we got from the compare above. */
29985 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
29986 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
29987 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
29991 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
29992 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
29993 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
29994 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
29997 if (mod_f
!= MEMMODEL_RELAXED
)
29998 emit_label (label2
);
30000 /* Checks whether a barrier is needed and emits one accordingly. */
30001 if (!(use_acquire
|| use_release
))
30002 arm_post_atomic_barrier (mod_s
);
30004 if (mod_f
== MEMMODEL_RELAXED
)
30005 emit_label (label2
);
30009 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
30010 rtx value
, rtx model_rtx
, rtx cond
)
30012 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
30013 enum machine_mode mode
= GET_MODE (mem
);
30014 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
30017 bool use_acquire
= TARGET_HAVE_LDACQ
30018 && !(model
== MEMMODEL_RELAXED
30019 || model
== MEMMODEL_CONSUME
30020 || model
== MEMMODEL_RELEASE
);
30022 bool use_release
= TARGET_HAVE_LDACQ
30023 && !(model
== MEMMODEL_RELAXED
30024 || model
== MEMMODEL_CONSUME
30025 || model
== MEMMODEL_ACQUIRE
);
30027 /* Checks whether a barrier is needed and emits one accordingly. */
30028 if (!(use_acquire
|| use_release
))
30029 arm_pre_atomic_barrier (model
);
30031 label
= gen_label_rtx ();
30032 emit_label (label
);
30035 new_out
= gen_lowpart (wmode
, new_out
);
30037 old_out
= gen_lowpart (wmode
, old_out
);
30040 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
30042 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
30051 x
= gen_rtx_AND (wmode
, old_out
, value
);
30052 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30053 x
= gen_rtx_NOT (wmode
, new_out
);
30054 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30058 if (CONST_INT_P (value
))
30060 value
= GEN_INT (-INTVAL (value
));
30066 if (mode
== DImode
)
30068 /* DImode plus/minus need to clobber flags. */
30069 /* The adddi3 and subdi3 patterns are incorrectly written so that
30070 they require matching operands, even when we could easily support
30071 three operands. Thankfully, this can be fixed up post-splitting,
30072 as the individual add+adc patterns do accept three operands and
30073 post-reload cprop can make these moves go away. */
30074 emit_move_insn (new_out
, old_out
);
30076 x
= gen_adddi3 (new_out
, new_out
, value
);
30078 x
= gen_subdi3 (new_out
, new_out
, value
);
30085 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
30086 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30090 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
30093 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30094 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
30096 /* Checks whether a barrier is needed and emits one accordingly. */
30097 if (!(use_acquire
|| use_release
))
30098 arm_post_atomic_barrier (model
);
30101 #define MAX_VECT_LEN 16
30103 struct expand_vec_perm_d
30105 rtx target
, op0
, op1
;
30106 unsigned char perm
[MAX_VECT_LEN
];
30107 enum machine_mode vmode
;
30108 unsigned char nelt
;
30113 /* Generate a variable permutation. */
30116 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30118 enum machine_mode vmode
= GET_MODE (target
);
30119 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30121 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
30122 gcc_checking_assert (GET_MODE (op0
) == vmode
);
30123 gcc_checking_assert (GET_MODE (op1
) == vmode
);
30124 gcc_checking_assert (GET_MODE (sel
) == vmode
);
30125 gcc_checking_assert (TARGET_NEON
);
30129 if (vmode
== V8QImode
)
30130 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
30132 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
30138 if (vmode
== V8QImode
)
30140 pair
= gen_reg_rtx (V16QImode
);
30141 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
30142 pair
= gen_lowpart (TImode
, pair
);
30143 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
30147 pair
= gen_reg_rtx (OImode
);
30148 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
30149 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
30155 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30157 enum machine_mode vmode
= GET_MODE (target
);
30158 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
30159 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30160 rtx rmask
[MAX_VECT_LEN
], mask
;
30162 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30163 numbering of elements for big-endian, we must reverse the order. */
30164 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
30166 /* The VTBL instruction does not use a modulo index, so we must take care
30167 of that ourselves. */
30168 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30169 for (i
= 0; i
< nelt
; ++i
)
30171 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
30172 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
30174 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
30177 /* Generate or test for an insn that supports a constant permutation. */
30179 /* Recognize patterns for the VUZP insns. */
30182 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
30184 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30185 rtx out0
, out1
, in0
, in1
, x
;
30186 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30188 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30191 /* Note that these are little-endian tests. Adjust for big-endian later. */
30192 if (d
->perm
[0] == 0)
30194 else if (d
->perm
[0] == 1)
30198 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30200 for (i
= 0; i
< nelt
; i
++)
30202 unsigned elt
= (i
* 2 + odd
) & mask
;
30203 if (d
->perm
[i
] != elt
)
30213 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
30214 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
30215 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
30216 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
30217 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
30218 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
30219 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
30220 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
30222 gcc_unreachable ();
30227 if (BYTES_BIG_ENDIAN
)
30229 x
= in0
, in0
= in1
, in1
= x
;
30234 out1
= gen_reg_rtx (d
->vmode
);
30236 x
= out0
, out0
= out1
, out1
= x
;
30238 emit_insn (gen (out0
, in0
, in1
, out1
));
30242 /* Recognize patterns for the VZIP insns. */
30245 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
30247 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
30248 rtx out0
, out1
, in0
, in1
, x
;
30249 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30251 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30254 /* Note that these are little-endian tests. Adjust for big-endian later. */
30256 if (d
->perm
[0] == high
)
30258 else if (d
->perm
[0] == 0)
30262 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30264 for (i
= 0; i
< nelt
/ 2; i
++)
30266 unsigned elt
= (i
+ high
) & mask
;
30267 if (d
->perm
[i
* 2] != elt
)
30269 elt
= (elt
+ nelt
) & mask
;
30270 if (d
->perm
[i
* 2 + 1] != elt
)
30280 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
30281 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
30282 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
30283 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
30284 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
30285 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
30286 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
30287 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
30289 gcc_unreachable ();
30294 if (BYTES_BIG_ENDIAN
)
30296 x
= in0
, in0
= in1
, in1
= x
;
30301 out1
= gen_reg_rtx (d
->vmode
);
30303 x
= out0
, out0
= out1
, out1
= x
;
30305 emit_insn (gen (out0
, in0
, in1
, out1
));
30309 /* Recognize patterns for the VREV insns. */
30312 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
30314 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
30315 rtx (*gen
)(rtx
, rtx
, rtx
);
30317 if (!d
->one_vector_p
)
30326 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
30327 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
30335 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
30336 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
30337 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
30338 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
30346 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
30347 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
30348 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
30349 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
30350 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
30351 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
30352 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
30353 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
30362 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
30363 for (j
= 0; j
<= diff
; j
+= 1)
30365 /* This is guaranteed to be true as the value of diff
30366 is 7, 3, 1 and we should have enough elements in the
30367 queue to generate this. Getting a vector mask with a
30368 value of diff other than these values implies that
30369 something is wrong by the time we get here. */
30370 gcc_assert (i
+ j
< nelt
);
30371 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
30379 /* ??? The third operand is an artifact of the builtin infrastructure
30380 and is ignored by the actual instruction. */
30381 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
30385 /* Recognize patterns for the VTRN insns. */
30388 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
30390 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30391 rtx out0
, out1
, in0
, in1
, x
;
30392 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30394 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30397 /* Note that these are little-endian tests. Adjust for big-endian later. */
30398 if (d
->perm
[0] == 0)
30400 else if (d
->perm
[0] == 1)
30404 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30406 for (i
= 0; i
< nelt
; i
+= 2)
30408 if (d
->perm
[i
] != i
+ odd
)
30410 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
30420 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
30421 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
30422 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
30423 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
30424 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
30425 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
30426 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
30427 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
30429 gcc_unreachable ();
30434 if (BYTES_BIG_ENDIAN
)
30436 x
= in0
, in0
= in1
, in1
= x
;
30441 out1
= gen_reg_rtx (d
->vmode
);
30443 x
= out0
, out0
= out1
, out1
= x
;
30445 emit_insn (gen (out0
, in0
, in1
, out1
));
30449 /* Recognize patterns for the VEXT insns. */
30452 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
30454 unsigned int i
, nelt
= d
->nelt
;
30455 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
30458 unsigned int location
;
30460 unsigned int next
= d
->perm
[0] + 1;
30462 /* TODO: Handle GCC's numbering of elements for big-endian. */
30463 if (BYTES_BIG_ENDIAN
)
30466 /* Check if the extracted indexes are increasing by one. */
30467 for (i
= 1; i
< nelt
; next
++, i
++)
30469 /* If we hit the most significant element of the 2nd vector in
30470 the previous iteration, no need to test further. */
30471 if (next
== 2 * nelt
)
30474 /* If we are operating on only one vector: it could be a
30475 rotation. If there are only two elements of size < 64, let
30476 arm_evpc_neon_vrev catch it. */
30477 if (d
->one_vector_p
&& (next
== nelt
))
30479 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
30485 if (d
->perm
[i
] != next
)
30489 location
= d
->perm
[0];
30493 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
30494 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
30495 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
30496 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
30497 case V2SImode
: gen
= gen_neon_vextv2si
; break;
30498 case V4SImode
: gen
= gen_neon_vextv4si
; break;
30499 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
30500 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
30501 case V2DImode
: gen
= gen_neon_vextv2di
; break;
30510 offset
= GEN_INT (location
);
30511 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
30515 /* The NEON VTBL instruction is a fully variable permuation that's even
30516 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30517 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30518 can do slightly better by expanding this as a constant where we don't
30519 have to apply a mask. */
30522 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
30524 rtx rperm
[MAX_VECT_LEN
], sel
;
30525 enum machine_mode vmode
= d
->vmode
;
30526 unsigned int i
, nelt
= d
->nelt
;
30528 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30529 numbering of elements for big-endian, we must reverse the order. */
30530 if (BYTES_BIG_ENDIAN
)
30536 /* Generic code will try constant permutation twice. Once with the
30537 original mode and again with the elements lowered to QImode.
30538 So wait and don't do the selector expansion ourselves. */
30539 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
30542 for (i
= 0; i
< nelt
; ++i
)
30543 rperm
[i
] = GEN_INT (d
->perm
[i
]);
30544 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
30545 sel
= force_reg (vmode
, sel
);
30547 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
30552 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
30554 /* Check if the input mask matches vext before reordering the
30557 if (arm_evpc_neon_vext (d
))
30560 /* The pattern matching functions above are written to look for a small
30561 number to begin the sequence (0, 1, N/2). If we begin with an index
30562 from the second operand, we can swap the operands. */
30563 if (d
->perm
[0] >= d
->nelt
)
30565 unsigned i
, nelt
= d
->nelt
;
30568 for (i
= 0; i
< nelt
; ++i
)
30569 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
30578 if (arm_evpc_neon_vuzp (d
))
30580 if (arm_evpc_neon_vzip (d
))
30582 if (arm_evpc_neon_vrev (d
))
30584 if (arm_evpc_neon_vtrn (d
))
30586 return arm_evpc_neon_vtbl (d
);
30591 /* Expand a vec_perm_const pattern. */
30594 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30596 struct expand_vec_perm_d d
;
30597 int i
, nelt
, which
;
30603 d
.vmode
= GET_MODE (target
);
30604 gcc_assert (VECTOR_MODE_P (d
.vmode
));
30605 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
30606 d
.testing_p
= false;
30608 for (i
= which
= 0; i
< nelt
; ++i
)
30610 rtx e
= XVECEXP (sel
, 0, i
);
30611 int ei
= INTVAL (e
) & (2 * nelt
- 1);
30612 which
|= (ei
< nelt
? 1 : 2);
30622 d
.one_vector_p
= false;
30623 if (!rtx_equal_p (op0
, op1
))
30626 /* The elements of PERM do not suggest that only the first operand
30627 is used, but both operands are identical. Allow easier matching
30628 of the permutation by folding the permutation into the single
30632 for (i
= 0; i
< nelt
; ++i
)
30633 d
.perm
[i
] &= nelt
- 1;
30635 d
.one_vector_p
= true;
30640 d
.one_vector_p
= true;
30644 return arm_expand_vec_perm_const_1 (&d
);
30647 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30650 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
30651 const unsigned char *sel
)
30653 struct expand_vec_perm_d d
;
30654 unsigned int i
, nelt
, which
;
30658 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
30659 d
.testing_p
= true;
30660 memcpy (d
.perm
, sel
, nelt
);
30662 /* Categorize the set of elements in the selector. */
30663 for (i
= which
= 0; i
< nelt
; ++i
)
30665 unsigned char e
= d
.perm
[i
];
30666 gcc_assert (e
< 2 * nelt
);
30667 which
|= (e
< nelt
? 1 : 2);
30670 /* For all elements from second vector, fold the elements to first. */
30672 for (i
= 0; i
< nelt
; ++i
)
30675 /* Check whether the mask can be applied to the vector type. */
30676 d
.one_vector_p
= (which
!= 3);
30678 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
30679 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
30680 if (!d
.one_vector_p
)
30681 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
30684 ret
= arm_expand_vec_perm_const_1 (&d
);
30691 arm_autoinc_modes_ok_p (enum machine_mode mode
, enum arm_auto_incmodes code
)
30693 /* If we are soft float and we do not have ldrd
30694 then all auto increment forms are ok. */
30695 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
30700 /* Post increment and Pre Decrement are supported for all
30701 instruction forms except for vector forms. */
30704 if (VECTOR_MODE_P (mode
))
30706 if (code
!= ARM_PRE_DEC
)
30716 /* Without LDRD and mode size greater than
30717 word size, there is no point in auto-incrementing
30718 because ldm and stm will not have these forms. */
30719 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
30722 /* Vector and floating point modes do not support
30723 these auto increment forms. */
30724 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
30737 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
30738 on ARM, since we know that shifts by negative amounts are no-ops.
30739 Additionally, the default expansion code is not available or suitable
30740 for post-reload insn splits (this can occur when the register allocator
30741 chooses not to do a shift in NEON).
30743 This function is used in both initial expand and post-reload splits, and
30744 handles all kinds of 64-bit shifts.
30746 Input requirements:
30747 - It is safe for the input and output to be the same register, but
30748 early-clobber rules apply for the shift amount and scratch registers.
30749 - Shift by register requires both scratch registers. In all other cases
30750 the scratch registers may be NULL.
30751 - Ashiftrt by a register also clobbers the CC register. */
30753 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
30754 rtx amount
, rtx scratch1
, rtx scratch2
)
30756 rtx out_high
= gen_highpart (SImode
, out
);
30757 rtx out_low
= gen_lowpart (SImode
, out
);
30758 rtx in_high
= gen_highpart (SImode
, in
);
30759 rtx in_low
= gen_lowpart (SImode
, in
);
30762 in = the register pair containing the input value.
30763 out = the destination register pair.
30764 up = the high- or low-part of each pair.
30765 down = the opposite part to "up".
30766 In a shift, we can consider bits to shift from "up"-stream to
30767 "down"-stream, so in a left-shift "up" is the low-part and "down"
30768 is the high-part of each register pair. */
30770 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
30771 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
30772 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
30773 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
30775 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
30777 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
30778 && GET_MODE (out
) == DImode
);
30780 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
30781 && GET_MODE (in
) == DImode
);
30783 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
30784 && GET_MODE (amount
) == SImode
)
30785 || CONST_INT_P (amount
)));
30786 gcc_assert (scratch1
== NULL
30787 || (GET_CODE (scratch1
) == SCRATCH
)
30788 || (GET_MODE (scratch1
) == SImode
30789 && REG_P (scratch1
)));
30790 gcc_assert (scratch2
== NULL
30791 || (GET_CODE (scratch2
) == SCRATCH
)
30792 || (GET_MODE (scratch2
) == SImode
30793 && REG_P (scratch2
)));
30794 gcc_assert (!REG_P (out
) || !REG_P (amount
)
30795 || !HARD_REGISTER_P (out
)
30796 || (REGNO (out
) != REGNO (amount
)
30797 && REGNO (out
) + 1 != REGNO (amount
)));
30799 /* Macros to make following code more readable. */
30800 #define SUB_32(DEST,SRC) \
30801 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30802 #define RSB_32(DEST,SRC) \
30803 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30804 #define SUB_S_32(DEST,SRC) \
30805 gen_addsi3_compare0 ((DEST), (SRC), \
30807 #define SET(DEST,SRC) \
30808 gen_rtx_SET (SImode, (DEST), (SRC))
30809 #define SHIFT(CODE,SRC,AMOUNT) \
30810 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30811 #define LSHIFT(CODE,SRC,AMOUNT) \
30812 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30813 SImode, (SRC), (AMOUNT))
30814 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30815 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30816 SImode, (SRC), (AMOUNT))
30818 gen_rtx_IOR (SImode, (A), (B))
30819 #define BRANCH(COND,LABEL) \
30820 gen_arm_cond_branch ((LABEL), \
30821 gen_rtx_ ## COND (CCmode, cc_reg, \
30825 /* Shifts by register and shifts by constant are handled separately. */
30826 if (CONST_INT_P (amount
))
30828 /* We have a shift-by-constant. */
30830 /* First, handle out-of-range shift amounts.
30831 In both cases we try to match the result an ARM instruction in a
30832 shift-by-register would give. This helps reduce execution
30833 differences between optimization levels, but it won't stop other
30834 parts of the compiler doing different things. This is "undefined
30835 behaviour, in any case. */
30836 if (INTVAL (amount
) <= 0)
30837 emit_insn (gen_movdi (out
, in
));
30838 else if (INTVAL (amount
) >= 64)
30840 if (code
== ASHIFTRT
)
30842 rtx const31_rtx
= GEN_INT (31);
30843 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
30844 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
30847 emit_insn (gen_movdi (out
, const0_rtx
));
30850 /* Now handle valid shifts. */
30851 else if (INTVAL (amount
) < 32)
30853 /* Shifts by a constant less than 32. */
30854 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
30856 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
30857 emit_insn (SET (out_down
,
30858 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
30860 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
30864 /* Shifts by a constant greater than 31. */
30865 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
30867 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
30868 if (code
== ASHIFTRT
)
30869 emit_insn (gen_ashrsi3 (out_up
, in_up
,
30872 emit_insn (SET (out_up
, const0_rtx
));
30877 /* We have a shift-by-register. */
30878 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
30880 /* This alternative requires the scratch registers. */
30881 gcc_assert (scratch1
&& REG_P (scratch1
));
30882 gcc_assert (scratch2
&& REG_P (scratch2
));
30884 /* We will need the values "amount-32" and "32-amount" later.
30885 Swapping them around now allows the later code to be more general. */
30889 emit_insn (SUB_32 (scratch1
, amount
));
30890 emit_insn (RSB_32 (scratch2
, amount
));
30893 emit_insn (RSB_32 (scratch1
, amount
));
30894 /* Also set CC = amount > 32. */
30895 emit_insn (SUB_S_32 (scratch2
, amount
));
30898 emit_insn (RSB_32 (scratch1
, amount
));
30899 emit_insn (SUB_32 (scratch2
, amount
));
30902 gcc_unreachable ();
30905 /* Emit code like this:
30908 out_down = in_down << amount;
30909 out_down = (in_up << (amount - 32)) | out_down;
30910 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30911 out_up = in_up << amount;
30914 out_down = in_down >> amount;
30915 out_down = (in_up << (32 - amount)) | out_down;
30917 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30918 out_up = in_up << amount;
30921 out_down = in_down >> amount;
30922 out_down = (in_up << (32 - amount)) | out_down;
30924 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30925 out_up = in_up << amount;
30927 The ARM and Thumb2 variants are the same but implemented slightly
30928 differently. If this were only called during expand we could just
30929 use the Thumb2 case and let combine do the right thing, but this
30930 can also be called from post-reload splitters. */
30932 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
30934 if (!TARGET_THUMB2
)
30936 /* Emit code for ARM mode. */
30937 emit_insn (SET (out_down
,
30938 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
30939 if (code
== ASHIFTRT
)
30941 rtx done_label
= gen_label_rtx ();
30942 emit_jump_insn (BRANCH (LT
, done_label
));
30943 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
30945 emit_label (done_label
);
30948 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
30953 /* Emit code for Thumb2 mode.
30954 Thumb2 can't do shift and or in one insn. */
30955 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
30956 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
30958 if (code
== ASHIFTRT
)
30960 rtx done_label
= gen_label_rtx ();
30961 emit_jump_insn (BRANCH (LT
, done_label
));
30962 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
30963 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
30964 emit_label (done_label
);
30968 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
30969 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
30973 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
30988 /* Returns true if a valid comparison operation and makes
30989 the operands in a form that is valid. */
30991 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
30993 enum rtx_code code
= GET_CODE (*comparison
);
30995 enum machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
30996 ? GET_MODE (*op2
) : GET_MODE (*op1
);
30998 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
31000 if (code
== UNEQ
|| code
== LTGT
)
31003 code_int
= (int)code
;
31004 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
31005 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
31010 if (!arm_add_operand (*op1
, mode
))
31011 *op1
= force_reg (mode
, *op1
);
31012 if (!arm_add_operand (*op2
, mode
))
31013 *op2
= force_reg (mode
, *op2
);
31017 if (!cmpdi_operand (*op1
, mode
))
31018 *op1
= force_reg (mode
, *op1
);
31019 if (!cmpdi_operand (*op2
, mode
))
31020 *op2
= force_reg (mode
, *op2
);
31025 if (!arm_float_compare_operand (*op1
, mode
))
31026 *op1
= force_reg (mode
, *op1
);
31027 if (!arm_float_compare_operand (*op2
, mode
))
31028 *op2
= force_reg (mode
, *op2
);
31038 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31040 static unsigned HOST_WIDE_INT
31041 arm_asan_shadow_offset (void)
31043 return (unsigned HOST_WIDE_INT
) 1 << 29;
31046 #include "gt-arm.h"