1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
26 #include "hash-table.h"
32 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
36 #include "insn-attr.h"
42 #include "diagnostic-core.h"
49 #include "target-def.h"
51 #include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node	Mnode;
typedef struct minipool_fixup	Mfix;

/* Hook for emitting language-specific object attributes; NULL when the
   front end supplies none.  NOTE(review): assumed to be assigned by the
   language front end elsewhere — confirm against callers.  */
void (*arm_lang_output_object_attributes_hook) (void);
70 /* Forward function declarations. */
71 static bool arm_lra_p (void);
72 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
73 static int arm_compute_static_chain_stack_bytes (void);
74 static arm_stack_offsets
*arm_get_frame_offsets (void);
75 static void arm_add_gc_roots (void);
76 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
77 HOST_WIDE_INT
, rtx
, rtx
, int, int);
78 static unsigned bit_count (unsigned long);
79 static int arm_address_register_rtx_p (rtx
, int);
80 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
81 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
82 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
83 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
84 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
85 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
86 inline static int thumb1_index_register_rtx_p (rtx
, int);
87 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static unsigned arm_size_return_regs (void);
91 static bool arm_assemble_integer (rtx
, unsigned int, int);
92 static void arm_print_operand (FILE *, rtx
, int);
93 static void arm_print_operand_address (FILE *, rtx
);
94 static bool arm_print_operand_punct_valid_p (unsigned char code
);
95 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
96 static arm_cc
get_arm_condition_code (rtx
);
97 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
98 static rtx
is_jump_table (rtx
);
99 static const char *output_multi_immediate (rtx
*, const char *, const char *,
101 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
102 static struct machine_function
*arm_init_machine_status (void);
103 static void thumb_exit (FILE *, int);
104 static rtx
is_jump_table (rtx
);
105 static HOST_WIDE_INT
get_jump_table_size (rtx
);
106 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
107 static Mnode
*add_minipool_forward_ref (Mfix
*);
108 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
109 static Mnode
*add_minipool_backward_ref (Mfix
*);
110 static void assign_minipool_offsets (Mfix
*);
111 static void arm_print_value (FILE *, rtx
);
112 static void dump_minipool (rtx
);
113 static int arm_barrier_cost (rtx
);
114 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
115 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
116 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
118 static void arm_reorg (void);
119 static void note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
120 static unsigned long arm_compute_save_reg0_reg12_mask (void);
121 static unsigned long arm_compute_save_reg_mask (void);
122 static unsigned long arm_isr_value (tree
);
123 static unsigned long arm_compute_func_type (void);
124 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
125 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
126 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
127 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
128 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
130 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
131 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
132 static int arm_comp_type_attributes (const_tree
, const_tree
);
133 static void arm_set_default_type_attributes (tree
);
134 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
135 static int arm_sched_reorder (FILE *, int, rtx
*, int *, int);
136 static int optimal_immediate_sequence (enum rtx_code code
,
137 unsigned HOST_WIDE_INT val
,
138 struct four_ints
*return_sequence
);
139 static int optimal_immediate_sequence_1 (enum rtx_code code
,
140 unsigned HOST_WIDE_INT val
,
141 struct four_ints
*return_sequence
,
143 static int arm_get_strip_length (int);
144 static bool arm_function_ok_for_sibcall (tree
, tree
);
145 static enum machine_mode
arm_promote_function_mode (const_tree
,
146 enum machine_mode
, int *,
148 static bool arm_return_in_memory (const_tree
, const_tree
);
149 static rtx
arm_function_value (const_tree
, const_tree
, bool);
150 static rtx
arm_libcall_value_1 (enum machine_mode
);
151 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
152 static bool arm_function_value_regno_p (const unsigned int);
153 static void arm_internal_label (FILE *, const char *, unsigned long);
154 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
156 static bool arm_have_conditional_execution (void);
157 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
158 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
159 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
160 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
161 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
162 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
163 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
164 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
165 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
166 static int arm_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
167 static int arm_register_move_cost (enum machine_mode
, reg_class_t
, reg_class_t
);
168 static int arm_memory_move_cost (enum machine_mode
, reg_class_t
, bool);
169 static void arm_init_builtins (void);
170 static void arm_init_iwmmxt_builtins (void);
171 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
172 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
173 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
174 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
175 static tree
arm_builtin_decl (unsigned, bool);
176 static void emit_constant_insn (rtx cond
, rtx pattern
);
177 static rtx
emit_set_insn (rtx
, rtx
);
178 static rtx
emit_multi_reg_push (unsigned long);
179 static int arm_arg_partial_bytes (cumulative_args_t
, enum machine_mode
,
181 static rtx
arm_function_arg (cumulative_args_t
, enum machine_mode
,
183 static void arm_function_arg_advance (cumulative_args_t
, enum machine_mode
,
185 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
186 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
188 static rtx
aapcs_libcall_value (enum machine_mode
);
189 static int aapcs_select_return_coproc (const_tree
, const_tree
);
191 #ifdef OBJECT_FORMAT_ELF
192 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
193 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
196 static void arm_encode_section_info (tree
, rtx
, int);
199 static void arm_file_end (void);
200 static void arm_file_start (void);
202 static void arm_setup_incoming_varargs (cumulative_args_t
, enum machine_mode
,
204 static bool arm_pass_by_reference (cumulative_args_t
,
205 enum machine_mode
, const_tree
, bool);
206 static bool arm_promote_prototypes (const_tree
);
207 static bool arm_default_short_enums (void);
208 static bool arm_align_anon_bitfield (void);
209 static bool arm_return_in_msb (const_tree
);
210 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
211 static bool arm_return_in_memory (const_tree
, const_tree
);
213 static void arm_unwind_emit (FILE *, rtx
);
214 static bool arm_output_ttype (rtx
);
215 static void arm_asm_emit_except_personality (rtx
);
216 static void arm_asm_init_sections (void);
218 static rtx
arm_dwarf_register_span (rtx
);
220 static tree
arm_cxx_guard_type (void);
221 static bool arm_cxx_guard_mask_bit (void);
222 static tree
arm_get_cookie_size (tree
);
223 static bool arm_cookie_has_size (void);
224 static bool arm_cxx_cdtor_returns_this (void);
225 static bool arm_cxx_key_method_may_be_inline (void);
226 static void arm_cxx_determine_class_data_visibility (tree
);
227 static bool arm_cxx_class_data_always_comdat (void);
228 static bool arm_cxx_use_aeabi_atexit (void);
229 static void arm_init_libfuncs (void);
230 static tree
arm_build_builtin_va_list (void);
231 static void arm_expand_builtin_va_start (tree
, rtx
);
232 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
233 static void arm_option_override (void);
234 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
235 static bool arm_cannot_copy_insn_p (rtx
);
236 static bool arm_tls_symbol_p (rtx x
);
237 static int arm_issue_rate (void);
238 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
239 static bool arm_output_addr_const_extra (FILE *, rtx
);
240 static bool arm_allocate_stack_slots_for_args (void);
241 static bool arm_warn_func_return (tree
);
242 static const char *arm_invalid_parameter_type (const_tree t
);
243 static const char *arm_invalid_return_type (const_tree t
);
244 static tree
arm_promoted_type (const_tree t
);
245 static tree
arm_convert_to_type (tree type
, tree expr
);
246 static bool arm_scalar_mode_supported_p (enum machine_mode
);
247 static bool arm_frame_pointer_required (void);
248 static bool arm_can_eliminate (const int, const int);
249 static void arm_asm_trampoline_template (FILE *);
250 static void arm_trampoline_init (rtx
, tree
, rtx
);
251 static rtx
arm_trampoline_adjust_address (rtx
);
252 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
253 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
254 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
255 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
256 static bool arm_array_mode_supported_p (enum machine_mode
,
257 unsigned HOST_WIDE_INT
);
258 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
259 static bool arm_class_likely_spilled_p (reg_class_t
);
260 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
261 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
262 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
266 static void arm_conditional_register_usage (void);
267 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
268 static unsigned int arm_autovectorize_vector_sizes (void);
269 static int arm_default_branch_cost (bool, bool);
270 static int arm_cortex_a5_branch_cost (bool, bool);
272 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
273 const unsigned char *sel
);
275 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
277 int misalign ATTRIBUTE_UNUSED
);
278 static unsigned arm_add_stmt_cost (void *data
, int count
,
279 enum vect_cost_for_stmt kind
,
280 struct _stmt_vec_info
*stmt_info
,
282 enum vect_cost_model_location where
);
284 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
285 bool op0_preserve_value
);
286 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
288 /* Table of machine attributes. */
289 static const struct attribute_spec arm_attribute_table
[] =
291 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
292 affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
296 { "long_call", 0, 0, false, true, true, NULL
, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
299 { "short_call", 0, 0, false, true, true, NULL
, false },
300 /* Specify the procedure call conventions for a function. */
301 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
303 /* Interrupt Service Routines have special prologue and epilogue requirements. */
304 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
306 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
308 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
320 { "dllimport", 0, 0, true, false, false, NULL
, false },
321 { "dllexport", 0, 0, true, false, false, NULL
, false },
322 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
324 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
325 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
326 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
327 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
330 { NULL
, 0, 0, false, false, false, NULL
, false }
333 /* Initialize the GCC target structure. */
334 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
335 #undef TARGET_MERGE_DECL_ATTRIBUTES
336 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
339 #undef TARGET_LEGITIMIZE_ADDRESS
340 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
343 #define TARGET_LRA_P arm_lra_p
345 #undef TARGET_ATTRIBUTE_TABLE
346 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
348 #undef TARGET_ASM_FILE_START
349 #define TARGET_ASM_FILE_START arm_file_start
350 #undef TARGET_ASM_FILE_END
351 #define TARGET_ASM_FILE_END arm_file_end
353 #undef TARGET_ASM_ALIGNED_SI_OP
354 #define TARGET_ASM_ALIGNED_SI_OP NULL
355 #undef TARGET_ASM_INTEGER
356 #define TARGET_ASM_INTEGER arm_assemble_integer
358 #undef TARGET_PRINT_OPERAND
359 #define TARGET_PRINT_OPERAND arm_print_operand
360 #undef TARGET_PRINT_OPERAND_ADDRESS
361 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
362 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
363 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
365 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
366 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
368 #undef TARGET_ASM_FUNCTION_PROLOGUE
369 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
371 #undef TARGET_ASM_FUNCTION_EPILOGUE
372 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
374 #undef TARGET_OPTION_OVERRIDE
375 #define TARGET_OPTION_OVERRIDE arm_option_override
377 #undef TARGET_COMP_TYPE_ATTRIBUTES
378 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
380 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
381 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
383 #undef TARGET_SCHED_ADJUST_COST
384 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
386 #undef TARGET_SCHED_REORDER
387 #define TARGET_SCHED_REORDER arm_sched_reorder
389 #undef TARGET_REGISTER_MOVE_COST
390 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
392 #undef TARGET_MEMORY_MOVE_COST
393 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
395 #undef TARGET_ENCODE_SECTION_INFO
397 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
399 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
402 #undef TARGET_STRIP_NAME_ENCODING
403 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
405 #undef TARGET_ASM_INTERNAL_LABEL
406 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
408 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
409 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
411 #undef TARGET_FUNCTION_VALUE
412 #define TARGET_FUNCTION_VALUE arm_function_value
414 #undef TARGET_LIBCALL_VALUE
415 #define TARGET_LIBCALL_VALUE arm_libcall_value
417 #undef TARGET_FUNCTION_VALUE_REGNO_P
418 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
420 #undef TARGET_ASM_OUTPUT_MI_THUNK
421 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
422 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
423 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
425 #undef TARGET_RTX_COSTS
426 #define TARGET_RTX_COSTS arm_rtx_costs
427 #undef TARGET_ADDRESS_COST
428 #define TARGET_ADDRESS_COST arm_address_cost
430 #undef TARGET_SHIFT_TRUNCATION_MASK
431 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
432 #undef TARGET_VECTOR_MODE_SUPPORTED_P
433 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
434 #undef TARGET_ARRAY_MODE_SUPPORTED_P
435 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
436 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
437 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
438 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
439 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
440 arm_autovectorize_vector_sizes
442 #undef TARGET_MACHINE_DEPENDENT_REORG
443 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
445 #undef TARGET_INIT_BUILTINS
446 #define TARGET_INIT_BUILTINS arm_init_builtins
447 #undef TARGET_EXPAND_BUILTIN
448 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
449 #undef TARGET_BUILTIN_DECL
450 #define TARGET_BUILTIN_DECL arm_builtin_decl
452 #undef TARGET_INIT_LIBFUNCS
453 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
455 #undef TARGET_PROMOTE_FUNCTION_MODE
456 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
457 #undef TARGET_PROMOTE_PROTOTYPES
458 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
459 #undef TARGET_PASS_BY_REFERENCE
460 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
461 #undef TARGET_ARG_PARTIAL_BYTES
462 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
463 #undef TARGET_FUNCTION_ARG
464 #define TARGET_FUNCTION_ARG arm_function_arg
465 #undef TARGET_FUNCTION_ARG_ADVANCE
466 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
467 #undef TARGET_FUNCTION_ARG_BOUNDARY
468 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
470 #undef TARGET_SETUP_INCOMING_VARARGS
471 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
473 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
474 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
476 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
477 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
478 #undef TARGET_TRAMPOLINE_INIT
479 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
480 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
481 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
483 #undef TARGET_WARN_FUNC_RETURN
484 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
486 #undef TARGET_DEFAULT_SHORT_ENUMS
487 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
489 #undef TARGET_ALIGN_ANON_BITFIELD
490 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
492 #undef TARGET_NARROW_VOLATILE_BITFIELD
493 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
495 #undef TARGET_CXX_GUARD_TYPE
496 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
498 #undef TARGET_CXX_GUARD_MASK_BIT
499 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
501 #undef TARGET_CXX_GET_COOKIE_SIZE
502 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
504 #undef TARGET_CXX_COOKIE_HAS_SIZE
505 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
507 #undef TARGET_CXX_CDTOR_RETURNS_THIS
508 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
510 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
511 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
513 #undef TARGET_CXX_USE_AEABI_ATEXIT
514 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
516 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
517 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
518 arm_cxx_determine_class_data_visibility
520 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
521 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
523 #undef TARGET_RETURN_IN_MSB
524 #define TARGET_RETURN_IN_MSB arm_return_in_msb
526 #undef TARGET_RETURN_IN_MEMORY
527 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
529 #undef TARGET_MUST_PASS_IN_STACK
530 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
533 #undef TARGET_ASM_UNWIND_EMIT
534 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
536 /* EABI unwinding tables use a different format for the typeinfo tables. */
537 #undef TARGET_ASM_TTYPE
538 #define TARGET_ASM_TTYPE arm_output_ttype
540 #undef TARGET_ARM_EABI_UNWINDER
541 #define TARGET_ARM_EABI_UNWINDER true
543 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
544 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
546 #undef TARGET_ASM_INIT_SECTIONS
547 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
548 #endif /* ARM_UNWIND_INFO */
550 #undef TARGET_DWARF_REGISTER_SPAN
551 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
553 #undef TARGET_CANNOT_COPY_INSN_P
554 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
557 #undef TARGET_HAVE_TLS
558 #define TARGET_HAVE_TLS true
561 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
562 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
564 #undef TARGET_LEGITIMATE_CONSTANT_P
565 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
567 #undef TARGET_CANNOT_FORCE_CONST_MEM
568 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
570 #undef TARGET_MAX_ANCHOR_OFFSET
571 #define TARGET_MAX_ANCHOR_OFFSET 4095
573 /* The minimum is set such that the total size of the block
574 for a particular anchor is -4088 + 1 + 4095 bytes, which is
575 divisible by eight, ensuring natural spacing of anchors. */
576 #undef TARGET_MIN_ANCHOR_OFFSET
577 #define TARGET_MIN_ANCHOR_OFFSET -4088
579 #undef TARGET_SCHED_ISSUE_RATE
580 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
582 #undef TARGET_MANGLE_TYPE
583 #define TARGET_MANGLE_TYPE arm_mangle_type
585 #undef TARGET_BUILD_BUILTIN_VA_LIST
586 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
587 #undef TARGET_EXPAND_BUILTIN_VA_START
588 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
589 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
590 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
593 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
594 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
597 #undef TARGET_LEGITIMATE_ADDRESS_P
598 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
600 #undef TARGET_PREFERRED_RELOAD_CLASS
601 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
603 #undef TARGET_INVALID_PARAMETER_TYPE
604 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
606 #undef TARGET_INVALID_RETURN_TYPE
607 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
609 #undef TARGET_PROMOTED_TYPE
610 #define TARGET_PROMOTED_TYPE arm_promoted_type
612 #undef TARGET_CONVERT_TO_TYPE
613 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
615 #undef TARGET_SCALAR_MODE_SUPPORTED_P
616 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
618 #undef TARGET_FRAME_POINTER_REQUIRED
619 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
621 #undef TARGET_CAN_ELIMINATE
622 #define TARGET_CAN_ELIMINATE arm_can_eliminate
624 #undef TARGET_CONDITIONAL_REGISTER_USAGE
625 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
627 #undef TARGET_CLASS_LIKELY_SPILLED_P
628 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
630 #undef TARGET_VECTORIZE_BUILTINS
631 #define TARGET_VECTORIZE_BUILTINS
633 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
634 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
635 arm_builtin_vectorized_function
637 #undef TARGET_VECTOR_ALIGNMENT
638 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
640 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
641 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
642 arm_vector_alignment_reachable
644 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
645 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
646 arm_builtin_support_vector_misalignment
648 #undef TARGET_PREFERRED_RENAME_CLASS
649 #define TARGET_PREFERRED_RENAME_CLASS \
650 arm_preferred_rename_class
652 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
653 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
654 arm_vectorize_vec_perm_const_ok
656 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
657 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
658 arm_builtin_vectorization_cost
659 #undef TARGET_VECTORIZE_ADD_STMT_COST
660 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
662 #undef TARGET_CANONICALIZE_COMPARISON
663 #define TARGET_CANONICALIZE_COMPARISON \
664 arm_canonicalize_comparison
666 #undef TARGET_ASAN_SHADOW_OFFSET
667 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
669 #undef MAX_INSN_PER_IT_BLOCK
670 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
673 struct gcc_target targetm
= TARGET_INITIALIZER
;
675 /* Obstack for minipool constant handling. */
676 static struct obstack minipool_obstack
;
677 static char * minipool_startobj
;
/* The maximum number of insns skipped which will be conditionalised if
   possible.  */
static int max_insns_skipped = 5;
/* Stream for the assembly output; defined by the compiler driver code.  */
extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;
688 /* The processor for which instructions should be scheduled. */
689 enum processor_type arm_tune
= arm_none
;
691 /* The current tuning set. */
692 const struct tune_params
*current_tune
;
/* Which floating point hardware to schedule for.  */

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
700 /* Used for Thumb call_via trampolines. */
701 rtx thumb_call_via_label
[14];
702 static int thumb_call_reg_needed
;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
/* spare	      (1 << 11) */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
					 profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
					 architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */
#define FL_ARCH8      (1 << 24)       /* Architecture 8.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */
738 /* Flags that only effect tuning, not available instructions. */
739 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
/* Capability masks implied by each architecture level.  Each entry is
   built from the previous level's mask plus the features that the new
   level adds (or, for the M profile, removes).  */
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM	(FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A	(FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
			 | FL_ARM_DIV | FL_NOTM)
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;
775 /* The highest ARM architecture version supported by the
777 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
779 /* The following are used in the arm.md file as equivalents to bits
780 in the above two flag variables. */
782 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
785 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
788 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
791 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
794 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
797 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
800 /* Nonzero if this chip supports the ARM 6K extensions. */
803 /* Nonzero if instructions present in ARMv6-M can be used. */
806 /* Nonzero if this chip supports the ARM 7 extensions. */
/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
812 /* Nonzero if instructions present in ARMv7E-M can be used. */
815 /* Nonzero if instructions present in ARMv8 can be used. */
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
843 /* Nonzero if generating Thumb instructions. */
846 /* Nonzero if generating Thumb-1 instructions. */
849 /* Nonzero if we should define __THUMB_INTERWORK__ in the
851 XXX This is a bit of a hack, it's intended to help work around
852 problems in GLD which doesn't understand that armv5t code is
853 interworking clean. */
854 int arm_cpp_interwork
= 0;
856 /* Nonzero if chip supports Thumb 2. */
859 /* Nonzero if chip supports integer division instruction. */
860 int arm_arch_arm_hwdiv
;
861 int arm_arch_thumb_hwdiv
;
863 /* Nonzero if we should use Neon to handle 64-bits operations rather
864 than core registers. */
865 int prefer_neon_for_64bits
= 0;
867 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
868 we must report the mode of the memory reference from
869 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
870 enum machine_mode output_memory_reference_mode
;
872 /* The register number to be used for the PIC offset register. */
873 unsigned arm_pic_register
= INVALID_REGNUM
;
875 /* Set to 1 after arm_reorg has started. Reset to start at the start of
876 the next function. */
877 static int after_arm_reorg
= 0;
879 enum arm_pcs arm_pcs_default
;
881 /* For an explanation of these variables, see final_prescan_insn below. */
883 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
884 enum arm_cond_code arm_current_cc
;
887 int arm_target_label
;
888 /* The number of conditionally executed insns, including the current insn. */
889 int arm_condexec_count
= 0;
890 /* A bitmask specifying the patterns for the IT block.
891 Zero means do not output an IT block before this insn. */
892 int arm_condexec_mask
= 0;
893 /* The number of bits used in arm_condexec_mask. */
894 int arm_condexec_masklen
= 0;
/* The condition codes of the ARM, and the inverse function.  Indexed by
   enum arm_cond_code; the last two entries ("al"/"nv") are the
   always/never codes.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.
   Identity mapping: entry i is core register i (r0..r15).  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
909 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
910 #define streq(string1, string2) (strcmp (string1, string2) == 0)
912 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
913 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
914 | (1 << PIC_OFFSET_TABLE_REGNUM)))
916 /* Initialization code. */
920 const char *const name
;
921 enum processor_type core
;
923 enum base_architecture base_arch
;
924 const unsigned long flags
;
925 const struct tune_params
*const tune
;
929 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
930 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
935 /* arm generic vectorizer costs. */
937 struct cpu_vec_costs arm_default_vec_cost
= {
938 1, /* scalar_stmt_cost. */
939 1, /* scalar load_cost. */
940 1, /* scalar_store_cost. */
941 1, /* vec_stmt_cost. */
942 1, /* vec_to_scalar_cost. */
943 1, /* scalar_to_vec_cost. */
944 1, /* vec_align_load_cost. */
945 1, /* vec_unalign_load_cost. */
946 1, /* vec_unalign_store_cost. */
947 1, /* vec_store_cost. */
948 3, /* cond_taken_branch_cost. */
949 1, /* cond_not_taken_branch_cost. */
953 const struct cpu_cost_table generic_extra_costs
=
960 COSTS_N_INSNS (1), /* Shift_reg. */
961 0, /* Arith_shift. */
962 COSTS_N_INSNS (1), /* Arith_shift_reg. */
964 COSTS_N_INSNS (1), /* Log_shift_reg. */
966 COSTS_N_INSNS (1), /* Extend_arith. */
970 COSTS_N_INSNS (1), /* non_exec. */
971 false /* non_exec_costs_exec. */
976 COSTS_N_INSNS (2), /* Simple. */
977 COSTS_N_INSNS (1), /* Flag_setting. */
978 COSTS_N_INSNS (2), /* Extend. */
979 COSTS_N_INSNS (3), /* Add. */
980 COSTS_N_INSNS (3), /* Extend_add. */
981 COSTS_N_INSNS (8) /* Idiv. */
985 0, /* Simple (N/A). */
986 0, /* Flag_setting (N/A). */
987 COSTS_N_INSNS (2), /* Extend. */
989 COSTS_N_INSNS (3), /* Extend_add. */
995 COSTS_N_INSNS (2), /* Load. */
996 COSTS_N_INSNS (2), /* Load_sign_extend. */
997 COSTS_N_INSNS (3), /* Ldrd. */
998 COSTS_N_INSNS (2), /* Ldm_1st. */
999 1, /* Ldm_regs_per_insn_1st. */
1000 1, /* Ldm_regs_per_insn_subsequent. */
1001 COSTS_N_INSNS (2), /* Loadf. */
1002 COSTS_N_INSNS (3), /* Loadd. */
1003 COSTS_N_INSNS (1), /* Load_unaligned. */
1004 COSTS_N_INSNS (2), /* Store. */
1005 COSTS_N_INSNS (3), /* Strd. */
1006 COSTS_N_INSNS (2), /* Stm_1st. */
1007 1, /* Stm_regs_per_insn_1st. */
1008 1, /* Stm_regs_per_insn_subsequent. */
1009 COSTS_N_INSNS (2), /* Storef. */
1010 COSTS_N_INSNS (3), /* Stored. */
1011 COSTS_N_INSNS (1) /* Store_unaligned. */
1016 COSTS_N_INSNS (7), /* Div. */
1017 COSTS_N_INSNS (2), /* Mult. */
1018 COSTS_N_INSNS (3), /* Mult_addsub. */
1019 COSTS_N_INSNS (3), /* Fma. */
1020 COSTS_N_INSNS (1), /* Addsub. */
1032 COSTS_N_INSNS (15), /* Div. */
1033 COSTS_N_INSNS (5), /* Mult. */
1034 COSTS_N_INSNS (7), /* Mult_addsub. */
1035 COSTS_N_INSNS (7), /* Fma. */
1036 COSTS_N_INSNS (3), /* Addsub. */
1049 COSTS_N_INSNS (1) /* Alu. */
1053 const struct cpu_cost_table cortexa15_extra_costs
=
1057 COSTS_N_INSNS (1), /* Arith. */
1058 COSTS_N_INSNS (1), /* Logical. */
1059 COSTS_N_INSNS (1), /* Shift. */
1060 COSTS_N_INSNS (1), /* Shift_reg. */
1061 COSTS_N_INSNS (1), /* Arith_shift. */
1062 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1063 COSTS_N_INSNS (1), /* Log_shift. */
1064 COSTS_N_INSNS (1), /* Log_shift_reg. */
1065 COSTS_N_INSNS (1), /* Extend. */
1066 COSTS_N_INSNS (2), /* Extend_arith. */
1067 COSTS_N_INSNS (2), /* Bfi. */
1068 COSTS_N_INSNS (1), /* Bfx. */
1069 COSTS_N_INSNS (1), /* Clz. */
1070 COSTS_N_INSNS (1), /* non_exec. */
1071 true /* non_exec_costs_exec. */
1076 COSTS_N_INSNS (3), /* Simple. */
1077 COSTS_N_INSNS (4), /* Flag_setting. */
1078 COSTS_N_INSNS (3), /* Extend. */
1079 COSTS_N_INSNS (4), /* Add. */
1080 COSTS_N_INSNS (4), /* Extend_add. */
1081 COSTS_N_INSNS (19) /* Idiv. */
1085 0, /* Simple (N/A). */
1086 0, /* Flag_setting (N/A). */
1087 COSTS_N_INSNS (4), /* Extend. */
1089 COSTS_N_INSNS (6), /* Extend_add. */
1095 COSTS_N_INSNS (4), /* Load. */
1096 COSTS_N_INSNS (4), /* Load_sign_extend. */
1097 COSTS_N_INSNS (4), /* Ldrd. */
1098 COSTS_N_INSNS (5), /* Ldm_1st. */
1099 1, /* Ldm_regs_per_insn_1st. */
1100 2, /* Ldm_regs_per_insn_subsequent. */
1101 COSTS_N_INSNS (5), /* Loadf. */
1102 COSTS_N_INSNS (5), /* Loadd. */
1103 COSTS_N_INSNS (1), /* Load_unaligned. */
1104 COSTS_N_INSNS (1), /* Store. */
1105 COSTS_N_INSNS (1), /* Strd. */
1106 COSTS_N_INSNS (2), /* Stm_1st. */
1107 1, /* Stm_regs_per_insn_1st. */
1108 2, /* Stm_regs_per_insn_subsequent. */
1109 COSTS_N_INSNS (1), /* Storef. */
1110 COSTS_N_INSNS (1), /* Stored. */
1111 COSTS_N_INSNS (1) /* Store_unaligned. */
1116 COSTS_N_INSNS (18), /* Div. */
1117 COSTS_N_INSNS (5), /* Mult. */
1118 COSTS_N_INSNS (3), /* Mult_addsub. */
1119 COSTS_N_INSNS (13), /* Fma. */
1120 COSTS_N_INSNS (5), /* Addsub. */
1121 COSTS_N_INSNS (5), /* Fpconst. */
1122 COSTS_N_INSNS (3), /* Neg. */
1123 COSTS_N_INSNS (3), /* Compare. */
1124 COSTS_N_INSNS (3), /* Widen. */
1125 COSTS_N_INSNS (3), /* Narrow. */
1126 COSTS_N_INSNS (3), /* Toint. */
1127 COSTS_N_INSNS (3), /* Fromint. */
1128 COSTS_N_INSNS (3) /* Roundint. */
1132 COSTS_N_INSNS (32), /* Div. */
1133 COSTS_N_INSNS (5), /* Mult. */
1134 COSTS_N_INSNS (3), /* Mult_addsub. */
1135 COSTS_N_INSNS (13), /* Fma. */
1136 COSTS_N_INSNS (5), /* Addsub. */
1137 COSTS_N_INSNS (3), /* Fpconst. */
1138 COSTS_N_INSNS (3), /* Neg. */
1139 COSTS_N_INSNS (3), /* Compare. */
1140 COSTS_N_INSNS (3), /* Widen. */
1141 COSTS_N_INSNS (3), /* Narrow. */
1142 COSTS_N_INSNS (3), /* Toint. */
1143 COSTS_N_INSNS (3), /* Fromint. */
1144 COSTS_N_INSNS (3) /* Roundint. */
1149 COSTS_N_INSNS (1) /* Alu. */
1153 const struct tune_params arm_slowmul_tune
=
1155 arm_slowmul_rtx_costs
,
1158 3, /* Constant limit. */
1159 5, /* Max cond insns. */
1160 ARM_PREFETCH_NOT_BENEFICIAL
,
1161 true, /* Prefer constant pool. */
1162 arm_default_branch_cost
,
1163 false, /* Prefer LDRD/STRD. */
1164 {true, true}, /* Prefer non short circuit. */
1165 &arm_default_vec_cost
, /* Vectorizer costs. */
1166 false /* Prefer Neon for 64-bits bitops. */
1169 const struct tune_params arm_fastmul_tune
=
1171 arm_fastmul_rtx_costs
,
1174 1, /* Constant limit. */
1175 5, /* Max cond insns. */
1176 ARM_PREFETCH_NOT_BENEFICIAL
,
1177 true, /* Prefer constant pool. */
1178 arm_default_branch_cost
,
1179 false, /* Prefer LDRD/STRD. */
1180 {true, true}, /* Prefer non short circuit. */
1181 &arm_default_vec_cost
, /* Vectorizer costs. */
1182 false /* Prefer Neon for 64-bits bitops. */
1185 /* StrongARM has early execution of branches, so a sequence that is worth
1186 skipping is shorter. Set max_insns_skipped to a lower value. */
1188 const struct tune_params arm_strongarm_tune
=
1190 arm_fastmul_rtx_costs
,
1193 1, /* Constant limit. */
1194 3, /* Max cond insns. */
1195 ARM_PREFETCH_NOT_BENEFICIAL
,
1196 true, /* Prefer constant pool. */
1197 arm_default_branch_cost
,
1198 false, /* Prefer LDRD/STRD. */
1199 {true, true}, /* Prefer non short circuit. */
1200 &arm_default_vec_cost
, /* Vectorizer costs. */
1201 false /* Prefer Neon for 64-bits bitops. */
1204 const struct tune_params arm_xscale_tune
=
1206 arm_xscale_rtx_costs
,
1208 xscale_sched_adjust_cost
,
1209 2, /* Constant limit. */
1210 3, /* Max cond insns. */
1211 ARM_PREFETCH_NOT_BENEFICIAL
,
1212 true, /* Prefer constant pool. */
1213 arm_default_branch_cost
,
1214 false, /* Prefer LDRD/STRD. */
1215 {true, true}, /* Prefer non short circuit. */
1216 &arm_default_vec_cost
, /* Vectorizer costs. */
1217 false /* Prefer Neon for 64-bits bitops. */
1220 const struct tune_params arm_9e_tune
=
1225 1, /* Constant limit. */
1226 5, /* Max cond insns. */
1227 ARM_PREFETCH_NOT_BENEFICIAL
,
1228 true, /* Prefer constant pool. */
1229 arm_default_branch_cost
,
1230 false, /* Prefer LDRD/STRD. */
1231 {true, true}, /* Prefer non short circuit. */
1232 &arm_default_vec_cost
, /* Vectorizer costs. */
1233 false /* Prefer Neon for 64-bits bitops. */
1236 const struct tune_params arm_v6t2_tune
=
1241 1, /* Constant limit. */
1242 5, /* Max cond insns. */
1243 ARM_PREFETCH_NOT_BENEFICIAL
,
1244 false, /* Prefer constant pool. */
1245 arm_default_branch_cost
,
1246 false, /* Prefer LDRD/STRD. */
1247 {true, true}, /* Prefer non short circuit. */
1248 &arm_default_vec_cost
, /* Vectorizer costs. */
1249 false /* Prefer Neon for 64-bits bitops. */
1252 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1253 const struct tune_params arm_cortex_tune
=
1256 &generic_extra_costs
,
1258 1, /* Constant limit. */
1259 5, /* Max cond insns. */
1260 ARM_PREFETCH_NOT_BENEFICIAL
,
1261 false, /* Prefer constant pool. */
1262 arm_default_branch_cost
,
1263 false, /* Prefer LDRD/STRD. */
1264 {true, true}, /* Prefer non short circuit. */
1265 &arm_default_vec_cost
, /* Vectorizer costs. */
1266 false /* Prefer Neon for 64-bits bitops. */
1269 const struct tune_params arm_cortex_a15_tune
=
1272 &cortexa15_extra_costs
,
1274 1, /* Constant limit. */
1275 2, /* Max cond insns. */
1276 ARM_PREFETCH_NOT_BENEFICIAL
,
1277 false, /* Prefer constant pool. */
1278 arm_default_branch_cost
,
1279 true, /* Prefer LDRD/STRD. */
1280 {true, true}, /* Prefer non short circuit. */
1281 &arm_default_vec_cost
, /* Vectorizer costs. */
1282 false /* Prefer Neon for 64-bits bitops. */
1285 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1286 less appealing. Set max_insns_skipped to a low value. */
1288 const struct tune_params arm_cortex_a5_tune
=
1293 1, /* Constant limit. */
1294 1, /* Max cond insns. */
1295 ARM_PREFETCH_NOT_BENEFICIAL
,
1296 false, /* Prefer constant pool. */
1297 arm_cortex_a5_branch_cost
,
1298 false, /* Prefer LDRD/STRD. */
1299 {false, false}, /* Prefer non short circuit. */
1300 &arm_default_vec_cost
, /* Vectorizer costs. */
1301 false /* Prefer Neon for 64-bits bitops. */
1304 const struct tune_params arm_cortex_a9_tune
=
1308 cortex_a9_sched_adjust_cost
,
1309 1, /* Constant limit. */
1310 5, /* Max cond insns. */
1311 ARM_PREFETCH_BENEFICIAL(4,32,32),
1312 false, /* Prefer constant pool. */
1313 arm_default_branch_cost
,
1314 false, /* Prefer LDRD/STRD. */
1315 {true, true}, /* Prefer non short circuit. */
1316 &arm_default_vec_cost
, /* Vectorizer costs. */
1317 false /* Prefer Neon for 64-bits bitops. */
1320 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1321 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1322 const struct tune_params arm_v6m_tune
=
1327 1, /* Constant limit. */
1328 5, /* Max cond insns. */
1329 ARM_PREFETCH_NOT_BENEFICIAL
,
1330 false, /* Prefer constant pool. */
1331 arm_default_branch_cost
,
1332 false, /* Prefer LDRD/STRD. */
1333 {false, false}, /* Prefer non short circuit. */
1334 &arm_default_vec_cost
, /* Vectorizer costs. */
1335 false /* Prefer Neon for 64-bits bitops. */
1338 const struct tune_params arm_fa726te_tune
=
1342 fa726te_sched_adjust_cost
,
1343 1, /* Constant limit. */
1344 5, /* Max cond insns. */
1345 ARM_PREFETCH_NOT_BENEFICIAL
,
1346 true, /* Prefer constant pool. */
1347 arm_default_branch_cost
,
1348 false, /* Prefer LDRD/STRD. */
1349 {true, true}, /* Prefer non short circuit. */
1350 &arm_default_vec_cost
, /* Vectorizer costs. */
1351 false /* Prefer Neon for 64-bits bitops. */
1355 /* Not all of these give usefully different compilation alternatives,
1356 but there is no simple way of generalizing them. */
1357 static const struct processors all_cores
[] =
1360 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1361 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1362 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1363 #include "arm-cores.def"
1365 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1368 static const struct processors all_architectures
[] =
1370 /* ARM Architectures */
1371 /* We don't specify tuning costs here as it will be figured out
1374 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1375 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1376 #include "arm-arches.def"
1378 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1382 /* These are populated as commandline arguments are processed, or NULL
1383 if not specified. */
1384 static const struct processors
*arm_selected_arch
;
1385 static const struct processors
*arm_selected_cpu
;
1386 static const struct processors
*arm_selected_tune
;
1388 /* The name of the preprocessor macro to define for this architecture. */
1390 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
1392 /* Available values for -mfpu=. */
1394 static const struct arm_fpu_desc all_fpus
[] =
1396 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1397 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1398 #include "arm-fpus.def"
1403 /* Supported TLS relocations. */
1411 TLS_DESCSEQ
/* GNU scheme */
1414 /* The maximum number of insns to be used when loading a constant. */
1416 arm_constant_limit (bool size_p
)
1418 return size_p
? 1 : current_tune
->constant_limit
;
1421 /* Emit an insn that's a simple single-set. Both the operands must be known
1424 emit_set_insn (rtx x
, rtx y
)
1426 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
/* Return the number of bits set in VALUE (population count), using
   Kernighan's trick: each iteration clears the lowest set bit, so the
   loop runs once per set bit.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
1446 enum machine_mode mode
;
1448 } arm_fixed_mode_set
;
1450 /* A small helper for setting fixed-point library libfuncs. */
1453 arm_set_fixed_optab_libfunc (optab optable
, enum machine_mode mode
,
1454 const char *funcname
, const char *modename
,
1459 if (num_suffix
== 0)
1460 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
1462 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
1464 set_optab_libfunc (optable
, mode
, buffer
);
1468 arm_set_fixed_conv_libfunc (convert_optab optable
, enum machine_mode to
,
1469 enum machine_mode from
, const char *funcname
,
1470 const char *toname
, const char *fromname
)
1473 const char *maybe_suffix_2
= "";
1475 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1476 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
1477 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
1478 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
1479 maybe_suffix_2
= "2";
1481 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
1484 set_conv_libfunc (optable
, to
, from
, buffer
);
1487 /* Set up library functions unique to ARM. */
1490 arm_init_libfuncs (void)
1492 /* For Linux, we have access to kernel support for atomic operations. */
1493 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
1494 init_sync_libfuncs (2 * UNITS_PER_WORD
);
1496 /* There are no special library functions unless we are using the
1501 /* The functions below are described in Section 4 of the "Run-Time
1502 ABI for the ARM architecture", Version 1.0. */
1504 /* Double-precision floating-point arithmetic. Table 2. */
1505 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
1506 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
1507 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
1508 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
1509 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
1511 /* Double-precision comparisons. Table 3. */
1512 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
1513 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
1514 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
1515 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
1516 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
1517 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
1518 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
1520 /* Single-precision floating-point arithmetic. Table 4. */
1521 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
1522 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
1523 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
1524 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
1525 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
1527 /* Single-precision comparisons. Table 5. */
1528 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
1529 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
1530 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
1531 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
1532 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
1533 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
1534 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
1536 /* Floating-point to integer conversions. Table 6. */
1537 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
1538 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
1539 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
1540 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
1541 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
1542 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
1543 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
1544 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
1546 /* Conversions between floating types. Table 7. */
1547 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
1548 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
1550 /* Integer to floating-point conversions. Table 8. */
1551 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
1552 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
1553 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
1554 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
1555 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
1556 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
1557 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
1558 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
1560 /* Long long. Table 9. */
1561 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
1562 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
1563 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
1564 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
1565 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
1566 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
1567 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
1568 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
1570 /* Integer (32/32->32) division. \S 4.3.1. */
1571 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
1572 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
1574 /* The divmod functions are designed so that they can be used for
1575 plain division, even though they return both the quotient and the
1576 remainder. The quotient is returned in the usual location (i.e.,
1577 r0 for SImode, {r0, r1} for DImode), just as would be expected
1578 for an ordinary division routine. Because the AAPCS calling
1579 conventions specify that all of { r0, r1, r2, r3 } are
1580 callee-saved registers, there is no need to tell the compiler
1581 explicitly that those registers are clobbered by these
1583 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
1584 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
1586 /* For SImode division the ABI provides div-without-mod routines,
1587 which are faster. */
1588 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
1589 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
1591 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1592 divmod libcalls instead. */
1593 set_optab_libfunc (smod_optab
, DImode
, NULL
);
1594 set_optab_libfunc (umod_optab
, DImode
, NULL
);
1595 set_optab_libfunc (smod_optab
, SImode
, NULL
);
1596 set_optab_libfunc (umod_optab
, SImode
, NULL
);
1598 /* Half-precision float operations. The compiler handles all operations
1599 with NULL libfuncs by converting the SFmode. */
1600 switch (arm_fp16_format
)
1602 case ARM_FP16_FORMAT_IEEE
:
1603 case ARM_FP16_FORMAT_ALTERNATIVE
:
1606 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
1607 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1609 : "__gnu_f2h_alternative"));
1610 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
1611 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1613 : "__gnu_h2f_alternative"));
1616 set_optab_libfunc (add_optab
, HFmode
, NULL
);
1617 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
1618 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
1619 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
1620 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
1623 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
1624 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
1625 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
1626 set_optab_libfunc (le_optab
, HFmode
, NULL
);
1627 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
1628 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
1629 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
1636 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1638 const arm_fixed_mode_set fixed_arith_modes
[] =
1659 const arm_fixed_mode_set fixed_conv_modes
[] =
1689 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
1691 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
1692 "add", fixed_arith_modes
[i
].name
, 3);
1693 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
1694 "ssadd", fixed_arith_modes
[i
].name
, 3);
1695 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
1696 "usadd", fixed_arith_modes
[i
].name
, 3);
1697 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
1698 "sub", fixed_arith_modes
[i
].name
, 3);
1699 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
1700 "sssub", fixed_arith_modes
[i
].name
, 3);
1701 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
1702 "ussub", fixed_arith_modes
[i
].name
, 3);
1703 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
1704 "mul", fixed_arith_modes
[i
].name
, 3);
1705 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
1706 "ssmul", fixed_arith_modes
[i
].name
, 3);
1707 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
1708 "usmul", fixed_arith_modes
[i
].name
, 3);
1709 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
1710 "div", fixed_arith_modes
[i
].name
, 3);
1711 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
1712 "udiv", fixed_arith_modes
[i
].name
, 3);
1713 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
1714 "ssdiv", fixed_arith_modes
[i
].name
, 3);
1715 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
1716 "usdiv", fixed_arith_modes
[i
].name
, 3);
1717 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
1718 "neg", fixed_arith_modes
[i
].name
, 2);
1719 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
1720 "ssneg", fixed_arith_modes
[i
].name
, 2);
1721 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
1722 "usneg", fixed_arith_modes
[i
].name
, 2);
1723 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
1724 "ashl", fixed_arith_modes
[i
].name
, 3);
1725 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
1726 "ashr", fixed_arith_modes
[i
].name
, 3);
1727 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
1728 "lshr", fixed_arith_modes
[i
].name
, 3);
1729 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
1730 "ssashl", fixed_arith_modes
[i
].name
, 3);
1731 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
1732 "usashl", fixed_arith_modes
[i
].name
, 3);
1733 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
1734 "cmp", fixed_arith_modes
[i
].name
, 2);
1737 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
1738 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
1741 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
1742 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
1745 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
1746 fixed_conv_modes
[j
].mode
, "fract",
1747 fixed_conv_modes
[i
].name
,
1748 fixed_conv_modes
[j
].name
);
1749 arm_set_fixed_conv_libfunc (satfract_optab
,
1750 fixed_conv_modes
[i
].mode
,
1751 fixed_conv_modes
[j
].mode
, "satfract",
1752 fixed_conv_modes
[i
].name
,
1753 fixed_conv_modes
[j
].name
);
1754 arm_set_fixed_conv_libfunc (fractuns_optab
,
1755 fixed_conv_modes
[i
].mode
,
1756 fixed_conv_modes
[j
].mode
, "fractuns",
1757 fixed_conv_modes
[i
].name
,
1758 fixed_conv_modes
[j
].name
);
1759 arm_set_fixed_conv_libfunc (satfractuns_optab
,
1760 fixed_conv_modes
[i
].mode
,
1761 fixed_conv_modes
[j
].mode
, "satfractuns",
1762 fixed_conv_modes
[i
].name
,
1763 fixed_conv_modes
[j
].name
);
1767 if (TARGET_AAPCS_BASED
)
1768 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
1771 /* On AAPCS systems, this is the "struct __va_list". */
1772 static GTY(()) tree va_list_type
;
1774 /* Return the type to use as __builtin_va_list. */
1776 arm_build_builtin_va_list (void)
1781 if (!TARGET_AAPCS_BASED
)
1782 return std_build_builtin_va_list ();
1784 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1792 The C Library ABI further reinforces this definition in \S
1795 We must follow this definition exactly. The structure tag
1796 name is visible in C++ mangled names, and thus forms a part
1797 of the ABI. The field name may be used by people who
1798 #include <stdarg.h>. */
1799 /* Create the type. */
1800 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1801 /* Give it the required name. */
1802 va_list_name
= build_decl (BUILTINS_LOCATION
,
1804 get_identifier ("__va_list"),
1806 DECL_ARTIFICIAL (va_list_name
) = 1;
1807 TYPE_NAME (va_list_type
) = va_list_name
;
1808 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
1809 /* Create the __ap field. */
1810 ap_field
= build_decl (BUILTINS_LOCATION
,
1812 get_identifier ("__ap"),
1814 DECL_ARTIFICIAL (ap_field
) = 1;
1815 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
1816 TYPE_FIELDS (va_list_type
) = ap_field
;
1817 /* Compute its layout. */
1818 layout_type (va_list_type
);
1820 return va_list_type
;
1823 /* Return an expression of type "void *" pointing to the next
1824 available argument in a variable-argument list. VALIST is the
1825 user-level va_list object, of type __builtin_va_list. */
1827 arm_extract_valist_ptr (tree valist
)
1829 if (TREE_TYPE (valist
) == error_mark_node
)
1830 return error_mark_node
;
1832 /* On an AAPCS target, the pointer is stored within "struct
1834 if (TARGET_AAPCS_BASED
)
1836 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
1837 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
1838 valist
, ap_field
, NULL_TREE
);
1844 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1846 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
1848 valist
= arm_extract_valist_ptr (valist
);
1849 std_expand_builtin_va_start (valist
, nextarg
);
1852 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1854 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
1857 valist
= arm_extract_valist_ptr (valist
);
1858 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
1861 /* Fix up any incompatible options that the user has specified. */
1863 arm_option_override (void)
1865 if (global_options_set
.x_arm_arch_option
)
1866 arm_selected_arch
= &all_architectures
[arm_arch_option
];
1868 if (global_options_set
.x_arm_cpu_option
)
1869 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
1871 if (global_options_set
.x_arm_tune_option
)
1872 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
1874 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1875 SUBTARGET_OVERRIDE_OPTIONS
;
1878 if (arm_selected_arch
)
1880 if (arm_selected_cpu
)
1882 /* Check for conflict between mcpu and march. */
1883 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
1885 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1886 arm_selected_cpu
->name
, arm_selected_arch
->name
);
1887 /* -march wins for code generation.
1888 -mcpu wins for default tuning. */
1889 if (!arm_selected_tune
)
1890 arm_selected_tune
= arm_selected_cpu
;
1892 arm_selected_cpu
= arm_selected_arch
;
1896 arm_selected_arch
= NULL
;
1899 /* Pick a CPU based on the architecture. */
1900 arm_selected_cpu
= arm_selected_arch
;
1903 /* If the user did not specify a processor, choose one for them. */
1904 if (!arm_selected_cpu
)
1906 const struct processors
* sel
;
1907 unsigned int sought
;
1909 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
1910 if (!arm_selected_cpu
->name
)
1912 #ifdef SUBTARGET_CPU_DEFAULT
1913 /* Use the subtarget default CPU if none was specified by
1915 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
1917 /* Default to ARM6. */
1918 if (!arm_selected_cpu
->name
)
1919 arm_selected_cpu
= &all_cores
[arm6
];
1922 sel
= arm_selected_cpu
;
1923 insn_flags
= sel
->flags
;
1925 /* Now check to see if the user has specified some command line
1926 switch that require certain abilities from the cpu. */
1929 if (TARGET_INTERWORK
|| TARGET_THUMB
)
1931 sought
|= (FL_THUMB
| FL_MODE32
);
1933 /* There are no ARM processors that support both APCS-26 and
1934 interworking. Therefore we force FL_MODE26 to be removed
1935 from insn_flags here (if it was set), so that the search
1936 below will always be able to find a compatible processor. */
1937 insn_flags
&= ~FL_MODE26
;
1940 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
1942 /* Try to locate a CPU type that supports all of the abilities
1943 of the default CPU, plus the extra abilities requested by
1945 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1946 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
1949 if (sel
->name
== NULL
)
1951 unsigned current_bit_count
= 0;
1952 const struct processors
* best_fit
= NULL
;
1954 /* Ideally we would like to issue an error message here
1955 saying that it was not possible to find a CPU compatible
1956 with the default CPU, but which also supports the command
1957 line options specified by the programmer, and so they
1958 ought to use the -mcpu=<name> command line option to
1959 override the default CPU type.
1961 If we cannot find a cpu that has both the
1962 characteristics of the default cpu and the given
1963 command line options we scan the array again looking
1964 for a best match. */
1965 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1966 if ((sel
->flags
& sought
) == sought
)
1970 count
= bit_count (sel
->flags
& insn_flags
);
1972 if (count
>= current_bit_count
)
1975 current_bit_count
= count
;
1979 gcc_assert (best_fit
);
1983 arm_selected_cpu
= sel
;
1987 gcc_assert (arm_selected_cpu
);
1988 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1989 if (!arm_selected_tune
)
1990 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
1992 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
1993 insn_flags
= arm_selected_cpu
->flags
;
1994 arm_base_arch
= arm_selected_cpu
->base_arch
;
1996 arm_tune
= arm_selected_tune
->core
;
1997 tune_flags
= arm_selected_tune
->flags
;
1998 current_tune
= arm_selected_tune
->tune
;
2000 /* Make sure that the processor choice does not conflict with any of the
2001 other command line choices. */
2002 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
2003 error ("target CPU does not support ARM mode");
2005 /* BPABI targets use linker tricks to allow interworking on cores
2006 without thumb support. */
2007 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
2009 warning (0, "target CPU does not support interworking" );
2010 target_flags
&= ~MASK_INTERWORK
;
2013 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
2015 warning (0, "target CPU does not support THUMB instructions");
2016 target_flags
&= ~MASK_THUMB
;
2019 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
2021 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2022 target_flags
&= ~MASK_APCS_FRAME
;
2025 /* Callee super interworking implies thumb interworking. Adding
2026 this to the flags here simplifies the logic elsewhere. */
2027 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
2028 target_flags
|= MASK_INTERWORK
;
2030 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2031 from here where no function is being compiled currently. */
2032 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
2033 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2035 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
2036 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2038 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
2040 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2041 target_flags
|= MASK_APCS_FRAME
;
2044 if (TARGET_POKE_FUNCTION_NAME
)
2045 target_flags
|= MASK_APCS_FRAME
;
2047 if (TARGET_APCS_REENT
&& flag_pic
)
2048 error ("-fpic and -mapcs-reent are incompatible");
2050 if (TARGET_APCS_REENT
)
2051 warning (0, "APCS reentrant code not supported. Ignored");
2053 /* If this target is normally configured to use APCS frames, warn if they
2054 are turned off and debugging is turned on. */
2056 && write_symbols
!= NO_DEBUG
2057 && !TARGET_APCS_FRAME
2058 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2059 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2061 if (TARGET_APCS_FLOAT
)
2062 warning (0, "passing floating point arguments in fp regs not yet supported");
2064 if (TARGET_LITTLE_WORDS
)
2065 warning (OPT_Wdeprecated
, "%<mwords-little-endian%> is deprecated and "
2066 "will be removed in a future release");
2068 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2069 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
2070 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
2071 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
2072 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
2073 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
2074 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
2075 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
2076 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
2077 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
2078 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
2079 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
2080 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
2081 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
2082 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
2084 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
2085 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
2086 thumb_code
= TARGET_ARM
== 0;
2087 thumb1_code
= TARGET_THUMB1
!= 0;
2088 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
2089 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
2090 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
2091 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
2092 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
2093 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
2094 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
2095 if (arm_restrict_it
== 2)
2096 arm_restrict_it
= arm_arch8
&& TARGET_THUMB2
;
2099 arm_restrict_it
= 0;
2101 /* If we are not using the default (ARM mode) section anchor offset
2102 ranges, then set the correct ranges now. */
2105 /* Thumb-1 LDR instructions cannot have negative offsets.
2106 Permissible positive offset ranges are 5-bit (for byte loads),
2107 6-bit (for halfword loads), or 7-bit (for word loads).
2108 Empirical results suggest a 7-bit anchor range gives the best
2109 overall code size. */
2110 targetm
.min_anchor_offset
= 0;
2111 targetm
.max_anchor_offset
= 127;
2113 else if (TARGET_THUMB2
)
2115 /* The minimum is set such that the total size of the block
2116 for a particular anchor is 248 + 1 + 4095 bytes, which is
2117 divisible by eight, ensuring natural spacing of anchors. */
2118 targetm
.min_anchor_offset
= -248;
2119 targetm
.max_anchor_offset
= 4095;
2122 /* V5 code we generate is completely interworking capable, so we turn off
2123 TARGET_INTERWORK here to avoid many tests later on. */
2125 /* XXX However, we must pass the right pre-processor defines to CPP
2126 or GLD can get confused. This is a hack. */
2127 if (TARGET_INTERWORK
)
2128 arm_cpp_interwork
= 1;
2131 target_flags
&= ~MASK_INTERWORK
;
2133 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
2134 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2136 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
2137 error ("iwmmxt abi requires an iwmmxt capable cpu");
2139 if (!global_options_set
.x_arm_fpu_index
)
2141 const char *target_fpu_name
;
2144 #ifdef FPUTYPE_DEFAULT
2145 target_fpu_name
= FPUTYPE_DEFAULT
;
2147 target_fpu_name
= "vfp";
2150 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
2155 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
2157 switch (arm_fpu_desc
->model
)
2159 case ARM_FP_MODEL_VFP
:
2160 arm_fpu_attr
= FPU_VFP
;
2167 if (TARGET_AAPCS_BASED
)
2169 if (TARGET_CALLER_INTERWORKING
)
2170 error ("AAPCS does not support -mcaller-super-interworking");
2172 if (TARGET_CALLEE_INTERWORKING
)
2173 error ("AAPCS does not support -mcallee-super-interworking");
2176 /* iWMMXt and NEON are incompatible. */
2177 if (TARGET_IWMMXT
&& TARGET_NEON
)
2178 error ("iWMMXt and NEON are incompatible");
2180 /* iWMMXt unsupported under Thumb mode. */
2181 if (TARGET_THUMB
&& TARGET_IWMMXT
)
2182 error ("iWMMXt unsupported under Thumb mode");
2184 /* __fp16 support currently assumes the core has ldrh. */
2185 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
2186 sorry ("__fp16 and no ldrh");
2188 /* If soft-float is specified then don't use FPU. */
2189 if (TARGET_SOFT_FLOAT
)
2190 arm_fpu_attr
= FPU_NONE
;
2192 if (TARGET_AAPCS_BASED
)
2194 if (arm_abi
== ARM_ABI_IWMMXT
)
2195 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
2196 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
2197 && TARGET_HARD_FLOAT
2199 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
2201 arm_pcs_default
= ARM_PCS_AAPCS
;
2205 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
2206 sorry ("-mfloat-abi=hard and VFP");
2208 if (arm_abi
== ARM_ABI_APCS
)
2209 arm_pcs_default
= ARM_PCS_APCS
;
2211 arm_pcs_default
= ARM_PCS_ATPCS
;
2214 /* For arm2/3 there is no need to do any scheduling if we are doing
2215 software floating-point. */
2216 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
2217 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
2219 /* Use the cp15 method if it is available. */
2220 if (target_thread_pointer
== TP_AUTO
)
2222 if (arm_arch6k
&& !TARGET_THUMB1
)
2223 target_thread_pointer
= TP_CP15
;
2225 target_thread_pointer
= TP_SOFT
;
2228 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
2229 error ("can not use -mtp=cp15 with 16-bit Thumb");
2231 /* Override the default structure alignment for AAPCS ABI. */
2232 if (!global_options_set
.x_arm_structure_size_boundary
)
2234 if (TARGET_AAPCS_BASED
)
2235 arm_structure_size_boundary
= 8;
2239 if (arm_structure_size_boundary
!= 8
2240 && arm_structure_size_boundary
!= 32
2241 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
2243 if (ARM_DOUBLEWORD_ALIGN
)
2245 "structure size boundary can only be set to 8, 32 or 64");
2247 warning (0, "structure size boundary can only be set to 8 or 32");
2248 arm_structure_size_boundary
2249 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
2253 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
2255 error ("RTP PIC is incompatible with Thumb");
2259 /* If stack checking is disabled, we can use r10 as the PIC register,
2260 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2261 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
2263 if (TARGET_VXWORKS_RTP
)
2264 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2265 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
2268 if (flag_pic
&& TARGET_VXWORKS_RTP
)
2269 arm_pic_register
= 9;
2271 if (arm_pic_register_string
!= NULL
)
2273 int pic_register
= decode_reg_name (arm_pic_register_string
);
2276 warning (0, "-mpic-register= is useless without -fpic");
2278 /* Prevent the user from choosing an obviously stupid PIC register. */
2279 else if (pic_register
< 0 || call_used_regs
[pic_register
]
2280 || pic_register
== HARD_FRAME_POINTER_REGNUM
2281 || pic_register
== STACK_POINTER_REGNUM
2282 || pic_register
>= PC_REGNUM
2283 || (TARGET_VXWORKS_RTP
2284 && (unsigned int) pic_register
!= arm_pic_register
))
2285 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
2287 arm_pic_register
= pic_register
;
2290 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2291 if (fix_cm3_ldrd
== 2)
2293 if (arm_selected_cpu
->core
== cortexm3
)
2299 /* Enable -munaligned-access by default for
2300 - all ARMv6 architecture-based processors
2301 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2302 - ARMv8 architecture-base processors.
2304 Disable -munaligned-access by default for
2305 - all pre-ARMv6 architecture-based processors
2306 - ARMv6-M architecture-based processors. */
2308 if (unaligned_access
== 2)
2310 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
2311 unaligned_access
= 1;
2313 unaligned_access
= 0;
2315 else if (unaligned_access
== 1
2316 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2318 warning (0, "target CPU does not support unaligned accesses");
2319 unaligned_access
= 0;
2322 if (TARGET_THUMB1
&& flag_schedule_insns
)
2324 /* Don't warn since it's on by default in -O2. */
2325 flag_schedule_insns
= 0;
2330 /* If optimizing for size, bump the number of instructions that we
2331 are prepared to conditionally execute (even on a StrongARM). */
2332 max_insns_skipped
= 6;
2335 max_insns_skipped
= current_tune
->max_insns_skipped
;
2337 /* Hot/Cold partitioning is not currently supported, since we can't
2338 handle literal pool placement in that case. */
2339 if (flag_reorder_blocks_and_partition
)
2341 inform (input_location
,
2342 "-freorder-blocks-and-partition not supported on this architecture");
2343 flag_reorder_blocks_and_partition
= 0;
2344 flag_reorder_blocks
= 1;
2348 /* Hoisting PIC address calculations more aggressively provides a small,
2349 but measurable, size reduction for PIC code. Therefore, we decrease
2350 the bar for unrestricted expression hoisting to the cost of PIC address
2351 calculation, which is 2 instructions. */
2352 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
2353 global_options
.x_param_values
,
2354 global_options_set
.x_param_values
);
2356 /* ARM EABI defaults to strict volatile bitfields. */
2357 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
2358 && abi_version_at_least(2))
2359 flag_strict_volatile_bitfields
= 1;
2361 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2362 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2363 if (flag_prefetch_loop_arrays
< 0
2366 && current_tune
->num_prefetch_slots
> 0)
2367 flag_prefetch_loop_arrays
= 1;
2369 /* Set up parameters to be used in prefetching algorithm. Do not override the
2370 defaults unless we are tuning for a core we have researched values for. */
2371 if (current_tune
->num_prefetch_slots
> 0)
2372 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
2373 current_tune
->num_prefetch_slots
,
2374 global_options
.x_param_values
,
2375 global_options_set
.x_param_values
);
2376 if (current_tune
->l1_cache_line_size
>= 0)
2377 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
2378 current_tune
->l1_cache_line_size
,
2379 global_options
.x_param_values
,
2380 global_options_set
.x_param_values
);
2381 if (current_tune
->l1_cache_size
>= 0)
2382 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
2383 current_tune
->l1_cache_size
,
2384 global_options
.x_param_values
,
2385 global_options_set
.x_param_values
);
2387 /* Use Neon to perform 64-bits operations rather than core
2389 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
2390 if (use_neon_for_64bits
== 1)
2391 prefer_neon_for_64bits
= true;
2393 /* Use the alternative scheduling-pressure algorithm by default. */
2394 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, 2,
2395 global_options
.x_param_values
,
2396 global_options_set
.x_param_values
);
2398 /* Disable shrink-wrap when optimizing function for size, since it tends to
2399 generate additional returns. */
2400 if (optimize_function_for_size_p (cfun
) && TARGET_THUMB2
)
2401 flag_shrink_wrap
= false;
2402 /* TBD: Dwarf info for apcs frame is not handled yet. */
2403 if (TARGET_APCS_FRAME
)
2404 flag_shrink_wrap
= false;
2406 /* Register global variables with the garbage collector. */
2407 arm_add_gc_roots ();
2411 arm_add_gc_roots (void)
2413 gcc_obstack_init(&minipool_obstack
);
2414 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
2417 /* A table of known ARM exception types.
2418 For use with the interrupt function attribute. */
2422 const char *const arg
;
2423 const unsigned long return_value
;
2427 static const isr_attribute_arg isr_attribute_args
[] =
2429 { "IRQ", ARM_FT_ISR
},
2430 { "irq", ARM_FT_ISR
},
2431 { "FIQ", ARM_FT_FIQ
},
2432 { "fiq", ARM_FT_FIQ
},
2433 { "ABORT", ARM_FT_ISR
},
2434 { "abort", ARM_FT_ISR
},
2435 { "ABORT", ARM_FT_ISR
},
2436 { "abort", ARM_FT_ISR
},
2437 { "UNDEF", ARM_FT_EXCEPTION
},
2438 { "undef", ARM_FT_EXCEPTION
},
2439 { "SWI", ARM_FT_EXCEPTION
},
2440 { "swi", ARM_FT_EXCEPTION
},
2441 { NULL
, ARM_FT_NORMAL
}
2444 /* Returns the (interrupt) function type of the current
2445 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2447 static unsigned long
2448 arm_isr_value (tree argument
)
2450 const isr_attribute_arg
* ptr
;
2454 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
2456 /* No argument - default to IRQ. */
2457 if (argument
== NULL_TREE
)
2460 /* Get the value of the argument. */
2461 if (TREE_VALUE (argument
) == NULL_TREE
2462 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
2463 return ARM_FT_UNKNOWN
;
2465 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
2467 /* Check it against the list of known arguments. */
2468 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
2469 if (streq (arg
, ptr
->arg
))
2470 return ptr
->return_value
;
2472 /* An unrecognized interrupt type. */
2473 return ARM_FT_UNKNOWN
;
2476 /* Computes the type of the current function. */
2478 static unsigned long
2479 arm_compute_func_type (void)
2481 unsigned long type
= ARM_FT_UNKNOWN
;
2485 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
2487 /* Decide if the current function is volatile. Such functions
2488 never return, and many memory cycles can be saved by not storing
2489 register values that will never be needed again. This optimization
2490 was added to speed up context switching in a kernel application. */
2492 && (TREE_NOTHROW (current_function_decl
)
2493 || !(flag_unwind_tables
2495 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
2496 && TREE_THIS_VOLATILE (current_function_decl
))
2497 type
|= ARM_FT_VOLATILE
;
2499 if (cfun
->static_chain_decl
!= NULL
)
2500 type
|= ARM_FT_NESTED
;
2502 attr
= DECL_ATTRIBUTES (current_function_decl
);
2504 a
= lookup_attribute ("naked", attr
);
2506 type
|= ARM_FT_NAKED
;
2508 a
= lookup_attribute ("isr", attr
);
2510 a
= lookup_attribute ("interrupt", attr
);
2513 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
2515 type
|= arm_isr_value (TREE_VALUE (a
));
2520 /* Returns the type of the current function. */
2523 arm_current_func_type (void)
2525 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
2526 cfun
->machine
->func_type
= arm_compute_func_type ();
2528 return cfun
->machine
->func_type
;
2532 arm_allocate_stack_slots_for_args (void)
2534 /* Naked functions should not allocate stack slots for arguments. */
2535 return !IS_NAKED (arm_current_func_type ());
2539 arm_warn_func_return (tree decl
)
2541 /* Naked functions are implemented entirely in assembly, including the
2542 return sequence, so suppress warnings about this. */
2543 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
2547 /* Output assembler code for a block containing the constant parts
2548 of a trampoline, leaving space for the variable parts.
2550 On the ARM, (if r8 is the static chain regnum, and remembering that
2551 referencing pc adds an offset of 8) the trampoline looks like:
2554 .word static chain value
2555 .word function's address
2556 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2559 arm_asm_trampoline_template (FILE *f
)
2563 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2564 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
2566 else if (TARGET_THUMB2
)
2568 /* The Thumb-2 trampoline is similar to the arm implementation.
2569 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2570 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
2571 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2572 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
2576 ASM_OUTPUT_ALIGN (f
, 2);
2577 fprintf (f
, "\t.code\t16\n");
2578 fprintf (f
, ".Ltrampoline_start:\n");
2579 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
2580 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2581 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
2582 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2583 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
2584 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
2586 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2587 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2590 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2593 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
2595 rtx fnaddr
, mem
, a_tramp
;
2597 emit_block_move (m_tramp
, assemble_trampoline_template (),
2598 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
2600 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
2601 emit_move_insn (mem
, chain_value
);
2603 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
2604 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
2605 emit_move_insn (mem
, fnaddr
);
2607 a_tramp
= XEXP (m_tramp
, 0);
2608 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
2609 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
2610 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
2613 /* Thumb trampolines should be entered in thumb mode, so set
2614 the bottom bit of the address. */
2617 arm_trampoline_adjust_address (rtx addr
)
2620 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
2621 NULL
, 0, OPTAB_LIB_WIDEN
);
2625 /* Return 1 if it is possible to return using a single instruction.
2626 If SIBLING is non-null, this is a test for a return before a sibling
2627 call. SIBLING is the call insn, so we can examine its register usage. */
2630 use_return_insn (int iscond
, rtx sibling
)
2633 unsigned int func_type
;
2634 unsigned long saved_int_regs
;
2635 unsigned HOST_WIDE_INT stack_adjust
;
2636 arm_stack_offsets
*offsets
;
2638 /* Never use a return instruction before reload has run. */
2639 if (!reload_completed
)
2642 func_type
= arm_current_func_type ();
2644 /* Naked, volatile and stack alignment functions need special
2646 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
2649 /* So do interrupt functions that use the frame pointer and Thumb
2650 interrupt functions. */
2651 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
2654 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
2655 && !optimize_function_for_size_p (cfun
))
2658 offsets
= arm_get_frame_offsets ();
2659 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
2661 /* As do variadic functions. */
2662 if (crtl
->args
.pretend_args_size
2663 || cfun
->machine
->uses_anonymous_args
2664 /* Or if the function calls __builtin_eh_return () */
2665 || crtl
->calls_eh_return
2666 /* Or if the function calls alloca */
2667 || cfun
->calls_alloca
2668 /* Or if there is a stack adjustment. However, if the stack pointer
2669 is saved on the stack, we can use a pre-incrementing stack load. */
2670 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
2671 && stack_adjust
== 4)))
2674 saved_int_regs
= offsets
->saved_regs_mask
;
2676 /* Unfortunately, the insn
2678 ldmib sp, {..., sp, ...}
2680 triggers a bug on most SA-110 based devices, such that the stack
2681 pointer won't be correctly restored if the instruction takes a
2682 page fault. We work around this problem by popping r3 along with
2683 the other registers, since that is never slower than executing
2684 another instruction.
2686 We test for !arm_arch5 here, because code for any architecture
2687 less than this could potentially be run on one of the buggy
2689 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
2691 /* Validate that r3 is a call-clobbered register (always true in
2692 the default abi) ... */
2693 if (!call_used_regs
[3])
2696 /* ... that it isn't being used for a return value ... */
2697 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
2700 /* ... or for a tail-call argument ... */
2703 gcc_assert (CALL_P (sibling
));
2705 if (find_regno_fusage (sibling
, USE
, 3))
2709 /* ... and that there are no call-saved registers in r0-r2
2710 (always true in the default ABI). */
2711 if (saved_int_regs
& 0x7)
2715 /* Can't be done if interworking with Thumb, and any registers have been
2717 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
2720 /* On StrongARM, conditional returns are expensive if they aren't
2721 taken and multiple registers have been stacked. */
2722 if (iscond
&& arm_tune_strongarm
)
2724 /* Conditional return when just the LR is stored is a simple
2725 conditional-load instruction, that's not expensive. */
2726 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
2730 && arm_pic_register
!= INVALID_REGNUM
2731 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
2735 /* If there are saved registers but the LR isn't saved, then we need
2736 two instructions for the return. */
2737 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
2740 /* Can't be done if any of the VFP regs are pushed,
2741 since this also requires an insn. */
2742 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
2743 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
2744 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2747 if (TARGET_REALLY_IWMMXT
)
2748 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
2749 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2755 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
2756 shrink-wrapping if possible. This is the case if we need to emit a
2757 prologue, which we can test by looking at the offsets. */
2759 use_simple_return_p (void)
2761 arm_stack_offsets
*offsets
;
2763 offsets
= arm_get_frame_offsets ();
2764 return offsets
->outgoing_args
!= 0;
2767 /* Return TRUE if int I is a valid immediate ARM constant. */
2770 const_ok_for_arm (HOST_WIDE_INT i
)
2774 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2775 be all zero, or all one. */
2776 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
2777 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
2778 != ((~(unsigned HOST_WIDE_INT
) 0)
2779 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
2782 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
2784 /* Fast return for 0 and small values. We must do this for zero, since
2785 the code below can't handle that one case. */
2786 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
2789 /* Get the number of trailing zeros. */
2790 lowbit
= ffs((int) i
) - 1;
2792 /* Only even shifts are allowed in ARM mode so round down to the
2793 nearest even number. */
2797 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
2802 /* Allow rotated constants in ARM mode. */
2804 && ((i
& ~0xc000003f) == 0
2805 || (i
& ~0xf000000f) == 0
2806 || (i
& ~0xfc000003) == 0))
2813 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2816 if (i
== v
|| i
== (v
| (v
<< 8)))
2819 /* Allow repeated pattern 0xXY00XY00. */
2829 /* Return true if I is a valid constant for the operation CODE. */
2831 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
2833 if (const_ok_for_arm (i
))
2839 /* See if we can use movw. */
2840 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
2843 /* Otherwise, try mvn. */
2844 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2847 /* See if we can use addw or subw. */
2849 && ((i
& 0xfffff000) == 0
2850 || ((-i
) & 0xfffff000) == 0))
2852 /* else fall through. */
2872 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
2874 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
2880 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2884 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2891 /* Return true if I is a valid di mode constant for the operation CODE. */
2893 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
2895 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
2896 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
2897 rtx hi
= GEN_INT (hi_val
);
2898 rtx lo
= GEN_INT (lo_val
);
2908 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
2909 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
2911 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
2918 /* Emit a sequence of insns to handle a large constant.
2919 CODE is the code of the operation required, it can be any of SET, PLUS,
2920 IOR, AND, XOR, MINUS;
2921 MODE is the mode in which the operation is being performed;
2922 VAL is the integer to operate on;
2923 SOURCE is the other operand (a register, or a null-pointer for SET);
2924 SUBTARGETS means it is safe to create scratch registers if that will
2925 either produce a simpler sequence, or we will want to cse the values.
2926 Return value is the number of insns emitted. */
2928 /* ??? Tweak this for thumb2. */
2930 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
2931 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
2935 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
2936 cond
= COND_EXEC_TEST (PATTERN (insn
));
2940 if (subtargets
|| code
== SET
2941 || (REG_P (target
) && REG_P (source
)
2942 && REGNO (target
) != REGNO (source
)))
2944 /* After arm_reorg has been called, we can't fix up expensive
2945 constants by pushing them into memory so we must synthesize
2946 them in-line, regardless of the cost. This is only likely to
2947 be more costly on chips that have load delay slots and we are
2948 compiling without running the scheduler (so no splitting
2949 occurred before the final instruction emission).
2951 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2953 if (!after_arm_reorg
2955 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
2957 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
2962 /* Currently SET is the only monadic value for CODE, all
2963 the rest are diadic. */
2964 if (TARGET_USE_MOVT
)
2965 arm_emit_movpair (target
, GEN_INT (val
));
2967 emit_set_insn (target
, GEN_INT (val
));
2973 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
2975 if (TARGET_USE_MOVT
)
2976 arm_emit_movpair (temp
, GEN_INT (val
));
2978 emit_set_insn (temp
, GEN_INT (val
));
2980 /* For MINUS, the value is subtracted from, since we never
2981 have subtraction of a constant. */
2983 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
2985 emit_set_insn (target
,
2986 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
2992 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
2996 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
2997 ARM/THUMB2 immediates, and add up to VAL.
2998 Thr function return value gives the number of insns required. */
3000 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3001 struct four_ints
*return_sequence
)
3003 int best_consecutive_zeros
= 0;
3007 struct four_ints tmp_sequence
;
3009 /* If we aren't targeting ARM, the best place to start is always at
3010 the bottom, otherwise look more closely. */
3013 for (i
= 0; i
< 32; i
+= 2)
3015 int consecutive_zeros
= 0;
3017 if (!(val
& (3 << i
)))
3019 while ((i
< 32) && !(val
& (3 << i
)))
3021 consecutive_zeros
+= 2;
3024 if (consecutive_zeros
> best_consecutive_zeros
)
3026 best_consecutive_zeros
= consecutive_zeros
;
3027 best_start
= i
- consecutive_zeros
;
3034 /* So long as it won't require any more insns to do so, it's
3035 desirable to emit a small constant (in bits 0...9) in the last
3036 insn. This way there is more chance that it can be combined with
3037 a later addressing insn to form a pre-indexed load or store
3038 operation. Consider:
3040 *((volatile int *)0xe0000100) = 1;
3041 *((volatile int *)0xe0000110) = 2;
3043 We want this to wind up as:
3047 str rB, [rA, #0x100]
3049 str rB, [rA, #0x110]
3051 rather than having to synthesize both large constants from scratch.
3053 Therefore, we calculate how many insns would be required to emit
3054 the constant starting from `best_start', and also starting from
3055 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3056 yield a shorter sequence, we may as well use zero. */
3057 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
3059 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
3061 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
3062 if (insns2
<= insns1
)
3064 *return_sequence
= tmp_sequence
;
3072 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3074 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3075 struct four_ints
*return_sequence
, int i
)
3077 int remainder
= val
& 0xffffffff;
3080 /* Try and find a way of doing the job in either two or three
3083 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3084 location. We start at position I. This may be the MSB, or
3085 optimal_immediate_sequence may have positioned it at the largest block
3086 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3087 wrapping around to the top of the word when we drop off the bottom.
3088 In the worst case this code should produce no more than four insns.
3090 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3091 constants, shifted to any arbitrary location. We should always start
3096 unsigned int b1
, b2
, b3
, b4
;
3097 unsigned HOST_WIDE_INT result
;
3100 gcc_assert (insns
< 4);
3105 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3106 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3109 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3110 /* We can use addw/subw for the last 12 bits. */
3114 /* Use an 8-bit shifted/rotated immediate. */
3118 result
= remainder
& ((0x0ff << end
)
3119 | ((i
< end
) ? (0xff >> (32 - end
))
3126 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3127 arbitrary shifts. */
3128 i
-= TARGET_ARM
? 2 : 1;
3132 /* Next, see if we can do a better job with a thumb2 replicated
3135 We do it this way around to catch the cases like 0x01F001E0 where
3136 two 8-bit immediates would work, but a replicated constant would
3139 TODO: 16-bit constants that don't clear all the bits, but still win.
3140 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3143 b1
= (remainder
& 0xff000000) >> 24;
3144 b2
= (remainder
& 0x00ff0000) >> 16;
3145 b3
= (remainder
& 0x0000ff00) >> 8;
3146 b4
= remainder
& 0xff;
3150 /* The 8-bit immediate already found clears b1 (and maybe b2),
3151 but must leave b3 and b4 alone. */
3153 /* First try to find a 32-bit replicated constant that clears
3154 almost everything. We can assume that we can't do it in one,
3155 or else we wouldn't be here. */
3156 unsigned int tmp
= b1
& b2
& b3
& b4
;
3157 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3159 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3160 + (tmp
== b3
) + (tmp
== b4
);
3162 && (matching_bytes
>= 3
3163 || (matching_bytes
== 2
3164 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3166 /* At least 3 of the bytes match, and the fourth has at
3167 least as many bits set, or two of the bytes match
3168 and it will only require one more insn to finish. */
3176 /* Second, try to find a 16-bit replicated constant that can
3177 leave three of the bytes clear. If b2 or b4 is already
3178 zero, then we can. If the 8-bit from above would not
3179 clear b2 anyway, then we still win. */
3180 else if (b1
== b3
&& (!b2
|| !b4
3181 || (remainder
& 0x00ff0000 & ~result
)))
3183 result
= remainder
& 0xff00ff00;
3189 /* The 8-bit immediate already found clears b2 (and maybe b3)
3190 and we don't get here unless b1 is already clear, but it will
3191 leave b4 unchanged. */
3193 /* If we can clear b2 and b4 at once, then we win, since the
3194 8-bits couldn't possibly reach that far. */
3197 result
= remainder
& 0x00ff00ff;
3203 return_sequence
->i
[insns
++] = result
;
3204 remainder
&= ~result
;
3206 if (code
== SET
|| code
== MINUS
)
3214 /* Emit an instruction with the indicated PATTERN. If COND is
3215 non-NULL, conditionalize the execution of the instruction on COND
3219 emit_constant_insn (rtx cond
, rtx pattern
)
3222 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
3223 emit_insn (pattern
);
3226 /* As above, but extra parameter GENERATE which, if clear, suppresses
3230 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
3231 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
3236 int final_invert
= 0;
3238 int set_sign_bit_copies
= 0;
3239 int clear_sign_bit_copies
= 0;
3240 int clear_zero_bit_copies
= 0;
3241 int set_zero_bit_copies
= 0;
3242 int insns
= 0, neg_insns
, inv_insns
;
3243 unsigned HOST_WIDE_INT temp1
, temp2
;
3244 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3245 struct four_ints
*immediates
;
3246 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
3248 /* Find out which operations are safe for a given CODE. Also do a quick
3249 check for degenerate cases; these can occur when DImode operations
3262 if (remainder
== 0xffffffff)
3265 emit_constant_insn (cond
,
3266 gen_rtx_SET (VOIDmode
, target
,
3267 GEN_INT (ARM_SIGN_EXTEND (val
))));
3273 if (reload_completed
&& rtx_equal_p (target
, source
))
3277 emit_constant_insn (cond
,
3278 gen_rtx_SET (VOIDmode
, target
, source
));
3287 emit_constant_insn (cond
,
3288 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
3291 if (remainder
== 0xffffffff)
3293 if (reload_completed
&& rtx_equal_p (target
, source
))
3296 emit_constant_insn (cond
,
3297 gen_rtx_SET (VOIDmode
, target
, source
));
3306 if (reload_completed
&& rtx_equal_p (target
, source
))
3309 emit_constant_insn (cond
,
3310 gen_rtx_SET (VOIDmode
, target
, source
));
3314 if (remainder
== 0xffffffff)
3317 emit_constant_insn (cond
,
3318 gen_rtx_SET (VOIDmode
, target
,
3319 gen_rtx_NOT (mode
, source
)));
3326 /* We treat MINUS as (val - source), since (source - val) is always
3327 passed as (source + (-val)). */
3331 emit_constant_insn (cond
,
3332 gen_rtx_SET (VOIDmode
, target
,
3333 gen_rtx_NEG (mode
, source
)));
3336 if (const_ok_for_arm (val
))
3339 emit_constant_insn (cond
,
3340 gen_rtx_SET (VOIDmode
, target
,
3341 gen_rtx_MINUS (mode
, GEN_INT (val
),
3352 /* If we can do it in one insn get out quickly. */
3353 if (const_ok_for_op (val
, code
))
3356 emit_constant_insn (cond
,
3357 gen_rtx_SET (VOIDmode
, target
,
3359 ? gen_rtx_fmt_ee (code
, mode
, source
,
3365 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3367 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
3368 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
3372 if (mode
== SImode
&& i
== 16)
3373 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3375 emit_constant_insn (cond
,
3376 gen_zero_extendhisi2
3377 (target
, gen_lowpart (HImode
, source
)));
3379 /* Extz only supports SImode, but we can coerce the operands
3381 emit_constant_insn (cond
,
3382 gen_extzv_t2 (gen_lowpart (SImode
, target
),
3383 gen_lowpart (SImode
, source
),
3384 GEN_INT (i
), const0_rtx
));
3390 /* Calculate a few attributes that may be useful for specific
3392 /* Count number of leading zeros. */
3393 for (i
= 31; i
>= 0; i
--)
3395 if ((remainder
& (1 << i
)) == 0)
3396 clear_sign_bit_copies
++;
3401 /* Count number of leading 1's. */
3402 for (i
= 31; i
>= 0; i
--)
3404 if ((remainder
& (1 << i
)) != 0)
3405 set_sign_bit_copies
++;
3410 /* Count number of trailing zero's. */
3411 for (i
= 0; i
<= 31; i
++)
3413 if ((remainder
& (1 << i
)) == 0)
3414 clear_zero_bit_copies
++;
3419 /* Count number of trailing 1's. */
3420 for (i
= 0; i
<= 31; i
++)
3422 if ((remainder
& (1 << i
)) != 0)
3423 set_zero_bit_copies
++;
3431 /* See if we can do this by sign_extending a constant that is known
3432 to be negative. This is a good way of doing it, since the shift
3433 may well merge into a subsequent insn. */
3434 if (set_sign_bit_copies
> 1)
3436 if (const_ok_for_arm
3437 (temp1
= ARM_SIGN_EXTEND (remainder
3438 << (set_sign_bit_copies
- 1))))
3442 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3443 emit_constant_insn (cond
,
3444 gen_rtx_SET (VOIDmode
, new_src
,
3446 emit_constant_insn (cond
,
3447 gen_ashrsi3 (target
, new_src
,
3448 GEN_INT (set_sign_bit_copies
- 1)));
3452 /* For an inverted constant, we will need to set the low bits,
3453 these will be shifted out of harm's way. */
3454 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
3455 if (const_ok_for_arm (~temp1
))
3459 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3460 emit_constant_insn (cond
,
3461 gen_rtx_SET (VOIDmode
, new_src
,
3463 emit_constant_insn (cond
,
3464 gen_ashrsi3 (target
, new_src
,
3465 GEN_INT (set_sign_bit_copies
- 1)));
3471 /* See if we can calculate the value as the difference between two
3472 valid immediates. */
3473 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
3475 int topshift
= clear_sign_bit_copies
& ~1;
3477 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
3478 & (0xff000000 >> topshift
));
3480 /* If temp1 is zero, then that means the 9 most significant
3481 bits of remainder were 1 and we've caused it to overflow.
3482 When topshift is 0 we don't need to do anything since we
3483 can borrow from 'bit 32'. */
3484 if (temp1
== 0 && topshift
!= 0)
3485 temp1
= 0x80000000 >> (topshift
- 1);
3487 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
3489 if (const_ok_for_arm (temp2
))
3493 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3494 emit_constant_insn (cond
,
3495 gen_rtx_SET (VOIDmode
, new_src
,
3497 emit_constant_insn (cond
,
3498 gen_addsi3 (target
, new_src
,
3506 /* See if we can generate this by setting the bottom (or the top)
3507 16 bits, and then shifting these into the other half of the
3508 word. We only look for the simplest cases, to do more would cost
3509 too much. Be careful, however, not to generate this when the
3510 alternative would take fewer insns. */
3511 if (val
& 0xffff0000)
3513 temp1
= remainder
& 0xffff0000;
3514 temp2
= remainder
& 0x0000ffff;
3516 /* Overlaps outside this range are best done using other methods. */
3517 for (i
= 9; i
< 24; i
++)
3519 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
3520 && !const_ok_for_arm (temp2
))
3522 rtx new_src
= (subtargets
3523 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3525 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
3526 source
, subtargets
, generate
);
3534 gen_rtx_ASHIFT (mode
, source
,
3541 /* Don't duplicate cases already considered. */
3542 for (i
= 17; i
< 24; i
++)
3544 if (((temp1
| (temp1
>> i
)) == remainder
)
3545 && !const_ok_for_arm (temp1
))
3547 rtx new_src
= (subtargets
3548 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3550 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
3551 source
, subtargets
, generate
);
3556 gen_rtx_SET (VOIDmode
, target
,
3559 gen_rtx_LSHIFTRT (mode
, source
,
3570 /* If we have IOR or XOR, and the constant can be loaded in a
3571 single instruction, and we can find a temporary to put it in,
3572 then this can be done in two instructions instead of 3-4. */
3574 /* TARGET can't be NULL if SUBTARGETS is 0 */
3575 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
3577 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
3581 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3583 emit_constant_insn (cond
,
3584 gen_rtx_SET (VOIDmode
, sub
,
3586 emit_constant_insn (cond
,
3587 gen_rtx_SET (VOIDmode
, target
,
3588 gen_rtx_fmt_ee (code
, mode
,
3599 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3600 and the remainder 0s for e.g. 0xfff00000)
3601 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3603 This can be done in 2 instructions by using shifts with mov or mvn.
3608 mvn r0, r0, lsr #12 */
3609 if (set_sign_bit_copies
> 8
3610 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
3614 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3615 rtx shift
= GEN_INT (set_sign_bit_copies
);
3619 gen_rtx_SET (VOIDmode
, sub
,
3621 gen_rtx_ASHIFT (mode
,
3626 gen_rtx_SET (VOIDmode
, target
,
3628 gen_rtx_LSHIFTRT (mode
, sub
,
3635 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3637 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3639 For eg. r0 = r0 | 0xfff
3644 if (set_zero_bit_copies
> 8
3645 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
3649 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3650 rtx shift
= GEN_INT (set_zero_bit_copies
);
3654 gen_rtx_SET (VOIDmode
, sub
,
3656 gen_rtx_LSHIFTRT (mode
,
3661 gen_rtx_SET (VOIDmode
, target
,
3663 gen_rtx_ASHIFT (mode
, sub
,
3669 /* This will never be reached for Thumb2 because orn is a valid
3670 instruction. This is for Thumb1 and the ARM 32 bit cases.
3672 x = y | constant (such that ~constant is a valid constant)
3674 x = ~(~y & ~constant).
3676 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
3680 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3681 emit_constant_insn (cond
,
3682 gen_rtx_SET (VOIDmode
, sub
,
3683 gen_rtx_NOT (mode
, source
)));
3686 sub
= gen_reg_rtx (mode
);
3687 emit_constant_insn (cond
,
3688 gen_rtx_SET (VOIDmode
, sub
,
3689 gen_rtx_AND (mode
, source
,
3691 emit_constant_insn (cond
,
3692 gen_rtx_SET (VOIDmode
, target
,
3693 gen_rtx_NOT (mode
, sub
)));
3700 /* See if two shifts will do 2 or more insn's worth of work. */
3701 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
3703 HOST_WIDE_INT shift_mask
= ((0xffffffff
3704 << (32 - clear_sign_bit_copies
))
3707 if ((remainder
| shift_mask
) != 0xffffffff)
3711 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3712 insns
= arm_gen_constant (AND
, mode
, cond
,
3713 remainder
| shift_mask
,
3714 new_src
, source
, subtargets
, 1);
3719 rtx targ
= subtargets
? NULL_RTX
: target
;
3720 insns
= arm_gen_constant (AND
, mode
, cond
,
3721 remainder
| shift_mask
,
3722 targ
, source
, subtargets
, 0);
3728 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3729 rtx shift
= GEN_INT (clear_sign_bit_copies
);
3731 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
3732 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
3738 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
3740 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
3742 if ((remainder
| shift_mask
) != 0xffffffff)
3746 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3748 insns
= arm_gen_constant (AND
, mode
, cond
,
3749 remainder
| shift_mask
,
3750 new_src
, source
, subtargets
, 1);
3755 rtx targ
= subtargets
? NULL_RTX
: target
;
3757 insns
= arm_gen_constant (AND
, mode
, cond
,
3758 remainder
| shift_mask
,
3759 targ
, source
, subtargets
, 0);
3765 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3766 rtx shift
= GEN_INT (clear_zero_bit_copies
);
3768 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
3769 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
3781 /* Calculate what the instruction sequences would be if we generated it
3782 normally, negated, or inverted. */
3784 /* AND cannot be split into multiple insns, so invert and use BIC. */
3787 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
3790 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
3795 if (can_invert
|| final_invert
)
3796 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
3801 immediates
= &pos_immediates
;
3803 /* Is the negated immediate sequence more efficient? */
3804 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
3807 immediates
= &neg_immediates
;
3812 /* Is the inverted immediate sequence more efficient?
3813 We must allow for an extra NOT instruction for XOR operations, although
3814 there is some chance that the final 'mvn' will get optimized later. */
3815 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
3818 immediates
= &inv_immediates
;
3826 /* Now output the chosen sequence as instructions. */
3829 for (i
= 0; i
< insns
; i
++)
3831 rtx new_src
, temp1_rtx
;
3833 temp1
= immediates
->i
[i
];
3835 if (code
== SET
|| code
== MINUS
)
3836 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
3837 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
3838 new_src
= gen_reg_rtx (mode
);
3844 else if (can_negate
)
3847 temp1
= trunc_int_for_mode (temp1
, mode
);
3848 temp1_rtx
= GEN_INT (temp1
);
3852 else if (code
== MINUS
)
3853 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
3855 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
3857 emit_constant_insn (cond
,
3858 gen_rtx_SET (VOIDmode
, new_src
,
3864 can_negate
= can_invert
;
3868 else if (code
== MINUS
)
3876 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
3877 gen_rtx_NOT (mode
, source
)));
3884 /* Canonicalize a comparison so that we are more likely to recognize it.
3885 This can be done for a few constant compares, where we can make the
3886 immediate value easier to load. */
3889 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
3890 bool op0_preserve_value
)
3892 enum machine_mode mode
;
3893 unsigned HOST_WIDE_INT i
, maxval
;
3895 mode
= GET_MODE (*op0
);
3896 if (mode
== VOIDmode
)
3897 mode
= GET_MODE (*op1
);
3899 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
3901 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3902 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3903 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3904 for GTU/LEU in Thumb mode. */
3909 if (*code
== GT
|| *code
== LE
3910 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
3912 /* Missing comparison. First try to use an available
3914 if (CONST_INT_P (*op1
))
3922 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3924 *op1
= GEN_INT (i
+ 1);
3925 *code
= *code
== GT
? GE
: LT
;
3931 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3932 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3934 *op1
= GEN_INT (i
+ 1);
3935 *code
= *code
== GTU
? GEU
: LTU
;
3944 /* If that did not work, reverse the condition. */
3945 if (!op0_preserve_value
)
3950 *code
= (int)swap_condition ((enum rtx_code
)*code
);
3956 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3957 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3958 to facilitate possible combining with a cmp into 'ands'. */
3960 && GET_CODE (*op0
) == ZERO_EXTEND
3961 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
3962 && GET_MODE (XEXP (*op0
, 0)) == QImode
3963 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
3964 && subreg_lowpart_p (XEXP (*op0
, 0))
3965 && *op1
== const0_rtx
)
3966 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
3969 /* Comparisons smaller than DImode. Only adjust comparisons against
3970 an out-of-range constant. */
3971 if (!CONST_INT_P (*op1
)
3972 || const_ok_for_arm (INTVAL (*op1
))
3973 || const_ok_for_arm (- INTVAL (*op1
)))
3987 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3989 *op1
= GEN_INT (i
+ 1);
3990 *code
= *code
== GT
? GE
: LT
;
3998 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4000 *op1
= GEN_INT (i
- 1);
4001 *code
= *code
== GE
? GT
: LE
;
4008 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4009 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4011 *op1
= GEN_INT (i
+ 1);
4012 *code
= *code
== GTU
? GEU
: LTU
;
4020 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4022 *op1
= GEN_INT (i
- 1);
4023 *code
= *code
== GEU
? GTU
: LEU
;
4034 /* Define how to find the value returned by a function. */
4037 arm_function_value(const_tree type
, const_tree func
,
4038 bool outgoing ATTRIBUTE_UNUSED
)
4040 enum machine_mode mode
;
4041 int unsignedp ATTRIBUTE_UNUSED
;
4042 rtx r ATTRIBUTE_UNUSED
;
4044 mode
= TYPE_MODE (type
);
4046 if (TARGET_AAPCS_BASED
)
4047 return aapcs_allocate_return_reg (mode
, type
, func
);
4049 /* Promote integer types. */
4050 if (INTEGRAL_TYPE_P (type
))
4051 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
4053 /* Promotes small structs returned in a register to full-word size
4054 for big-endian AAPCS. */
4055 if (arm_return_in_msb (type
))
4057 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4058 if (size
% UNITS_PER_WORD
!= 0)
4060 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4061 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4065 return arm_libcall_value_1 (mode
);
4068 /* libcall hashtable helpers. */
4070 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
4072 typedef rtx_def value_type
;
4073 typedef rtx_def compare_type
;
4074 static inline hashval_t
hash (const value_type
*);
4075 static inline bool equal (const value_type
*, const compare_type
*);
4076 static inline void remove (value_type
*);
4080 libcall_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
4082 return rtx_equal_p (p1
, p2
);
4086 libcall_hasher::hash (const value_type
*p1
)
4088 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
4091 typedef hash_table
<libcall_hasher
> libcall_table_type
;
4094 add_libcall (libcall_table_type htab
, rtx libcall
)
4096 *htab
.find_slot (libcall
, INSERT
) = libcall
;
4100 arm_libcall_uses_aapcs_base (const_rtx libcall
)
4102 static bool init_done
= false;
4103 static libcall_table_type libcall_htab
;
4109 libcall_htab
.create (31);
4110 add_libcall (libcall_htab
,
4111 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
4112 add_libcall (libcall_htab
,
4113 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
4114 add_libcall (libcall_htab
,
4115 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
4116 add_libcall (libcall_htab
,
4117 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
4119 add_libcall (libcall_htab
,
4120 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
4121 add_libcall (libcall_htab
,
4122 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
4123 add_libcall (libcall_htab
,
4124 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
4125 add_libcall (libcall_htab
,
4126 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
4128 add_libcall (libcall_htab
,
4129 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
4130 add_libcall (libcall_htab
,
4131 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
4132 add_libcall (libcall_htab
,
4133 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
4134 add_libcall (libcall_htab
,
4135 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
4136 add_libcall (libcall_htab
,
4137 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
4138 add_libcall (libcall_htab
,
4139 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
4140 add_libcall (libcall_htab
,
4141 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
4142 add_libcall (libcall_htab
,
4143 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
4145 /* Values from double-precision helper functions are returned in core
4146 registers if the selected core only supports single-precision
4147 arithmetic, even if we are using the hard-float ABI. The same is
4148 true for single-precision helpers, but we will never be using the
4149 hard-float ABI on a CPU which doesn't support single-precision
4150 operations in hardware. */
4151 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
4152 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
4153 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
4154 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
4155 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
4156 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
4157 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
4158 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
4159 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
4160 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
4161 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
4162 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
4164 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
4168 return libcall
&& libcall_htab
.find (libcall
) != NULL
;
4172 arm_libcall_value_1 (enum machine_mode mode
)
4174 if (TARGET_AAPCS_BASED
)
4175 return aapcs_libcall_value (mode
);
4176 else if (TARGET_IWMMXT_ABI
4177 && arm_vector_mode_supported_p (mode
))
4178 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
4180 return gen_rtx_REG (mode
, ARG_REGISTER (1));
4183 /* Define how to find the value returned by a library function
4184 assuming the value has mode MODE. */
4187 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
4189 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
4190 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4192 /* The following libcalls return their result in integer registers,
4193 even though they return a floating point value. */
4194 if (arm_libcall_uses_aapcs_base (libcall
))
4195 return gen_rtx_REG (mode
, ARG_REGISTER(1));
4199 return arm_libcall_value_1 (mode
);
4202 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4205 arm_function_value_regno_p (const unsigned int regno
)
4207 if (regno
== ARG_REGISTER (1)
4209 && TARGET_AAPCS_BASED
4211 && TARGET_HARD_FLOAT
4212 && regno
== FIRST_VFP_REGNUM
)
4213 || (TARGET_IWMMXT_ABI
4214 && regno
== FIRST_IWMMXT_REGNUM
))
4220 /* Determine the amount of memory needed to store the possible return
4221 registers of an untyped call. */
4223 arm_apply_result_size (void)
4229 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
4231 if (TARGET_IWMMXT_ABI
)
4238 /* Decide whether TYPE should be returned in memory (true)
4239 or in a register (false). FNTYPE is the type of the function making
4242 arm_return_in_memory (const_tree type
, const_tree fntype
)
4246 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
4248 if (TARGET_AAPCS_BASED
)
4250 /* Simple, non-aggregate types (ie not including vectors and
4251 complex) are always returned in a register (or registers).
4252 We don't care about which register here, so we can short-cut
4253 some of the detail. */
4254 if (!AGGREGATE_TYPE_P (type
)
4255 && TREE_CODE (type
) != VECTOR_TYPE
4256 && TREE_CODE (type
) != COMPLEX_TYPE
)
4259 /* Any return value that is no larger than one word can be
4261 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
4264 /* Check any available co-processors to see if they accept the
4265 type as a register candidate (VFP, for example, can return
4266 some aggregates in consecutive registers). These aren't
4267 available if the call is variadic. */
4268 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
4271 /* Vector values should be returned using ARM registers, not
4272 memory (unless they're over 16 bytes, which will break since
4273 we only have four call-clobbered registers to play with). */
4274 if (TREE_CODE (type
) == VECTOR_TYPE
)
4275 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4277 /* The rest go in memory. */
4281 if (TREE_CODE (type
) == VECTOR_TYPE
)
4282 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4284 if (!AGGREGATE_TYPE_P (type
) &&
4285 (TREE_CODE (type
) != VECTOR_TYPE
))
4286 /* All simple types are returned in registers. */
4289 if (arm_abi
!= ARM_ABI_APCS
)
4291 /* ATPCS and later return aggregate types in memory only if they are
4292 larger than a word (or are variable size). */
4293 return (size
< 0 || size
> UNITS_PER_WORD
);
4296 /* For the arm-wince targets we choose to be compatible with Microsoft's
4297 ARM and Thumb compilers, which always return aggregates in memory. */
4299 /* All structures/unions bigger than one word are returned in memory.
4300 Also catch the case where int_size_in_bytes returns -1. In this case
4301 the aggregate is either huge or of variable size, and in either case
4302 we will want to return it via memory and not in a register. */
4303 if (size
< 0 || size
> UNITS_PER_WORD
)
4306 if (TREE_CODE (type
) == RECORD_TYPE
)
4310 /* For a struct the APCS says that we only return in a register
4311 if the type is 'integer like' and every addressable element
4312 has an offset of zero. For practical purposes this means
4313 that the structure can have at most one non bit-field element
4314 and that this element must be the first one in the structure. */
4316 /* Find the first field, ignoring non FIELD_DECL things which will
4317 have been created by C++. */
4318 for (field
= TYPE_FIELDS (type
);
4319 field
&& TREE_CODE (field
) != FIELD_DECL
;
4320 field
= DECL_CHAIN (field
))
4324 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4326 /* Check that the first field is valid for returning in a register. */
4328 /* ... Floats are not allowed */
4329 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4332 /* ... Aggregates that are not themselves valid for returning in
4333 a register are not allowed. */
4334 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4337 /* Now check the remaining fields, if any. Only bitfields are allowed,
4338 since they are not addressable. */
4339 for (field
= DECL_CHAIN (field
);
4341 field
= DECL_CHAIN (field
))
4343 if (TREE_CODE (field
) != FIELD_DECL
)
4346 if (!DECL_BIT_FIELD_TYPE (field
))
4353 if (TREE_CODE (type
) == UNION_TYPE
)
4357 /* Unions can be returned in registers if every element is
4358 integral, or can be returned in an integer register. */
4359 for (field
= TYPE_FIELDS (type
);
4361 field
= DECL_CHAIN (field
))
4363 if (TREE_CODE (field
) != FIELD_DECL
)
4366 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4369 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4375 #endif /* not ARM_WINCE */
4377 /* Return all other types in memory. */
4381 const struct pcs_attribute_arg
4385 } pcs_attribute_args
[] =
4387 {"aapcs", ARM_PCS_AAPCS
},
4388 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
4390 /* We could recognize these, but changes would be needed elsewhere
4391 * to implement them. */
4392 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
4393 {"atpcs", ARM_PCS_ATPCS
},
4394 {"apcs", ARM_PCS_APCS
},
4396 {NULL
, ARM_PCS_UNKNOWN
}
4400 arm_pcs_from_attribute (tree attr
)
4402 const struct pcs_attribute_arg
*ptr
;
4405 /* Get the value of the argument. */
4406 if (TREE_VALUE (attr
) == NULL_TREE
4407 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
4408 return ARM_PCS_UNKNOWN
;
4410 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
4412 /* Check it against the list of known arguments. */
4413 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4414 if (streq (arg
, ptr
->arg
))
4417 /* An unrecognized interrupt type. */
4418 return ARM_PCS_UNKNOWN
;
4421 /* Get the PCS variant to use for this call. TYPE is the function's type
4422 specification, DECL is the specific declaration. DECL may be null if
4423 the call could be indirect or if this is a library call. */
4425 arm_get_pcs_model (const_tree type
, const_tree decl
)
4427 bool user_convention
= false;
4428 enum arm_pcs user_pcs
= arm_pcs_default
;
4433 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
4436 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
4437 user_convention
= true;
4440 if (TARGET_AAPCS_BASED
)
4442 /* Detect varargs functions. These always use the base rules
4443 (no argument is ever a candidate for a co-processor
4445 bool base_rules
= stdarg_p (type
);
4447 if (user_convention
)
4449 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
4450 sorry ("non-AAPCS derived PCS variant");
4451 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
4452 error ("variadic functions must use the base AAPCS variant");
4456 return ARM_PCS_AAPCS
;
4457 else if (user_convention
)
4459 else if (decl
&& flag_unit_at_a_time
)
4461 /* Local functions never leak outside this compilation unit,
4462 so we are free to use whatever conventions are
4464 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4465 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
4467 return ARM_PCS_AAPCS_LOCAL
;
4470 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
4471 sorry ("PCS variant");
4473 /* For everything else we use the target's default. */
4474 return arm_pcs_default
;
4479 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4480 const_tree fntype ATTRIBUTE_UNUSED
,
4481 rtx libcall ATTRIBUTE_UNUSED
,
4482 const_tree fndecl ATTRIBUTE_UNUSED
)
4484 /* Record the unallocated VFP registers. */
4485 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
4486 pcum
->aapcs_vfp_reg_alloc
= 0;
4489 /* Walk down the type tree of TYPE counting consecutive base elements.
4490 If *MODEP is VOIDmode, then set it to the first valid floating point
4491 type. If a non-floating point type is found, or if a floating point
4492 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4493 otherwise return the count in the sub-tree. */
4495 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
4497 enum machine_mode mode
;
4500 switch (TREE_CODE (type
))
4503 mode
= TYPE_MODE (type
);
4504 if (mode
!= DFmode
&& mode
!= SFmode
)
4507 if (*modep
== VOIDmode
)
4516 mode
= TYPE_MODE (TREE_TYPE (type
));
4517 if (mode
!= DFmode
&& mode
!= SFmode
)
4520 if (*modep
== VOIDmode
)
4529 /* Use V2SImode and V4SImode as representatives of all 64-bit
4530 and 128-bit vector types, whether or not those modes are
4531 supported with the present options. */
4532 size
= int_size_in_bytes (type
);
4545 if (*modep
== VOIDmode
)
4548 /* Vector modes are considered to be opaque: two vectors are
4549 equivalent for the purposes of being homogeneous aggregates
4550 if they are the same size. */
4559 tree index
= TYPE_DOMAIN (type
);
4561 /* Can't handle incomplete types. */
4562 if (!COMPLETE_TYPE_P (type
))
4565 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
4568 || !TYPE_MAX_VALUE (index
)
4569 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
4570 || !TYPE_MIN_VALUE (index
)
4571 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
4575 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
4576 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
4578 /* There must be no padding. */
4579 if (!host_integerp (TYPE_SIZE (type
), 1)
4580 || (tree_low_cst (TYPE_SIZE (type
), 1)
4581 != count
* GET_MODE_BITSIZE (*modep
)))
4593 /* Can't handle incomplete types. */
4594 if (!COMPLETE_TYPE_P (type
))
4597 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
4599 if (TREE_CODE (field
) != FIELD_DECL
)
4602 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
4608 /* There must be no padding. */
4609 if (!host_integerp (TYPE_SIZE (type
), 1)
4610 || (tree_low_cst (TYPE_SIZE (type
), 1)
4611 != count
* GET_MODE_BITSIZE (*modep
)))
4618 case QUAL_UNION_TYPE
:
4620 /* These aren't very interesting except in a degenerate case. */
4625 /* Can't handle incomplete types. */
4626 if (!COMPLETE_TYPE_P (type
))
4629 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
4631 if (TREE_CODE (field
) != FIELD_DECL
)
4634 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
4637 count
= count
> sub_count
? count
: sub_count
;
4640 /* There must be no padding. */
4641 if (!host_integerp (TYPE_SIZE (type
), 1)
4642 || (tree_low_cst (TYPE_SIZE (type
), 1)
4643 != count
* GET_MODE_BITSIZE (*modep
)))
4656 /* Return true if PCS_VARIANT should use VFP registers. */
4658 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
4660 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
4662 static bool seen_thumb1_vfp
= false;
4664 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
4666 sorry ("Thumb-1 hard-float VFP ABI");
4667 /* sorry() is not immediately fatal, so only display this once. */
4668 seen_thumb1_vfp
= true;
4674 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
4677 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
4678 (TARGET_VFP_DOUBLE
|| !is_double
));
4681 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4682 suitable for passing or returning in VFP registers for the PCS
4683 variant selected. If it is, then *BASE_MODE is updated to contain
4684 a machine mode describing each element of the argument's type and
4685 *COUNT to hold the number of such elements. */
4687 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
4688 enum machine_mode mode
, const_tree type
,
4689 enum machine_mode
*base_mode
, int *count
)
4691 enum machine_mode new_mode
= VOIDmode
;
4693 /* If we have the type information, prefer that to working things
4694 out from the mode. */
4697 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
4699 if (ag_count
> 0 && ag_count
<= 4)
4704 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
4705 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
4706 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
4711 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
4714 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
4720 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
4723 *base_mode
= new_mode
;
4728 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
4729 enum machine_mode mode
, const_tree type
)
4731 int count ATTRIBUTE_UNUSED
;
4732 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
4734 if (!use_vfp_abi (pcs_variant
, false))
4736 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4741 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4744 if (!use_vfp_abi (pcum
->pcs_variant
, false))
4747 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
4748 &pcum
->aapcs_vfp_rmode
,
4749 &pcum
->aapcs_vfp_rcount
);
4753 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4754 const_tree type ATTRIBUTE_UNUSED
)
4756 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
4757 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
4760 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
4761 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
4763 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
4765 || (mode
== TImode
&& ! TARGET_NEON
)
4766 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
4769 int rcount
= pcum
->aapcs_vfp_rcount
;
4771 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
4775 /* Avoid using unsupported vector modes. */
4776 if (rmode
== V2SImode
)
4778 else if (rmode
== V4SImode
)
4785 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
4786 for (i
= 0; i
< rcount
; i
++)
4788 rtx tmp
= gen_rtx_REG (rmode
,
4789 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
4790 tmp
= gen_rtx_EXPR_LIST
4792 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
4793 XVECEXP (par
, 0, i
) = tmp
;
4796 pcum
->aapcs_reg
= par
;
4799 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
4806 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
4807 enum machine_mode mode
,
4808 const_tree type ATTRIBUTE_UNUSED
)
4810 if (!use_vfp_abi (pcs_variant
, false))
4813 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
4816 enum machine_mode ag_mode
;
4821 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4826 if (ag_mode
== V2SImode
)
4828 else if (ag_mode
== V4SImode
)
4834 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
4835 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
4836 for (i
= 0; i
< count
; i
++)
4838 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
4839 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
4840 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
4841 XVECEXP (par
, 0, i
) = tmp
;
4847 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
4851 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4852 enum machine_mode mode ATTRIBUTE_UNUSED
,
4853 const_tree type ATTRIBUTE_UNUSED
)
4855 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
4856 pcum
->aapcs_vfp_reg_alloc
= 0;
4860 #define AAPCS_CP(X) \
4862 aapcs_ ## X ## _cum_init, \
4863 aapcs_ ## X ## _is_call_candidate, \
4864 aapcs_ ## X ## _allocate, \
4865 aapcs_ ## X ## _is_return_candidate, \
4866 aapcs_ ## X ## _allocate_return_reg, \
4867 aapcs_ ## X ## _advance \
4870 /* Table of co-processors that can be used to pass arguments in
4871 registers. Idealy no arugment should be a candidate for more than
4872 one co-processor table entry, but the table is processed in order
4873 and stops after the first match. If that entry then fails to put
4874 the argument into a co-processor register, the argument will go on
4878 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4879 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
4881 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4882 BLKmode) is a candidate for this co-processor's registers; this
4883 function should ignore any position-dependent state in
4884 CUMULATIVE_ARGS and only use call-type dependent information. */
4885 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4887 /* Return true if the argument does get a co-processor register; it
4888 should set aapcs_reg to an RTX of the register allocated as is
4889 required for a return from FUNCTION_ARG. */
4890 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4892 /* Return true if a result of mode MODE (or type TYPE if MODE is
4893 BLKmode) is can be returned in this co-processor's registers. */
4894 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4896 /* Allocate and return an RTX element to hold the return type of a
4897 call, this routine must not fail and will only be called if
4898 is_return_candidate returned true with the same parameters. */
4899 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4901 /* Finish processing this argument and prepare to start processing
4903 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4904 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
4912 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4917 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4918 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
4925 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
4927 /* We aren't passed a decl, so we can't check that a call is local.
4928 However, it isn't clear that that would be a win anyway, since it
4929 might limit some tail-calling opportunities. */
4930 enum arm_pcs pcs_variant
;
4934 const_tree fndecl
= NULL_TREE
;
4936 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4939 fntype
= TREE_TYPE (fntype
);
4942 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4945 pcs_variant
= arm_pcs_default
;
4947 if (pcs_variant
!= ARM_PCS_AAPCS
)
4951 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4952 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
4961 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
4964 /* We aren't passed a decl, so we can't check that a call is local.
4965 However, it isn't clear that that would be a win anyway, since it
4966 might limit some tail-calling opportunities. */
4967 enum arm_pcs pcs_variant
;
4968 int unsignedp ATTRIBUTE_UNUSED
;
4972 const_tree fndecl
= NULL_TREE
;
4974 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4977 fntype
= TREE_TYPE (fntype
);
4980 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4983 pcs_variant
= arm_pcs_default
;
4985 /* Promote integer types. */
4986 if (type
&& INTEGRAL_TYPE_P (type
))
4987 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
4989 if (pcs_variant
!= ARM_PCS_AAPCS
)
4993 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4994 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
4996 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5000 /* Promotes small structs returned in a register to full-word size
5001 for big-endian AAPCS. */
5002 if (type
&& arm_return_in_msb (type
))
5004 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5005 if (size
% UNITS_PER_WORD
!= 0)
5007 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5008 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5012 return gen_rtx_REG (mode
, R0_REGNUM
);
5016 aapcs_libcall_value (enum machine_mode mode
)
5018 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5019 && GET_MODE_SIZE (mode
) <= 4)
5022 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
5025 /* Lay out a function argument using the AAPCS rules. The rule
5026 numbers referred to here are those in the AAPCS. */
5028 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5029 const_tree type
, bool named
)
5034 /* We only need to do this once per argument. */
5035 if (pcum
->aapcs_arg_processed
)
5038 pcum
->aapcs_arg_processed
= true;
5040 /* Special case: if named is false then we are handling an incoming
5041 anonymous argument which is on the stack. */
5045 /* Is this a potential co-processor register candidate? */
5046 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5048 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
5049 pcum
->aapcs_cprc_slot
= slot
;
5051 /* We don't have to apply any of the rules from part B of the
5052 preparation phase, these are handled elsewhere in the
5057 /* A Co-processor register candidate goes either in its own
5058 class of registers or on the stack. */
5059 if (!pcum
->aapcs_cprc_failed
[slot
])
5061 /* C1.cp - Try to allocate the argument to co-processor
5063 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
5066 /* C2.cp - Put the argument on the stack and note that we
5067 can't assign any more candidates in this slot. We also
5068 need to note that we have allocated stack space, so that
5069 we won't later try to split a non-cprc candidate between
5070 core registers and the stack. */
5071 pcum
->aapcs_cprc_failed
[slot
] = true;
5072 pcum
->can_split
= false;
5075 /* We didn't get a register, so this argument goes on the
5077 gcc_assert (pcum
->can_split
== false);
5082 /* C3 - For double-word aligned arguments, round the NCRN up to the
5083 next even number. */
5084 ncrn
= pcum
->aapcs_ncrn
;
5085 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
5088 nregs
= ARM_NUM_REGS2(mode
, type
);
5090 /* Sigh, this test should really assert that nregs > 0, but a GCC
5091 extension allows empty structs and then gives them empty size; it
5092 then allows such a structure to be passed by value. For some of
5093 the code below we have to pretend that such an argument has
5094 non-zero size so that we 'locate' it correctly either in
5095 registers or on the stack. */
5096 gcc_assert (nregs
>= 0);
5098 nregs2
= nregs
? nregs
: 1;
5100 /* C4 - Argument fits entirely in core registers. */
5101 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
5103 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5104 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
5108 /* C5 - Some core registers left and there are no arguments already
5109 on the stack: split this argument between the remaining core
5110 registers and the stack. */
5111 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
5113 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5114 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5115 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
5119 /* C6 - NCRN is set to 4. */
5120 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5122 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
5126 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5127 for a call to a function whose data type is FNTYPE.
5128 For a library call, FNTYPE is NULL. */
5130 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
5132 tree fndecl ATTRIBUTE_UNUSED
)
5134 /* Long call handling. */
5136 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5138 pcum
->pcs_variant
= arm_pcs_default
;
5140 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5142 if (arm_libcall_uses_aapcs_base (libname
))
5143 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
5145 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
5146 pcum
->aapcs_reg
= NULL_RTX
;
5147 pcum
->aapcs_partial
= 0;
5148 pcum
->aapcs_arg_processed
= false;
5149 pcum
->aapcs_cprc_slot
= -1;
5150 pcum
->can_split
= true;
5152 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5156 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5158 pcum
->aapcs_cprc_failed
[i
] = false;
5159 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
5167 /* On the ARM, the offset starts at 0. */
5169 pcum
->iwmmxt_nregs
= 0;
5170 pcum
->can_split
= true;
5172 /* Varargs vectors are treated the same as long long.
5173 named_count avoids having to change the way arm handles 'named' */
5174 pcum
->named_count
= 0;
5177 if (TARGET_REALLY_IWMMXT
&& fntype
)
5181 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
5183 fn_arg
= TREE_CHAIN (fn_arg
))
5184 pcum
->named_count
+= 1;
5186 if (! pcum
->named_count
)
5187 pcum
->named_count
= INT_MAX
;
5191 /* Return true if we use LRA instead of reload pass. */
5195 return arm_lra_flag
;
5198 /* Return true if mode/type need doubleword alignment. */
5200 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
5202 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
5203 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
5207 /* Determine where to put an argument to a function.
5208 Value is zero to push the argument on the stack,
5209 or a hard register in which to store the argument.
5211 MODE is the argument's machine mode.
5212 TYPE is the data type of the argument (as a tree).
5213 This is null for libcalls where that information may
5215 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5216 the preceding args and about the function being called.
5217 NAMED is nonzero if this argument is a named parameter
5218 (otherwise it is an extra parameter matching an ellipsis).
5220 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5221 other arguments are passed on the stack. If (NAMED == 0) (which happens
5222 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5223 defined), say it is passed in the stack (function_prologue will
5224 indeed make it pass in the stack if necessary). */
5227 arm_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
5228 const_tree type
, bool named
)
5230 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5233 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5234 a call insn (op3 of a call_value insn). */
5235 if (mode
== VOIDmode
)
5238 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5240 aapcs_layout_arg (pcum
, mode
, type
, named
);
5241 return pcum
->aapcs_reg
;
5244 /* Varargs vectors are treated the same as long long.
5245 named_count avoids having to change the way arm handles 'named' */
5246 if (TARGET_IWMMXT_ABI
5247 && arm_vector_mode_supported_p (mode
)
5248 && pcum
->named_count
> pcum
->nargs
+ 1)
5250 if (pcum
->iwmmxt_nregs
<= 9)
5251 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
5254 pcum
->can_split
= false;
5259 /* Put doubleword aligned quantities in even register pairs. */
5261 && ARM_DOUBLEWORD_ALIGN
5262 && arm_needs_doubleword_align (mode
, type
))
5265 /* Only allow splitting an arg between regs and memory if all preceding
5266 args were allocated to regs. For args passed by reference we only count
5267 the reference pointer. */
5268 if (pcum
->can_split
)
5271 nregs
= ARM_NUM_REGS2 (mode
, type
);
5273 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
5276 return gen_rtx_REG (mode
, pcum
->nregs
);
5280 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
5282 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
5283 ? DOUBLEWORD_ALIGNMENT
5288 arm_arg_partial_bytes (cumulative_args_t pcum_v
, enum machine_mode mode
,
5289 tree type
, bool named
)
5291 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5292 int nregs
= pcum
->nregs
;
5294 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5296 aapcs_layout_arg (pcum
, mode
, type
, named
);
5297 return pcum
->aapcs_partial
;
5300 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
5303 if (NUM_ARG_REGS
> nregs
5304 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
5306 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
5311 /* Update the data in PCUM to advance over an argument
5312 of mode MODE and data type TYPE.
5313 (TYPE is null for libcalls where that information may not be available.) */
5316 arm_function_arg_advance (cumulative_args_t pcum_v
, enum machine_mode mode
,
5317 const_tree type
, bool named
)
5319 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5321 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5323 aapcs_layout_arg (pcum
, mode
, type
, named
);
5325 if (pcum
->aapcs_cprc_slot
>= 0)
5327 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
5329 pcum
->aapcs_cprc_slot
= -1;
5332 /* Generic stuff. */
5333 pcum
->aapcs_arg_processed
= false;
5334 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
5335 pcum
->aapcs_reg
= NULL_RTX
;
5336 pcum
->aapcs_partial
= 0;
5341 if (arm_vector_mode_supported_p (mode
)
5342 && pcum
->named_count
> pcum
->nargs
5343 && TARGET_IWMMXT_ABI
)
5344 pcum
->iwmmxt_nregs
+= 1;
5346 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
5350 /* Variable sized types are passed by reference. This is a GCC
5351 extension to the ARM ABI. */
5354 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
5355 enum machine_mode mode ATTRIBUTE_UNUSED
,
5356 const_tree type
, bool named ATTRIBUTE_UNUSED
)
5358 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
5361 /* Encode the current state of the #pragma [no_]long_calls. */
5364 OFF
, /* No #pragma [no_]long_calls is in effect. */
5365 LONG
, /* #pragma long_calls is in effect. */
5366 SHORT
/* #pragma no_long_calls is in effect. */
5369 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
5372 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5374 arm_pragma_long_calls
= LONG
;
5378 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5380 arm_pragma_long_calls
= SHORT
;
5384 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5386 arm_pragma_long_calls
= OFF
;
5389 /* Handle an attribute requiring a FUNCTION_DECL;
5390 arguments as in struct attribute_spec.handler. */
5392 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
5393 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5395 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5397 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5399 *no_add_attrs
= true;
5405 /* Handle an "interrupt" or "isr" attribute;
5406 arguments as in struct attribute_spec.handler. */
5408 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
5413 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5415 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5417 *no_add_attrs
= true;
5419 /* FIXME: the argument if any is checked for type attributes;
5420 should it be checked for decl ones? */
5424 if (TREE_CODE (*node
) == FUNCTION_TYPE
5425 || TREE_CODE (*node
) == METHOD_TYPE
)
5427 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
5429 warning (OPT_Wattributes
, "%qE attribute ignored",
5431 *no_add_attrs
= true;
5434 else if (TREE_CODE (*node
) == POINTER_TYPE
5435 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
5436 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
5437 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
5439 *node
= build_variant_type_copy (*node
);
5440 TREE_TYPE (*node
) = build_type_attribute_variant
5442 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
5443 *no_add_attrs
= true;
5447 /* Possibly pass this attribute on from the type to a decl. */
5448 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
5449 | (int) ATTR_FLAG_FUNCTION_NEXT
5450 | (int) ATTR_FLAG_ARRAY_NEXT
))
5452 *no_add_attrs
= true;
5453 return tree_cons (name
, args
, NULL_TREE
);
5457 warning (OPT_Wattributes
, "%qE attribute ignored",
5466 /* Handle a "pcs" attribute; arguments as in struct
5467 attribute_spec.handler. */
5469 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
5470 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5472 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
5474 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
5475 *no_add_attrs
= true;
5480 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5481 /* Handle the "notshared" attribute. This attribute is another way of
5482 requesting hidden visibility. ARM's compiler supports
5483 "__declspec(notshared)"; we support the same thing via an
5487 arm_handle_notshared_attribute (tree
*node
,
5488 tree name ATTRIBUTE_UNUSED
,
5489 tree args ATTRIBUTE_UNUSED
,
5490 int flags ATTRIBUTE_UNUSED
,
5493 tree decl
= TYPE_NAME (*node
);
5497 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
5498 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
5499 *no_add_attrs
= false;
5505 /* Return 0 if the attributes for two types are incompatible, 1 if they
5506 are compatible, and 2 if they are nearly compatible (which causes a
5507 warning to be generated). */
5509 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
5513 /* Check for mismatch of non-default calling convention. */
5514 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
5517 /* Check for mismatched call attributes. */
5518 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5519 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5520 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5521 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5523 /* Only bother to check if an attribute is defined. */
5524 if (l1
| l2
| s1
| s2
)
5526 /* If one type has an attribute, the other must have the same attribute. */
5527 if ((l1
!= l2
) || (s1
!= s2
))
5530 /* Disallow mixed attributes. */
5531 if ((l1
& s2
) || (l2
& s1
))
5535 /* Check for mismatched ISR attribute. */
5536 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
5538 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
5539 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
5541 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
5548 /* Assigns default attributes to newly defined type. This is used to
5549 set short_call/long_call attributes for function types of
5550 functions defined inside corresponding #pragma scopes. */
5552 arm_set_default_type_attributes (tree type
)
5554 /* Add __attribute__ ((long_call)) to all functions, when
5555 inside #pragma long_calls or __attribute__ ((short_call)),
5556 when inside #pragma no_long_calls. */
5557 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
5559 tree type_attr_list
, attr_name
;
5560 type_attr_list
= TYPE_ATTRIBUTES (type
);
5562 if (arm_pragma_long_calls
== LONG
)
5563 attr_name
= get_identifier ("long_call");
5564 else if (arm_pragma_long_calls
== SHORT
)
5565 attr_name
= get_identifier ("short_call");
5569 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
5570 TYPE_ATTRIBUTES (type
) = type_attr_list
;
5574 /* Return true if DECL is known to be linked into section SECTION. */
5577 arm_function_in_section_p (tree decl
, section
*section
)
5579 /* We can only be certain about functions defined in the same
5580 compilation unit. */
5581 if (!TREE_STATIC (decl
))
5584 /* Make sure that SYMBOL always binds to the definition in this
5585 compilation unit. */
5586 if (!targetm
.binds_local_p (decl
))
5589 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5590 if (!DECL_SECTION_NAME (decl
))
5592 /* Make sure that we will not create a unique section for DECL. */
5593 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
5597 return function_section (decl
) == section
;
5600 /* Return nonzero if a 32-bit "long_call" should be generated for
5601 a call from the current function to DECL. We generate a long_call
5604 a. has an __attribute__((long call))
5605 or b. is within the scope of a #pragma long_calls
5606 or c. the -mlong-calls command line switch has been specified
5608 However we do not generate a long call if the function:
5610 d. has an __attribute__ ((short_call))
5611 or e. is inside the scope of a #pragma no_long_calls
5612 or f. is defined in the same section as the current function. */
5615 arm_is_long_call_p (tree decl
)
5620 return TARGET_LONG_CALLS
;
5622 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
5623 if (lookup_attribute ("short_call", attrs
))
5626 /* For "f", be conservative, and only cater for cases in which the
5627 whole of the current function is placed in the same section. */
5628 if (!flag_reorder_blocks_and_partition
5629 && TREE_CODE (decl
) == FUNCTION_DECL
5630 && arm_function_in_section_p (decl
, current_function_section ()))
5633 if (lookup_attribute ("long_call", attrs
))
5636 return TARGET_LONG_CALLS
;
5639 /* Return nonzero if it is ok to make a tail-call to DECL. */
5641 arm_function_ok_for_sibcall (tree decl
, tree exp
)
5643 unsigned long func_type
;
5645 if (cfun
->machine
->sibcall_blocked
)
5648 /* Never tailcall something if we are generating code for Thumb-1. */
5652 /* The PIC register is live on entry to VxWorks PLT entries, so we
5653 must make the call before restoring the PIC register. */
5654 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
5657 /* Cannot tail-call to long calls, since these are out of range of
5658 a branch instruction. */
5659 if (decl
&& arm_is_long_call_p (decl
))
5662 /* If we are interworking and the function is not declared static
5663 then we can't tail-call it unless we know that it exists in this
5664 compilation unit (since it might be a Thumb routine). */
5665 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
5666 && !TREE_ASM_WRITTEN (decl
))
5669 func_type
= arm_current_func_type ();
5670 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5671 if (IS_INTERRUPT (func_type
))
5674 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5676 /* Check that the return value locations are the same. For
5677 example that we aren't returning a value from the sibling in
5678 a VFP register but then need to transfer it to a core
5682 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
5683 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5685 if (!rtx_equal_p (a
, b
))
5689 /* Never tailcall if function may be called with a misaligned SP. */
5690 if (IS_STACKALIGN (func_type
))
5693 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5694 references should become a NOP. Don't convert such calls into
5696 if (TARGET_AAPCS_BASED
5697 && arm_abi
== ARM_ABI_AAPCS
5699 && DECL_WEAK (decl
))
5702 /* Everything else is ok. */
5707 /* Addressing mode support functions. */
5709 /* Return nonzero if X is a legitimate immediate operand when compiling
5710 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5712 legitimate_pic_operand_p (rtx x
)
5714 if (GET_CODE (x
) == SYMBOL_REF
5715 || (GET_CODE (x
) == CONST
5716 && GET_CODE (XEXP (x
, 0)) == PLUS
5717 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
5723 /* Record that the current function needs a PIC register. Initialize
5724 cfun->machine->pic_reg if we have not already done so. */
5727 require_pic_register (void)
5729 /* A lot of the logic here is made obscure by the fact that this
5730 routine gets called as part of the rtx cost estimation process.
5731 We don't want those calls to affect any assumptions about the real
5732 function; and further, we can't call entry_of_function() until we
5733 start the real expansion process. */
5734 if (!crtl
->uses_pic_offset_table
)
5736 gcc_assert (can_create_pseudo_p ());
5737 if (arm_pic_register
!= INVALID_REGNUM
)
5739 if (!cfun
->machine
->pic_reg
)
5740 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
5742 /* Play games to avoid marking the function as needing pic
5743 if we are being called as part of the cost-estimation
5745 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
5746 crtl
->uses_pic_offset_table
= 1;
5752 if (!cfun
->machine
->pic_reg
)
5753 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
5755 /* Play games to avoid marking the function as needing pic
5756 if we are being called as part of the cost-estimation
5758 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
5760 crtl
->uses_pic_offset_table
= 1;
5763 arm_load_pic_register (0UL);
5768 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
5770 INSN_LOCATION (insn
) = prologue_location
;
5772 /* We can be called during expansion of PHI nodes, where
5773 we can't yet emit instructions directly in the final
5774 insn stream. Queue the insns on the entry edge, they will
5775 be committed after everything else is expanded. */
5776 insert_insn_on_edge (seq
, single_succ_edge (ENTRY_BLOCK_PTR
));
5783 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
5785 if (GET_CODE (orig
) == SYMBOL_REF
5786 || GET_CODE (orig
) == LABEL_REF
)
5792 gcc_assert (can_create_pseudo_p ());
5793 reg
= gen_reg_rtx (Pmode
);
5796 /* VxWorks does not impose a fixed gap between segments; the run-time
5797 gap can be different from the object-file gap. We therefore can't
5798 use GOTOFF unless we are absolutely sure that the symbol is in the
5799 same segment as the GOT. Unfortunately, the flexibility of linker
5800 scripts means that we can't be sure of that in general, so assume
5801 that GOTOFF is never valid on VxWorks. */
5802 if ((GET_CODE (orig
) == LABEL_REF
5803 || (GET_CODE (orig
) == SYMBOL_REF
&&
5804 SYMBOL_REF_LOCAL_P (orig
)))
5806 && !TARGET_VXWORKS_RTP
)
5807 insn
= arm_pic_static_addr (orig
, reg
);
5813 /* If this function doesn't have a pic register, create one now. */
5814 require_pic_register ();
5816 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
5818 /* Make the MEM as close to a constant as possible. */
5819 mem
= SET_SRC (pat
);
5820 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
5821 MEM_READONLY_P (mem
) = 1;
5822 MEM_NOTRAP_P (mem
) = 1;
5824 insn
= emit_insn (pat
);
5827 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5829 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
5833 else if (GET_CODE (orig
) == CONST
)
5837 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5838 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
5841 /* Handle the case where we have: const (UNSPEC_TLS). */
5842 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
5843 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
5846 /* Handle the case where we have:
5847 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5849 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5850 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
5851 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
5853 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
5859 gcc_assert (can_create_pseudo_p ());
5860 reg
= gen_reg_rtx (Pmode
);
5863 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
5865 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
5866 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
5867 base
== reg
? 0 : reg
);
5869 if (CONST_INT_P (offset
))
5871 /* The base register doesn't really matter, we only want to
5872 test the index for the appropriate mode. */
5873 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
5875 gcc_assert (can_create_pseudo_p ());
5876 offset
= force_reg (Pmode
, offset
);
5879 if (CONST_INT_P (offset
))
5880 return plus_constant (Pmode
, base
, INTVAL (offset
));
5883 if (GET_MODE_SIZE (mode
) > 4
5884 && (GET_MODE_CLASS (mode
) == MODE_INT
5885 || TARGET_SOFT_FLOAT
))
5887 emit_insn (gen_addsi3 (reg
, base
, offset
));
5891 return gen_rtx_PLUS (Pmode
, base
, offset
);
5898 /* Find a spare register to use during the prolog of a function. */
5901 thumb_find_work_register (unsigned long pushed_regs_mask
)
5905 /* Check the argument registers first as these are call-used. The
5906 register allocation order means that sometimes r3 might be used
5907 but earlier argument registers might not, so check them all. */
5908 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
5909 if (!df_regs_ever_live_p (reg
))
5912 /* Before going on to check the call-saved registers we can try a couple
5913 more ways of deducing that r3 is available. The first is when we are
5914 pushing anonymous arguments onto the stack and we have less than 4
5915 registers worth of fixed arguments(*). In this case r3 will be part of
5916 the variable argument list and so we can be sure that it will be
5917 pushed right at the start of the function. Hence it will be available
5918 for the rest of the prologue.
5919 (*): ie crtl->args.pretend_args_size is greater than 0. */
5920 if (cfun
->machine
->uses_anonymous_args
5921 && crtl
->args
.pretend_args_size
> 0)
5922 return LAST_ARG_REGNUM
;
5924 /* The other case is when we have fixed arguments but less than 4 registers
5925 worth. In this case r3 might be used in the body of the function, but
5926 it is not being used to convey an argument into the function. In theory
5927 we could just check crtl->args.size to see how many bytes are
5928 being passed in argument registers, but it seems that it is unreliable.
5929 Sometimes it will have the value 0 when in fact arguments are being
5930 passed. (See testcase execute/20021111-1.c for an example). So we also
5931 check the args_info.nregs field as well. The problem with this field is
5932 that it makes no allowances for arguments that are passed to the
5933 function but which are not used. Hence we could miss an opportunity
5934 when a function has an unused argument in r3. But it is better to be
5935 safe than to be sorry. */
5936 if (! cfun
->machine
->uses_anonymous_args
5937 && crtl
->args
.size
>= 0
5938 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
5939 && (TARGET_AAPCS_BASED
5940 ? crtl
->args
.info
.aapcs_ncrn
< 4
5941 : crtl
->args
.info
.nregs
< 4))
5942 return LAST_ARG_REGNUM
;
5944 /* Otherwise look for a call-saved register that is going to be pushed. */
5945 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
5946 if (pushed_regs_mask
& (1 << reg
))
5951 /* Thumb-2 can use high regs. */
5952 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
5953 if (pushed_regs_mask
& (1 << reg
))
5956 /* Something went wrong - thumb_compute_save_reg_mask()
5957 should have arranged for a suitable register to be pushed. */
5961 static GTY(()) int pic_labelno
;
5963 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5967 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
5969 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
5971 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
5974 gcc_assert (flag_pic
);
5976 pic_reg
= cfun
->machine
->pic_reg
;
5977 if (TARGET_VXWORKS_RTP
)
5979 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
5980 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5981 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5983 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
5985 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5986 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
5990 /* We use an UNSPEC rather than a LABEL_REF because this label
5991 never appears in the code stream. */
5993 labelno
= GEN_INT (pic_labelno
++);
5994 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5995 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5997 /* On the ARM the PC register contains 'dot + 8' at the time of the
5998 addition, on the Thumb it is 'dot + 4'. */
5999 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6000 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6002 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6006 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6008 else /* TARGET_THUMB1 */
6010 if (arm_pic_register
!= INVALID_REGNUM
6011 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6013 /* We will have pushed the pic register, so we should always be
6014 able to find a work register. */
6015 pic_tmp
= gen_rtx_REG (SImode
,
6016 thumb_find_work_register (saved_regs
));
6017 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6018 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6019 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6022 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6026 /* Need to emit this whether or not we obey regdecls,
6027 since setjmp/longjmp can cause life info to screw up. */
6031 /* Generate code to load the address of a static var when flag_pic is set. */
6033 arm_pic_static_addr (rtx orig
, rtx reg
)
6035 rtx l1
, labelno
, offset_rtx
, insn
;
6037 gcc_assert (flag_pic
);
6039 /* We use an UNSPEC rather than a LABEL_REF because this label
6040 never appears in the code stream. */
6041 labelno
= GEN_INT (pic_labelno
++);
6042 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6043 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6045 /* On the ARM the PC register contains 'dot + 8' at the time of the
6046 addition, on the Thumb it is 'dot + 4'. */
6047 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6048 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
6049 UNSPEC_SYMBOL_OFFSET
);
6050 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
6052 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
6056 /* Return nonzero if X is valid as an ARM state addressing register. */
6058 arm_address_register_rtx_p (rtx x
, int strict_p
)
6068 return ARM_REGNO_OK_FOR_BASE_P (regno
);
6070 return (regno
<= LAST_ARM_REGNUM
6071 || regno
>= FIRST_PSEUDO_REGISTER
6072 || regno
== FRAME_POINTER_REGNUM
6073 || regno
== ARG_POINTER_REGNUM
);
6076 /* Return TRUE if this rtx is the difference of a symbol and a label,
6077 and will reduce to a PC-relative relocation in the object file.
6078 Expressions like this can be left alone when generating PIC, rather
6079 than forced through the GOT. */
6081 pcrel_constant_p (rtx x
)
6083 if (GET_CODE (x
) == MINUS
)
6084 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
6089 /* Return true if X will surely end up in an index register after next
6092 will_be_in_index_register (const_rtx x
)
6094 /* arm.md: calculate_pic_address will split this into a register. */
6095 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
6098 /* Return nonzero if X is a valid ARM state address operand. */
6100 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
6104 enum rtx_code code
= GET_CODE (x
);
6106 if (arm_address_register_rtx_p (x
, strict_p
))
6109 use_ldrd
= (TARGET_LDRD
6111 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6113 if (code
== POST_INC
|| code
== PRE_DEC
6114 || ((code
== PRE_INC
|| code
== POST_DEC
)
6115 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6116 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6118 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6119 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6120 && GET_CODE (XEXP (x
, 1)) == PLUS
6121 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6123 rtx addend
= XEXP (XEXP (x
, 1), 1);
6125 /* Don't allow ldrd post increment by register because it's hard
6126 to fixup invalid register choices. */
6128 && GET_CODE (x
) == POST_MODIFY
6132 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
6133 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
6136 /* After reload constants split into minipools will have addresses
6137 from a LABEL_REF. */
6138 else if (reload_completed
6139 && (code
== LABEL_REF
6141 && GET_CODE (XEXP (x
, 0)) == PLUS
6142 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6143 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6146 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6149 else if (code
== PLUS
)
6151 rtx xop0
= XEXP (x
, 0);
6152 rtx xop1
= XEXP (x
, 1);
6154 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6155 && ((CONST_INT_P (xop1
)
6156 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
6157 || (!strict_p
&& will_be_in_index_register (xop1
))))
6158 || (arm_address_register_rtx_p (xop1
, strict_p
)
6159 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
6163 /* Reload currently can't handle MINUS, so disable this for now */
6164 else if (GET_CODE (x
) == MINUS
)
6166 rtx xop0
= XEXP (x
, 0);
6167 rtx xop1
= XEXP (x
, 1);
6169 return (arm_address_register_rtx_p (xop0
, strict_p
)
6170 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
6174 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6175 && code
== SYMBOL_REF
6176 && CONSTANT_POOL_ADDRESS_P (x
)
6178 && symbol_mentioned_p (get_pool_constant (x
))
6179 && ! pcrel_constant_p (get_pool_constant (x
))))
6185 /* Return nonzero if X is a valid Thumb-2 address operand. */
6187 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6190 enum rtx_code code
= GET_CODE (x
);
6192 if (arm_address_register_rtx_p (x
, strict_p
))
6195 use_ldrd
= (TARGET_LDRD
6197 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6199 if (code
== POST_INC
|| code
== PRE_DEC
6200 || ((code
== PRE_INC
|| code
== POST_DEC
)
6201 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6202 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6204 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6205 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6206 && GET_CODE (XEXP (x
, 1)) == PLUS
6207 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6209 /* Thumb-2 only has autoincrement by constant. */
6210 rtx addend
= XEXP (XEXP (x
, 1), 1);
6211 HOST_WIDE_INT offset
;
6213 if (!CONST_INT_P (addend
))
6216 offset
= INTVAL(addend
);
6217 if (GET_MODE_SIZE (mode
) <= 4)
6218 return (offset
> -256 && offset
< 256);
6220 return (use_ldrd
&& offset
> -1024 && offset
< 1024
6221 && (offset
& 3) == 0);
6224 /* After reload constants split into minipools will have addresses
6225 from a LABEL_REF. */
6226 else if (reload_completed
6227 && (code
== LABEL_REF
6229 && GET_CODE (XEXP (x
, 0)) == PLUS
6230 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6231 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6234 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6237 else if (code
== PLUS
)
6239 rtx xop0
= XEXP (x
, 0);
6240 rtx xop1
= XEXP (x
, 1);
6242 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6243 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
6244 || (!strict_p
&& will_be_in_index_register (xop1
))))
6245 || (arm_address_register_rtx_p (xop1
, strict_p
)
6246 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
6249 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6250 && code
== SYMBOL_REF
6251 && CONSTANT_POOL_ADDRESS_P (x
)
6253 && symbol_mentioned_p (get_pool_constant (x
))
6254 && ! pcrel_constant_p (get_pool_constant (x
))))
6260 /* Return nonzero if INDEX is valid for an address index operand in
6263 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
6266 HOST_WIDE_INT range
;
6267 enum rtx_code code
= GET_CODE (index
);
6269 /* Standard coprocessor addressing modes. */
6270 if (TARGET_HARD_FLOAT
6272 && (mode
== SFmode
|| mode
== DFmode
))
6273 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6274 && INTVAL (index
) > -1024
6275 && (INTVAL (index
) & 3) == 0);
6277 /* For quad modes, we restrict the constant offset to be slightly less
6278 than what the instruction format permits. We do this because for
6279 quad mode moves, we will actually decompose them into two separate
6280 double-mode reads or writes. INDEX must therefore be a valid
6281 (double-mode) offset and so should INDEX+8. */
6282 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6283 return (code
== CONST_INT
6284 && INTVAL (index
) < 1016
6285 && INTVAL (index
) > -1024
6286 && (INTVAL (index
) & 3) == 0);
6288 /* We have no such constraint on double mode offsets, so we permit the
6289 full range of the instruction format. */
6290 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6291 return (code
== CONST_INT
6292 && INTVAL (index
) < 1024
6293 && INTVAL (index
) > -1024
6294 && (INTVAL (index
) & 3) == 0);
6296 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6297 return (code
== CONST_INT
6298 && INTVAL (index
) < 1024
6299 && INTVAL (index
) > -1024
6300 && (INTVAL (index
) & 3) == 0);
6302 if (arm_address_register_rtx_p (index
, strict_p
)
6303 && (GET_MODE_SIZE (mode
) <= 4))
6306 if (mode
== DImode
|| mode
== DFmode
)
6308 if (code
== CONST_INT
)
6310 HOST_WIDE_INT val
= INTVAL (index
);
6313 return val
> -256 && val
< 256;
6315 return val
> -4096 && val
< 4092;
6318 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
6321 if (GET_MODE_SIZE (mode
) <= 4
6325 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
6329 rtx xiop0
= XEXP (index
, 0);
6330 rtx xiop1
= XEXP (index
, 1);
6332 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6333 && power_of_two_operand (xiop1
, SImode
))
6334 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6335 && power_of_two_operand (xiop0
, SImode
)));
6337 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
6338 || code
== ASHIFT
|| code
== ROTATERT
)
6340 rtx op
= XEXP (index
, 1);
6342 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6345 && INTVAL (op
) <= 31);
6349 /* For ARM v4 we may be doing a sign-extend operation during the
6355 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
6361 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
6363 return (code
== CONST_INT
6364 && INTVAL (index
) < range
6365 && INTVAL (index
) > -range
);
6368 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6369 index operand. i.e. 1, 2, 4 or 8. */
6371 thumb2_index_mul_operand (rtx op
)
6375 if (!CONST_INT_P (op
))
6379 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
6382 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6384 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
6386 enum rtx_code code
= GET_CODE (index
);
6388 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6389 /* Standard coprocessor addressing modes. */
6390 if (TARGET_HARD_FLOAT
6392 && (mode
== SFmode
|| mode
== DFmode
))
6393 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6394 /* Thumb-2 allows only > -256 index range for it's core register
6395 load/stores. Since we allow SF/DF in core registers, we have
6396 to use the intersection between -256~4096 (core) and -1024~1024
6398 && INTVAL (index
) > -256
6399 && (INTVAL (index
) & 3) == 0);
6401 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6403 /* For DImode assume values will usually live in core regs
6404 and only allow LDRD addressing modes. */
6405 if (!TARGET_LDRD
|| mode
!= DImode
)
6406 return (code
== CONST_INT
6407 && INTVAL (index
) < 1024
6408 && INTVAL (index
) > -1024
6409 && (INTVAL (index
) & 3) == 0);
6412 /* For quad modes, we restrict the constant offset to be slightly less
6413 than what the instruction format permits. We do this because for
6414 quad mode moves, we will actually decompose them into two separate
6415 double-mode reads or writes. INDEX must therefore be a valid
6416 (double-mode) offset and so should INDEX+8. */
6417 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6418 return (code
== CONST_INT
6419 && INTVAL (index
) < 1016
6420 && INTVAL (index
) > -1024
6421 && (INTVAL (index
) & 3) == 0);
6423 /* We have no such constraint on double mode offsets, so we permit the
6424 full range of the instruction format. */
6425 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6426 return (code
== CONST_INT
6427 && INTVAL (index
) < 1024
6428 && INTVAL (index
) > -1024
6429 && (INTVAL (index
) & 3) == 0);
6431 if (arm_address_register_rtx_p (index
, strict_p
)
6432 && (GET_MODE_SIZE (mode
) <= 4))
6435 if (mode
== DImode
|| mode
== DFmode
)
6437 if (code
== CONST_INT
)
6439 HOST_WIDE_INT val
= INTVAL (index
);
6440 /* ??? Can we assume ldrd for thumb2? */
6441 /* Thumb-2 ldrd only has reg+const addressing modes. */
6442 /* ldrd supports offsets of +-1020.
6443 However the ldr fallback does not. */
6444 return val
> -256 && val
< 256 && (val
& 3) == 0;
6452 rtx xiop0
= XEXP (index
, 0);
6453 rtx xiop1
= XEXP (index
, 1);
6455 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6456 && thumb2_index_mul_operand (xiop1
))
6457 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6458 && thumb2_index_mul_operand (xiop0
)));
6460 else if (code
== ASHIFT
)
6462 rtx op
= XEXP (index
, 1);
6464 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6467 && INTVAL (op
) <= 3);
6470 return (code
== CONST_INT
6471 && INTVAL (index
) < 4096
6472 && INTVAL (index
) > -256);
6475 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6477 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
6487 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
6489 return (regno
<= LAST_LO_REGNUM
6490 || regno
> LAST_VIRTUAL_REGISTER
6491 || regno
== FRAME_POINTER_REGNUM
6492 || (GET_MODE_SIZE (mode
) >= 4
6493 && (regno
== STACK_POINTER_REGNUM
6494 || regno
>= FIRST_PSEUDO_REGISTER
6495 || x
== hard_frame_pointer_rtx
6496 || x
== arg_pointer_rtx
)));
6499 /* Return nonzero if x is a legitimate index register. This is the case
6500 for any base register that can access a QImode object. */
6502 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
6504 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
6507 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6509 The AP may be eliminated to either the SP or the FP, so we use the
6510 least common denominator, e.g. SImode, and offsets from 0 to 64.
6512 ??? Verify whether the above is the right approach.
6514 ??? Also, the FP may be eliminated to the SP, so perhaps that
6515 needs special handling also.
6517 ??? Look at how the mips16 port solves this problem. It probably uses
6518 better ways to solve some of these problems.
6520 Although it is not incorrect, we don't accept QImode and HImode
6521 addresses based on the frame pointer or arg pointer until the
6522 reload pass starts. This is so that eliminating such addresses
6523 into stack based ones won't produce impossible code. */
6525 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6527 /* ??? Not clear if this is right. Experiment. */
6528 if (GET_MODE_SIZE (mode
) < 4
6529 && !(reload_in_progress
|| reload_completed
)
6530 && (reg_mentioned_p (frame_pointer_rtx
, x
)
6531 || reg_mentioned_p (arg_pointer_rtx
, x
)
6532 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
6533 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
6534 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
6535 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
6538 /* Accept any base register. SP only in SImode or larger. */
6539 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
6542 /* This is PC relative data before arm_reorg runs. */
6543 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
6544 && GET_CODE (x
) == SYMBOL_REF
6545 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
6548 /* This is PC relative data after arm_reorg runs. */
6549 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
6551 && (GET_CODE (x
) == LABEL_REF
6552 || (GET_CODE (x
) == CONST
6553 && GET_CODE (XEXP (x
, 0)) == PLUS
6554 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6555 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6558 /* Post-inc indexing only supported for SImode and larger. */
6559 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
6560 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
6563 else if (GET_CODE (x
) == PLUS
)
6565 /* REG+REG address can be any two index registers. */
6566 /* We disallow FRAME+REG addressing since we know that FRAME
6567 will be replaced with STACK, and SP relative addressing only
6568 permits SP+OFFSET. */
6569 if (GET_MODE_SIZE (mode
) <= 4
6570 && XEXP (x
, 0) != frame_pointer_rtx
6571 && XEXP (x
, 1) != frame_pointer_rtx
6572 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
6573 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
6574 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
6577 /* REG+const has 5-7 bit offset for non-SP registers. */
6578 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
6579 || XEXP (x
, 0) == arg_pointer_rtx
)
6580 && CONST_INT_P (XEXP (x
, 1))
6581 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6584 /* REG+const has 10-bit offset for SP, but only SImode and
6585 larger is supported. */
6586 /* ??? Should probably check for DI/DFmode overflow here
6587 just like GO_IF_LEGITIMATE_OFFSET does. */
6588 else if (REG_P (XEXP (x
, 0))
6589 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
6590 && GET_MODE_SIZE (mode
) >= 4
6591 && CONST_INT_P (XEXP (x
, 1))
6592 && INTVAL (XEXP (x
, 1)) >= 0
6593 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
6594 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
6597 else if (REG_P (XEXP (x
, 0))
6598 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
6599 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
6600 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
6601 && REGNO (XEXP (x
, 0))
6602 <= LAST_VIRTUAL_POINTER_REGISTER
))
6603 && GET_MODE_SIZE (mode
) >= 4
6604 && CONST_INT_P (XEXP (x
, 1))
6605 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
6609 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6610 && GET_MODE_SIZE (mode
) == 4
6611 && GET_CODE (x
) == SYMBOL_REF
6612 && CONSTANT_POOL_ADDRESS_P (x
)
6614 && symbol_mentioned_p (get_pool_constant (x
))
6615 && ! pcrel_constant_p (get_pool_constant (x
))))
6621 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6622 instruction of mode MODE. */
6624 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
6626 switch (GET_MODE_SIZE (mode
))
6629 return val
>= 0 && val
< 32;
6632 return val
>= 0 && val
< 64 && (val
& 1) == 0;
6636 && (val
+ GET_MODE_SIZE (mode
)) <= 128
6642 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
6645 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
6646 else if (TARGET_THUMB2
)
6647 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
6648 else /* if (TARGET_THUMB1) */
6649 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
6652 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6654 Given an rtx X being reloaded into a reg required to be
6655 in class CLASS, return the class of reg to actually use.
6656 In general this is just CLASS, but for the Thumb core registers and
6657 immediate constants we prefer a LO_REGS class or a subset. */
6660 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
6666 if (rclass
== GENERAL_REGS
6667 || rclass
== HI_REGS
6668 || rclass
== NO_REGS
6669 || rclass
== STACK_REG
)
6676 /* Build the SYMBOL_REF for __tls_get_addr. */
6678 static GTY(()) rtx tls_get_addr_libfunc
;
6681 get_tls_get_addr (void)
6683 if (!tls_get_addr_libfunc
)
6684 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
6685 return tls_get_addr_libfunc
;
6689 arm_load_tp (rtx target
)
6692 target
= gen_reg_rtx (SImode
);
6696 /* Can return in any reg. */
6697 emit_insn (gen_load_tp_hard (target
));
6701 /* Always returned in r0. Immediately copy the result into a pseudo,
6702 otherwise other uses of r0 (e.g. setting up function arguments) may
6703 clobber the value. */
6707 emit_insn (gen_load_tp_soft ());
6709 tmp
= gen_rtx_REG (SImode
, 0);
6710 emit_move_insn (target
, tmp
);
6716 load_tls_operand (rtx x
, rtx reg
)
6720 if (reg
== NULL_RTX
)
6721 reg
= gen_reg_rtx (SImode
);
6723 tmp
= gen_rtx_CONST (SImode
, x
);
6725 emit_move_insn (reg
, tmp
);
6731 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
6733 rtx insns
, label
, labelno
, sum
;
6735 gcc_assert (reloc
!= TLS_DESCSEQ
);
6738 labelno
= GEN_INT (pic_labelno
++);
6739 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6740 label
= gen_rtx_CONST (VOIDmode
, label
);
6742 sum
= gen_rtx_UNSPEC (Pmode
,
6743 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
6744 GEN_INT (TARGET_ARM
? 8 : 4)),
6746 reg
= load_tls_operand (sum
, reg
);
6749 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
6751 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
6753 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
6754 LCT_PURE
, /* LCT_CONST? */
6755 Pmode
, 1, reg
, Pmode
);
6757 insns
= get_insns ();
6764 arm_tls_descseq_addr (rtx x
, rtx reg
)
6766 rtx labelno
= GEN_INT (pic_labelno
++);
6767 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6768 rtx sum
= gen_rtx_UNSPEC (Pmode
,
6769 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
6770 gen_rtx_CONST (VOIDmode
, label
),
6771 GEN_INT (!TARGET_ARM
)),
6773 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
6775 emit_insn (gen_tlscall (x
, labelno
));
6777 reg
= gen_reg_rtx (SImode
);
6779 gcc_assert (REGNO (reg
) != 0);
6781 emit_move_insn (reg
, reg0
);
6787 legitimize_tls_address (rtx x
, rtx reg
)
6789 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
6790 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
6794 case TLS_MODEL_GLOBAL_DYNAMIC
:
6795 if (TARGET_GNU2_TLS
)
6797 reg
= arm_tls_descseq_addr (x
, reg
);
6799 tp
= arm_load_tp (NULL_RTX
);
6801 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
6805 /* Original scheme */
6806 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
6807 dest
= gen_reg_rtx (Pmode
);
6808 emit_libcall_block (insns
, dest
, ret
, x
);
6812 case TLS_MODEL_LOCAL_DYNAMIC
:
6813 if (TARGET_GNU2_TLS
)
6815 reg
= arm_tls_descseq_addr (x
, reg
);
6817 tp
= arm_load_tp (NULL_RTX
);
6819 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
6823 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
6825 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6826 share the LDM result with other LD model accesses. */
6827 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
6829 dest
= gen_reg_rtx (Pmode
);
6830 emit_libcall_block (insns
, dest
, ret
, eqv
);
6832 /* Load the addend. */
6833 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
6834 GEN_INT (TLS_LDO32
)),
6836 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
6837 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
6841 case TLS_MODEL_INITIAL_EXEC
:
6842 labelno
= GEN_INT (pic_labelno
++);
6843 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6844 label
= gen_rtx_CONST (VOIDmode
, label
);
6845 sum
= gen_rtx_UNSPEC (Pmode
,
6846 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
6847 GEN_INT (TARGET_ARM
? 8 : 4)),
6849 reg
= load_tls_operand (sum
, reg
);
6852 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
6853 else if (TARGET_THUMB2
)
6854 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
6857 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
6858 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
6861 tp
= arm_load_tp (NULL_RTX
);
6863 return gen_rtx_PLUS (Pmode
, tp
, reg
);
6865 case TLS_MODEL_LOCAL_EXEC
:
6866 tp
= arm_load_tp (NULL_RTX
);
6868 reg
= gen_rtx_UNSPEC (Pmode
,
6869 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
6871 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
6873 return gen_rtx_PLUS (Pmode
, tp
, reg
);
6880 /* Try machine-dependent ways of modifying an illegitimate address
6881 to be legitimate. If we find one, return the new, valid address. */
6883 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6887 /* TODO: legitimize_address for Thumb2. */
6890 return thumb_legitimize_address (x
, orig_x
, mode
);
6893 if (arm_tls_symbol_p (x
))
6894 return legitimize_tls_address (x
, NULL_RTX
);
6896 if (GET_CODE (x
) == PLUS
)
6898 rtx xop0
= XEXP (x
, 0);
6899 rtx xop1
= XEXP (x
, 1);
6901 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
6902 xop0
= force_reg (SImode
, xop0
);
6904 if (CONSTANT_P (xop1
) && !symbol_mentioned_p (xop1
))
6905 xop1
= force_reg (SImode
, xop1
);
6907 if (ARM_BASE_REGISTER_RTX_P (xop0
)
6908 && CONST_INT_P (xop1
))
6910 HOST_WIDE_INT n
, low_n
;
6914 /* VFP addressing modes actually allow greater offsets, but for
6915 now we just stick with the lowest common denominator. */
6917 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
6929 low_n
= ((mode
) == TImode
? 0
6930 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
6934 base_reg
= gen_reg_rtx (SImode
);
6935 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
6936 emit_move_insn (base_reg
, val
);
6937 x
= plus_constant (Pmode
, base_reg
, low_n
);
6939 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6940 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6943 /* XXX We don't allow MINUS any more -- see comment in
6944 arm_legitimate_address_outer_p (). */
6945 else if (GET_CODE (x
) == MINUS
)
6947 rtx xop0
= XEXP (x
, 0);
6948 rtx xop1
= XEXP (x
, 1);
6950 if (CONSTANT_P (xop0
))
6951 xop0
= force_reg (SImode
, xop0
);
6953 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
6954 xop1
= force_reg (SImode
, xop1
);
6956 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6957 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
6960 /* Make sure to take full advantage of the pre-indexed addressing mode
6961 with absolute addresses which often allows for the base register to
6962 be factorized for multiple adjacent memory references, and it might
6963 even allows for the mini pool to be avoided entirely. */
6964 else if (CONST_INT_P (x
) && optimize
> 0)
6967 HOST_WIDE_INT mask
, base
, index
;
6970 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6971 use a 8-bit index. So let's use a 12-bit index for SImode only and
6972 hope that arm_gen_constant will enable ldrb to use more bits. */
6973 bits
= (mode
== SImode
) ? 12 : 8;
6974 mask
= (1 << bits
) - 1;
6975 base
= INTVAL (x
) & ~mask
;
6976 index
= INTVAL (x
) & mask
;
6977 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
6979 /* It'll most probably be more efficient to generate the base
6980 with more bits set and use a negative index instead. */
6984 base_reg
= force_reg (SImode
, GEN_INT (base
));
6985 x
= plus_constant (Pmode
, base_reg
, index
);
6990 /* We need to find and carefully transform any SYMBOL and LABEL
6991 references; so go back to the original address expression. */
6992 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6994 if (new_x
!= orig_x
)
7002 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7003 to be legitimate. If we find one, return the new, valid address. */
7005 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7007 if (arm_tls_symbol_p (x
))
7008 return legitimize_tls_address (x
, NULL_RTX
);
7010 if (GET_CODE (x
) == PLUS
7011 && CONST_INT_P (XEXP (x
, 1))
7012 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
7013 || INTVAL (XEXP (x
, 1)) < 0))
7015 rtx xop0
= XEXP (x
, 0);
7016 rtx xop1
= XEXP (x
, 1);
7017 HOST_WIDE_INT offset
= INTVAL (xop1
);
7019 /* Try and fold the offset into a biasing of the base register and
7020 then offsetting that. Don't do this when optimizing for space
7021 since it can cause too many CSEs. */
7022 if (optimize_size
&& offset
>= 0
7023 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
7025 HOST_WIDE_INT delta
;
7028 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
7029 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
7030 delta
= 31 * GET_MODE_SIZE (mode
);
7032 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
7034 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
7036 x
= plus_constant (Pmode
, xop0
, delta
);
7038 else if (offset
< 0 && offset
> -256)
7039 /* Small negative offsets are best done with a subtract before the
7040 dereference, forcing these into a register normally takes two
7042 x
= force_operand (x
, NULL_RTX
);
7045 /* For the remaining cases, force the constant into a register. */
7046 xop1
= force_reg (SImode
, xop1
);
7047 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7050 else if (GET_CODE (x
) == PLUS
7051 && s_register_operand (XEXP (x
, 1), SImode
)
7052 && !s_register_operand (XEXP (x
, 0), SImode
))
7054 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
7056 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
7061 /* We need to find and carefully transform any SYMBOL and LABEL
7062 references; so go back to the original address expression. */
7063 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7065 if (new_x
!= orig_x
)
7073 arm_legitimize_reload_address (rtx
*p
,
7074 enum machine_mode mode
,
7075 int opnum
, int type
,
7076 int ind_levels ATTRIBUTE_UNUSED
)
7078 /* We must recognize output that we have already generated ourselves. */
7079 if (GET_CODE (*p
) == PLUS
7080 && GET_CODE (XEXP (*p
, 0)) == PLUS
7081 && REG_P (XEXP (XEXP (*p
, 0), 0))
7082 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
7083 && CONST_INT_P (XEXP (*p
, 1)))
7085 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7086 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7087 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7091 if (GET_CODE (*p
) == PLUS
7092 && REG_P (XEXP (*p
, 0))
7093 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
7094 /* If the base register is equivalent to a constant, let the generic
7095 code handle it. Otherwise we will run into problems if a future
7096 reload pass decides to rematerialize the constant. */
7097 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
7098 && CONST_INT_P (XEXP (*p
, 1)))
7100 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
7101 HOST_WIDE_INT low
, high
;
7103 /* Detect coprocessor load/stores. */
7104 bool coproc_p
= ((TARGET_HARD_FLOAT
7106 && (mode
== SFmode
|| mode
== DFmode
))
7107 || (TARGET_REALLY_IWMMXT
7108 && VALID_IWMMXT_REG_MODE (mode
))
7110 && (VALID_NEON_DREG_MODE (mode
)
7111 || VALID_NEON_QREG_MODE (mode
))));
7113 /* For some conditions, bail out when lower two bits are unaligned. */
7114 if ((val
& 0x3) != 0
7115 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7117 /* For DI, and DF under soft-float: */
7118 || ((mode
== DImode
|| mode
== DFmode
)
7119 /* Without ldrd, we use stm/ldm, which does not
7120 fair well with unaligned bits. */
7122 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7123 || TARGET_THUMB2
))))
7126 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7127 of which the (reg+high) gets turned into a reload add insn,
7128 we try to decompose the index into high/low values that can often
7129 also lead to better reload CSE.
7131 ldr r0, [r2, #4100] // Offset too large
7132 ldr r1, [r2, #4104] // Offset too large
7134 is best reloaded as:
7140 which post-reload CSE can simplify in most cases to eliminate the
7141 second add instruction:
7146 The idea here is that we want to split out the bits of the constant
7147 as a mask, rather than as subtracting the maximum offset that the
7148 respective type of load/store used can handle.
7150 When encountering negative offsets, we can still utilize it even if
7151 the overall offset is positive; sometimes this may lead to an immediate
7152 that can be constructed with fewer instructions.
7154 ldr r0, [r2, #0x3FFFFC]
7156 This is best reloaded as:
7157 add t1, r2, #0x400000
7160 The trick for spotting this for a load insn with N bits of offset
7161 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
7162 negative offset that is going to make bit N and all the bits below
7163 it become zero in the remainder part.
7165 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7166 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7167 used in most cases of ARM load/store instructions. */
7169 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7170 (((VAL) & ((1 << (N)) - 1)) \
7171 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7176 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
7178 /* NEON quad-word load/stores are made of two double-word accesses,
7179 so the valid index range is reduced by 8. Treat as 9-bit range if
7181 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
7182 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
7184 else if (GET_MODE_SIZE (mode
) == 8)
7187 low
= (TARGET_THUMB2
7188 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
7189 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
7191 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7192 to access doublewords. The supported load/store offsets are
7193 -8, -4, and 4, which we try to produce here. */
7194 low
= ((val
& 0xf) ^ 0x8) - 0x8;
7196 else if (GET_MODE_SIZE (mode
) < 8)
7198 /* NEON element load/stores do not have an offset. */
7199 if (TARGET_NEON_FP16
&& mode
== HFmode
)
7204 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7205 Try the wider 12-bit range first, and re-try if the result
7207 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7209 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7213 if (mode
== HImode
|| mode
== HFmode
)
7216 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7219 /* The storehi/movhi_bytes fallbacks can use only
7220 [-4094,+4094] of the full ldrb/strb index range. */
7221 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7222 if (low
== 4095 || low
== -4095)
7227 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7233 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
7234 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
7235 - (unsigned HOST_WIDE_INT
) 0x80000000);
7236 /* Check for overflow or zero */
7237 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
7240 /* Reload the high part into a base reg; leave the low part
7242 Note that replacing this gen_rtx_PLUS with plus_constant is
7243 wrong in this case because we rely on the
7244 (plus (plus reg c1) c2) structure being preserved so that
7245 XEXP (*p, 0) in push_reload below uses the correct term. */
7246 *p
= gen_rtx_PLUS (GET_MODE (*p
),
7247 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
7250 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7251 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7252 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7260 thumb_legitimize_reload_address (rtx
*x_p
,
7261 enum machine_mode mode
,
7262 int opnum
, int type
,
7263 int ind_levels ATTRIBUTE_UNUSED
)
7267 if (GET_CODE (x
) == PLUS
7268 && GET_MODE_SIZE (mode
) < 4
7269 && REG_P (XEXP (x
, 0))
7270 && XEXP (x
, 0) == stack_pointer_rtx
7271 && CONST_INT_P (XEXP (x
, 1))
7272 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7277 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7278 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7282 /* If both registers are hi-regs, then it's better to reload the
7283 entire expression rather than each register individually. That
7284 only requires one reload register rather than two. */
7285 if (GET_CODE (x
) == PLUS
7286 && REG_P (XEXP (x
, 0))
7287 && REG_P (XEXP (x
, 1))
7288 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
7289 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
7294 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7295 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7302 /* Test for various thread-local symbols. */
7304 /* Return TRUE if X is a thread-local symbol. */
7307 arm_tls_symbol_p (rtx x
)
7309 if (! TARGET_HAVE_TLS
)
7312 if (GET_CODE (x
) != SYMBOL_REF
)
7315 return SYMBOL_REF_TLS_MODEL (x
) != 0;
7318 /* Helper for arm_tls_referenced_p. */
7321 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
7323 if (GET_CODE (*x
) == SYMBOL_REF
)
7324 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
7326 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7327 TLS offsets, not real symbol references. */
7328 if (GET_CODE (*x
) == UNSPEC
7329 && XINT (*x
, 1) == UNSPEC_TLS
)
7335 /* Return TRUE if X contains any TLS symbol references. */
7338 arm_tls_referenced_p (rtx x
)
7340 if (! TARGET_HAVE_TLS
)
7343 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
7346 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7348 On the ARM, allow any integer (invalid ones are removed later by insn
7349 patterns), nice doubles and symbol_refs which refer to the function's
7352 When generating pic allow anything. */
7355 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
7357 /* At present, we have no support for Neon structure constants, so forbid
7358 them here. It might be possible to handle simple cases like 0 and -1
7360 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
7363 return flag_pic
|| !label_mentioned_p (x
);
7367 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7369 return (CONST_INT_P (x
)
7370 || CONST_DOUBLE_P (x
)
7371 || CONSTANT_ADDRESS_P (x
)
7376 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
7378 return (!arm_cannot_force_const_mem (mode
, x
)
7380 ? arm_legitimate_constant_p_1 (mode
, x
)
7381 : thumb_legitimate_constant_p (mode
, x
)));
7384 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7387 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7391 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
7393 split_const (x
, &base
, &offset
);
7394 if (GET_CODE (base
) == SYMBOL_REF
7395 && !offset_within_block_p (base
, INTVAL (offset
)))
7398 return arm_tls_referenced_p (x
);
7401 #define REG_OR_SUBREG_REG(X) \
7403 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7405 #define REG_OR_SUBREG_RTX(X) \
7406 (REG_P (X) ? (X) : SUBREG_REG (X))
7409 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7411 enum machine_mode mode
= GET_MODE (x
);
7420 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7427 return COSTS_N_INSNS (1);
7430 if (CONST_INT_P (XEXP (x
, 1)))
7433 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7440 return COSTS_N_INSNS (2) + cycles
;
7442 return COSTS_N_INSNS (1) + 16;
7445 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7447 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
7448 return (COSTS_N_INSNS (words
)
7449 + 4 * ((MEM_P (SET_SRC (x
)))
7450 + MEM_P (SET_DEST (x
))));
7455 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7457 if (thumb_shiftable_const (INTVAL (x
)))
7458 return COSTS_N_INSNS (2);
7459 return COSTS_N_INSNS (3);
7461 else if ((outer
== PLUS
|| outer
== COMPARE
)
7462 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7464 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7465 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7466 return COSTS_N_INSNS (1);
7467 else if (outer
== AND
)
7470 /* This duplicates the tests in the andsi3 expander. */
7471 for (i
= 9; i
<= 31; i
++)
7472 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7473 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7474 return COSTS_N_INSNS (2);
7476 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7477 || outer
== LSHIFTRT
)
7479 return COSTS_N_INSNS (2);
7485 return COSTS_N_INSNS (3);
7503 /* XXX another guess. */
7504 /* Memory costs quite a lot for the first word, but subsequent words
7505 load at the equivalent of a single insn each. */
7506 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7507 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7512 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7518 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
7519 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
7525 return total
+ COSTS_N_INSNS (1);
7527 /* Assume a two-shift sequence. Increase the cost slightly so
7528 we prefer actual shifts over an extend operation. */
7529 return total
+ 1 + COSTS_N_INSNS (2);
7537 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
7539 enum machine_mode mode
= GET_MODE (x
);
7540 enum rtx_code subcode
;
7542 enum rtx_code code
= GET_CODE (x
);
7548 /* Memory costs quite a lot for the first word, but subsequent words
7549 load at the equivalent of a single insn each. */
7550 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
7557 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
7558 *total
= COSTS_N_INSNS (2);
7559 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
7560 *total
= COSTS_N_INSNS (4);
7562 *total
= COSTS_N_INSNS (20);
7566 if (REG_P (XEXP (x
, 1)))
7567 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
7568 else if (!CONST_INT_P (XEXP (x
, 1)))
7569 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7575 *total
+= COSTS_N_INSNS (4);
7580 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
7581 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7584 *total
+= COSTS_N_INSNS (3);
7588 *total
+= COSTS_N_INSNS (1);
7589 /* Increase the cost of complex shifts because they aren't any faster,
7590 and reduce dual issue opportunities. */
7591 if (arm_tune_cortex_a9
7592 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
7600 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7601 if (CONST_INT_P (XEXP (x
, 0))
7602 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
7604 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7608 if (CONST_INT_P (XEXP (x
, 1))
7609 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
7611 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7618 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7620 if (TARGET_HARD_FLOAT
7622 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7624 *total
= COSTS_N_INSNS (1);
7625 if (CONST_DOUBLE_P (XEXP (x
, 0))
7626 && arm_const_double_rtx (XEXP (x
, 0)))
7628 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7632 if (CONST_DOUBLE_P (XEXP (x
, 1))
7633 && arm_const_double_rtx (XEXP (x
, 1)))
7635 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7641 *total
= COSTS_N_INSNS (20);
7645 *total
= COSTS_N_INSNS (1);
7646 if (CONST_INT_P (XEXP (x
, 0))
7647 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
7649 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7653 subcode
= GET_CODE (XEXP (x
, 1));
7654 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7655 || subcode
== LSHIFTRT
7656 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7658 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7659 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
7663 /* A shift as a part of RSB costs no more than RSB itself. */
7664 if (GET_CODE (XEXP (x
, 0)) == MULT
7665 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7667 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
7668 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7673 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
7675 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7676 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
7680 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
7681 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
7683 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7684 if (REG_P (XEXP (XEXP (x
, 1), 0))
7685 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
7686 *total
+= COSTS_N_INSNS (1);
7694 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
7695 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7696 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7698 *total
= COSTS_N_INSNS (1);
7699 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
7701 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7705 /* MLA: All arguments must be registers. We filter out
7706 multiplication by a power of two, so that we fall down into
7708 if (GET_CODE (XEXP (x
, 0)) == MULT
7709 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7711 /* The cost comes from the cost of the multiply. */
7715 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7717 if (TARGET_HARD_FLOAT
7719 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7721 *total
= COSTS_N_INSNS (1);
7722 if (CONST_DOUBLE_P (XEXP (x
, 1))
7723 && arm_const_double_rtx (XEXP (x
, 1)))
7725 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7732 *total
= COSTS_N_INSNS (20);
7736 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
7737 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
7739 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7740 if (REG_P (XEXP (XEXP (x
, 0), 0))
7741 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
7742 *total
+= COSTS_N_INSNS (1);
7748 case AND
: case XOR
: case IOR
:
7750 /* Normally the frame registers will be spilt into reg+const during
7751 reload, so it is a bad idea to combine them with other instructions,
7752 since then they might not be moved outside of loops. As a compromise
7753 we allow integration with ops that have a constant as their second
7755 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
7756 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
7757 && !CONST_INT_P (XEXP (x
, 1)))
7758 *total
= COSTS_N_INSNS (1);
7762 *total
+= COSTS_N_INSNS (2);
7763 if (CONST_INT_P (XEXP (x
, 1))
7764 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7766 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7773 *total
+= COSTS_N_INSNS (1);
7774 if (CONST_INT_P (XEXP (x
, 1))
7775 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7777 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7780 subcode
= GET_CODE (XEXP (x
, 0));
7781 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7782 || subcode
== LSHIFTRT
7783 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7785 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7786 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7791 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7793 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7794 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7798 if (subcode
== UMIN
|| subcode
== UMAX
7799 || subcode
== SMIN
|| subcode
== SMAX
)
7801 *total
= COSTS_N_INSNS (3);
7808 /* This should have been handled by the CPU specific routines. */
7812 if (arm_arch3m
&& mode
== SImode
7813 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
7814 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7815 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
7816 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
7817 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
7818 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
7820 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
7823 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7827 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7829 if (TARGET_HARD_FLOAT
7831 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7833 *total
= COSTS_N_INSNS (1);
7836 *total
= COSTS_N_INSNS (2);
7842 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
7843 if (mode
== SImode
&& code
== NOT
)
7845 subcode
= GET_CODE (XEXP (x
, 0));
7846 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7847 || subcode
== LSHIFTRT
7848 || subcode
== ROTATE
|| subcode
== ROTATERT
7850 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
7852 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7853 /* Register shifts cost an extra cycle. */
7854 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
7855 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
7864 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7866 *total
= COSTS_N_INSNS (4);
7870 operand
= XEXP (x
, 0);
7872 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
7873 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
7874 && REG_P (XEXP (operand
, 0))
7875 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
7876 *total
+= COSTS_N_INSNS (1);
7877 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
7878 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
7882 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7884 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7890 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
7891 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7893 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7899 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
7900 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7902 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7922 /* SCC insns. In the case where the comparison has already been
7923 performed, then they cost 2 instructions. Otherwise they need
7924 an additional comparison before them. */
7925 *total
= COSTS_N_INSNS (2);
7926 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
7933 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
7939 *total
+= COSTS_N_INSNS (1);
7940 if (CONST_INT_P (XEXP (x
, 1))
7941 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7943 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7947 subcode
= GET_CODE (XEXP (x
, 0));
7948 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7949 || subcode
== LSHIFTRT
7950 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7952 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7953 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7958 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7960 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7961 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7971 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7972 if (!CONST_INT_P (XEXP (x
, 1))
7973 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
7974 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7978 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7980 if (TARGET_HARD_FLOAT
7982 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7984 *total
= COSTS_N_INSNS (1);
7987 *total
= COSTS_N_INSNS (20);
7990 *total
= COSTS_N_INSNS (1);
7992 *total
+= COSTS_N_INSNS (3);
7998 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8000 rtx op
= XEXP (x
, 0);
8001 enum machine_mode opmode
= GET_MODE (op
);
8004 *total
+= COSTS_N_INSNS (1);
8006 if (opmode
!= SImode
)
8010 /* If !arm_arch4, we use one of the extendhisi2_mem
8011 or movhi_bytes patterns for HImode. For a QImode
8012 sign extension, we first zero-extend from memory
8013 and then perform a shift sequence. */
8014 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8015 *total
+= COSTS_N_INSNS (2);
8018 *total
+= COSTS_N_INSNS (1);
8020 /* We don't have the necessary insn, so we need to perform some
8022 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8023 /* An and with constant 255. */
8024 *total
+= COSTS_N_INSNS (1);
8026 /* A shift sequence. Increase costs slightly to avoid
8027 combining two shifts into an extend operation. */
8028 *total
+= COSTS_N_INSNS (2) + 1;
8034 switch (GET_MODE (XEXP (x
, 0)))
8041 *total
= COSTS_N_INSNS (1);
8051 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8055 if (const_ok_for_arm (INTVAL (x
))
8056 || const_ok_for_arm (~INTVAL (x
)))
8057 *total
= COSTS_N_INSNS (1);
8059 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8060 INTVAL (x
), NULL_RTX
,
8067 *total
= COSTS_N_INSNS (3);
8071 *total
= COSTS_N_INSNS (1);
8075 *total
= COSTS_N_INSNS (1);
8076 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8080 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8081 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8082 *total
= COSTS_N_INSNS (1);
8084 *total
= COSTS_N_INSNS (4);
8088 /* The vec_extract patterns accept memory operands that require an
8089 address reload. Account for the cost of that reload to give the
8090 auto-inc-dec pass an incentive to try to replace them. */
8091 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8092 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8094 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8095 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8096 *total
+= COSTS_N_INSNS (1);
8099 /* Likewise for the vec_set patterns. */
8100 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8101 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8102 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8104 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8105 *total
= rtx_cost (mem
, code
, 0, speed
);
8106 if (!neon_vector_mem_operand (mem
, 2, true))
8107 *total
+= COSTS_N_INSNS (1);
8113 /* We cost this as high as our memory costs to allow this to
8114 be hoisted from loops. */
8115 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8117 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8123 && TARGET_HARD_FLOAT
8125 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8126 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8127 *total
= COSTS_N_INSNS (1);
8129 *total
= COSTS_N_INSNS (4);
8133 *total
= COSTS_N_INSNS (4);
8138 /* Estimates the size cost of thumb1 instructions.
8139 For now most of the code is copied from thumb1_rtx_costs. We need more
8140 fine grain tuning when we have more related test cases. */
8142 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8144 enum machine_mode mode
= GET_MODE (x
);
8153 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8157 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8158 defined by RTL expansion, especially for the expansion of
8160 if ((GET_CODE (XEXP (x
, 0)) == MULT
8161 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8162 || (GET_CODE (XEXP (x
, 1)) == MULT
8163 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8164 return COSTS_N_INSNS (2);
8165 /* On purpose fall through for normal RTX. */
8169 return COSTS_N_INSNS (1);
8172 if (CONST_INT_P (XEXP (x
, 1)))
8174 /* Thumb1 mul instruction can't operate on const. We must Load it
8175 into a register first. */
8176 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8177 return COSTS_N_INSNS (1) + const_size
;
8179 return COSTS_N_INSNS (1);
8182 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8184 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8185 return (COSTS_N_INSNS (words
)
8186 + 4 * ((MEM_P (SET_SRC (x
)))
8187 + MEM_P (SET_DEST (x
))));
8192 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8193 return COSTS_N_INSNS (1);
8194 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8195 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8196 return COSTS_N_INSNS (2);
8197 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8198 if (thumb_shiftable_const (INTVAL (x
)))
8199 return COSTS_N_INSNS (2);
8200 return COSTS_N_INSNS (3);
8202 else if ((outer
== PLUS
|| outer
== COMPARE
)
8203 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8205 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8206 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8207 return COSTS_N_INSNS (1);
8208 else if (outer
== AND
)
8211 /* This duplicates the tests in the andsi3 expander. */
8212 for (i
= 9; i
<= 31; i
++)
8213 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8214 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8215 return COSTS_N_INSNS (2);
8217 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8218 || outer
== LSHIFTRT
)
8220 return COSTS_N_INSNS (2);
8226 return COSTS_N_INSNS (3);
8244 /* XXX another guess. */
8245 /* Memory costs quite a lot for the first word, but subsequent words
8246 load at the equivalent of a single insn each. */
8247 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8248 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8253 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8258 /* XXX still guessing. */
8259 switch (GET_MODE (XEXP (x
, 0)))
8262 return (1 + (mode
== DImode
? 4 : 0)
8263 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8266 return (4 + (mode
== DImode
? 4 : 0)
8267 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8270 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8281 /* RTX costs when optimizing for size. */
/* NOTE(review): this excerpt is garbled -- statements are split across
   physical lines and many original source lines (most `case' labels,
   braces, `break's and the closing brace of the switch) have been elided
   by the extraction.  The code is left byte-identical; only comments are
   added.  Branch-level comments are therefore hedged -- confirm against
   the complete file.  */
8283 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8286 enum machine_mode mode
= GET_MODE (x
);
/* Thumb-1 has its own dedicated size-cost routine.  */
8289 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
8293 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8297 /* A memory access costs 1 insn if the mode is small, or the address is
8298 a single register, otherwise it costs one insn per word. */
8299 if (REG_P (XEXP (x
, 0)))
8300 *total
= COSTS_N_INSNS (1);
8302 && GET_CODE (XEXP (x
, 0)) == PLUS
8303 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
8304 /* This will be split into two instructions.
8305 See arm.md:calculate_pic_address. */
8306 *total
= COSTS_N_INSNS (2);
8308 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
/* Division-style case (labels elided):  */
8315 /* Needs a libcall, so it costs about this. */
8316 *total
= COSTS_N_INSNS (2);
/* Shift/rotate cases (labels elided).  SImode rotate by a register
   amount takes two instructions.  */
8320 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
8322 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
/* Constant DImode shifts are synthesized from three insns.  */
8330 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
8332 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8335 else if (mode
== SImode
)
8337 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8338 /* Slightly disparage register shifts, but not by much. */
8339 if (!CONST_INT_P (XEXP (x
, 1)))
8340 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
8344 /* Needs a libcall. */
8345 *total
= COSTS_N_INSNS (2);
/* MINUS-style case (label elided): one insn for HW-float-capable modes.  */
8349 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8350 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8352 *total
= COSTS_N_INSNS (1);
/* A shifted operand folds into the ALU operation at no extra cost.  */
8358 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
8359 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
8361 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
8362 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
8363 || subcode1
== ROTATE
|| subcode1
== ROTATERT
8364 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
8365 || subcode1
== ASHIFTRT
)
8367 /* It's just the cost of the two operands. */
8372 *total
= COSTS_N_INSNS (1);
8376 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
/* PLUS-style case (label elided): FP add is a single insn with HW float.  */
8380 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8381 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8383 *total
= COSTS_N_INSNS (1);
8387 /* A shift as a part of ADD costs nothing. */
8388 if (GET_CODE (XEXP (x
, 0)) == MULT
8389 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8391 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
8392 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
8393 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
8398 case AND
: case XOR
: case IOR
:
/* Logical ops combine for free with a shifted operand, or (for AND
   only) with an inverted operand (BIC).  */
8401 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
8403 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
8404 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
8405 || (code
== AND
&& subcode
== NOT
))
8407 /* It's just the cost of the two operands. */
8413 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8417 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
/* Unary FP-capable case (labels elided): single insn with HW float.  */
8421 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8422 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8424 *total
= COSTS_N_INSNS (1);
8430 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
/* Comparisons using an already-set CC register are essentially free.  */
8439 if (cc_register (XEXP (x
, 0), VOIDmode
))
8442 *total
= COSTS_N_INSNS (1);
8446 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8447 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8448 *total
= COSTS_N_INSNS (1);
8450 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
/* Remaining cases defer to the common (speed) cost routine.  */
8455 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
/* CONST_INT-style case (label elided): cost depends on how the value
   can be synthesized and on the operation consuming it.  */
8458 if (const_ok_for_arm (INTVAL (x
)))
8459 /* A multiplication by a constant requires another instruction
8460 to load the constant to a register. */
8461 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
8463 else if (const_ok_for_arm (~INTVAL (x
)))
8464 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
8465 else if (const_ok_for_arm (-INTVAL (x
)))
8467 if (outer_code
== COMPARE
|| outer_code
== PLUS
8468 || outer_code
== MINUS
)
8471 *total
= COSTS_N_INSNS (1);
8474 *total
= COSTS_N_INSNS (2);
8480 *total
= COSTS_N_INSNS (2);
8484 *total
= COSTS_N_INSNS (4);
/* Vector constants that a single NEON move instruction can materialize.  */
8489 && TARGET_HARD_FLOAT
8490 && outer_code
== SET
8491 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8492 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8493 *total
= COSTS_N_INSNS (1);
8495 *total
= COSTS_N_INSNS (4);
8500 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8501 cost of these slightly. */
8502 *total
= COSTS_N_INSNS (1) + 1;
8509 if (mode
!= VOIDmode
)
8510 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8512 *total
= COSTS_N_INSNS (4); /* Who knows? */
8517 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8518 operand, then return the operand that is being shifted. If the shift
8519 is not by a constant, then set SHIFT_REG to point to the operand.
8520 Return NULL if OP is not a shifter operand. */
/* NOTE(review): garbled excerpt -- the return-type line, braces and the
   trailing `return NULL' appear to have been elided by the extraction.
   Code below is left byte-identical; only comments are added.  */
8522 shifter_op_p (rtx op
, rtx
*shift_reg
)
8524 enum rtx_code code
= GET_CODE (op
);
/* MULT by an exact power of two is a left shift in disguise.  */
8526 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
8527 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
8528 return XEXP (op
, 0);
/* ROTATE only qualifies when the rotate amount is a constant.  */
8529 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
8530 return XEXP (op
, 0);
/* Remaining shift codes: a non-constant amount is reported to the
   caller through *SHIFT_REG.  */
8531 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
8532 || code
== ASHIFTRT
)
8534 if (!CONST_INT_P (XEXP (op
, 1)))
8535 *shift_reg
= XEXP (op
, 1);
8536 return XEXP (op
, 0);
/* Compute the cost of an UNSPEC rtx into *COST.  NOTE(review): garbled
   excerpt -- the return-type line, braces, `break's and some statements
   of this function have been elided by the extraction.  Code below is
   left byte-identical; only comments are added.  */
8543 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
8545 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
8546 gcc_assert (GET_CODE (x
) == UNSPEC
);
/* Dispatch on the UNSPEC sub-code.  */
8548 switch (XINT (x
, 1))
8550 case UNSPEC_UNALIGNED_LOAD
:
8551 /* We can only do unaligned loads into the integer unit, and we can't
8553 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
/* Per-register load cost plus the unaligned-access penalty
   (presumably applied only when SPEED_P -- the guard is elided).  */
8555 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
8556 + extra_cost
->ldst
.load_unaligned
);
/* Plus the cost of forming the memory address.  */
8559 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
8560 ADDR_SPACE_GENERIC
, speed_p
);
8564 case UNSPEC_UNALIGNED_STORE
:
8565 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
/* Per-register store cost plus the unaligned-access penalty.  */
8567 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
8568 + extra_cost
->ldst
.store_unaligned
);
/* Stores also pay for the value being stored and for the address.  */
8570 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
8572 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
8573 ADDR_SPACE_GENERIC
, speed_p
);
/* FP rounding unspecs (case labels elided): one insn plus the tuned
   round-to-integral cost, indexed by single vs double precision.  */
8583 *cost
= COSTS_N_INSNS (1);
8585 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
/* Fallback (label elided): assume two instructions.  */
8589 *cost
= COSTS_N_INSNS (2);
8595 /* Cost of a libcall. We assume one insn per argument, an amount for the
8596 call (one insn for -Os) and then one for processing the result. */
/* N is the argument count; `speed_p' is expanded from the enclosing
   function's scope.  N is parenthesized so that call sites such as
   LIBCALL_COST (a + b) group as intended (CERT PRE01-C).  */
#define LIBCALL_COST(N) COSTS_N_INSNS ((N) + (speed_p ? 18 : 2))
8599 /* RTX costs. Make an estimate of the cost of executing the operation
8600 X, which is contained with an operation with code OUTER_CODE.
8601 SPEED_P indicates whether the cost desired is the performance cost,
8602 or the size cost. The estimate is stored in COST and the return
8603 value is TRUE if the cost calculation is final, or FALSE if the
8604 caller should recurse through the operands of X to add additional costs.
8607 We currently make no attempt to model the size savings of Thumb-2
8608 16-bit instructions. At the normal points in compilation where
8609 this code is called we have no measure of whether the condition
8610 flags are live or not, and thus no realistic way to determine what
8611 the size will eventually be. */
8613 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8614 const struct cpu_cost_table
*extra_cost
,
8615 int *cost
, bool speed_p
)
8617 enum machine_mode mode
= GET_MODE (x
);
8622 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
8624 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
8632 if (REG_P (SET_SRC (x
))
8633 && REG_P (SET_DEST (x
)))
8635 /* Assume that most copies can be done with a single insn,
8636 unless we don't have HW FP, in which case everything
8637 larger than word mode will require two insns. */
8638 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
8639 && GET_MODE_SIZE (mode
) > 4)
8642 /* Conditional register moves can be encoded
8643 in 16 bits in Thumb mode. */
8644 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
8648 if (CONST_INT_P (SET_SRC (x
)))
8650 /* Handle CONST_INT here, since the value doesn't have a mode
8651 and we would otherwise be unable to work out the true cost. */
8652 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
8653 mode
= GET_MODE (SET_DEST (x
));
8655 /* Slightly lower the cost of setting a core reg to a constant.
8656 This helps break up chains and allows for better scheduling. */
8657 if (REG_P (SET_DEST (x
))
8658 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
8661 /* Immediate moves with an immediate in the range [0, 255] can be
8662 encoded in 16 bits in Thumb mode. */
8663 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
8664 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
8666 goto const_int_cost
;
8672 /* A memory access costs 1 insn if the mode is small, or the address is
8673 a single register, otherwise it costs one insn per word. */
8674 if (REG_P (XEXP (x
, 0)))
8675 *cost
= COSTS_N_INSNS (1);
8677 && GET_CODE (XEXP (x
, 0)) == PLUS
8678 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
8679 /* This will be split into two instructions.
8680 See arm.md:calculate_pic_address. */
8681 *cost
= COSTS_N_INSNS (2);
8683 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8685 /* For speed optimizations, add the costs of the address and
8686 accessing memory. */
8689 *cost
+= (extra_cost
->ldst
.load
8690 + arm_address_cost (XEXP (x
, 0), mode
,
8691 ADDR_SPACE_GENERIC
, speed_p
));
8693 *cost
+= extra_cost
->ldst
.load
;
8699 /* Calculations of LDM costs are complex. We assume an initial cost
8700 (ldm_1st) which will load the number of registers mentioned in
8701 ldm_regs_per_insn_1st registers; then each additional
8702 ldm_regs_per_insn_subsequent registers cost one more insn. The
8703 formula for N regs is thus:
8705 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
8706 + ldm_regs_per_insn_subsequent - 1)
8707 / ldm_regs_per_insn_subsequent).
8709 Additional costs may also be added for addressing. A similar
8710 formula is used for STM. */
8712 bool is_ldm
= load_multiple_operation (x
, SImode
);
8713 bool is_stm
= store_multiple_operation (x
, SImode
);
8715 *cost
= COSTS_N_INSNS (1);
8717 if (is_ldm
|| is_stm
)
8721 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
8722 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
8723 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
8724 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
8725 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
8726 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
8727 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
8729 *cost
+= regs_per_insn_1st
8730 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
8731 + regs_per_insn_sub
- 1)
8732 / regs_per_insn_sub
);
8741 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8742 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8743 *cost
= COSTS_N_INSNS (speed_p
8744 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
8745 else if (mode
== SImode
&& TARGET_IDIV
)
8746 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
8748 *cost
= LIBCALL_COST (2);
8749 return false; /* All arguments must be in registers. */
8753 *cost
= LIBCALL_COST (2);
8754 return false; /* All arguments must be in registers. */
8757 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
8759 *cost
= (COSTS_N_INSNS (2)
8760 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
8762 *cost
+= extra_cost
->alu
.shift_reg
;
8770 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
8772 *cost
= (COSTS_N_INSNS (3)
8773 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
8775 *cost
+= 2 * extra_cost
->alu
.shift
;
8778 else if (mode
== SImode
)
8780 *cost
= (COSTS_N_INSNS (1)
8781 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
8782 /* Slightly disparage register shifts at -Os, but not by much. */
8783 if (!CONST_INT_P (XEXP (x
, 1)))
8784 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
8785 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
8788 else if (GET_MODE_CLASS (mode
) == MODE_INT
8789 && GET_MODE_SIZE (mode
) < 4)
8793 *cost
= (COSTS_N_INSNS (1)
8794 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
8795 /* Slightly disparage register shifts at -Os, but not by
8797 if (!CONST_INT_P (XEXP (x
, 1)))
8798 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
8799 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
8801 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
8803 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
8805 /* Can use SBFX/UBFX. */
8806 *cost
= COSTS_N_INSNS (1);
8808 *cost
+= extra_cost
->alu
.bfx
;
8809 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
8813 *cost
= COSTS_N_INSNS (2);
8814 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
8817 if (CONST_INT_P (XEXP (x
, 1)))
8818 *cost
+= 2 * extra_cost
->alu
.shift
;
8820 *cost
+= (extra_cost
->alu
.shift
8821 + extra_cost
->alu
.shift_reg
);
8824 /* Slightly disparage register shifts. */
8825 *cost
+= !CONST_INT_P (XEXP (x
, 1));
8830 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
8831 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
8834 if (CONST_INT_P (XEXP (x
, 1)))
8835 *cost
+= (2 * extra_cost
->alu
.shift
8836 + extra_cost
->alu
.log_shift
);
8838 *cost
+= (extra_cost
->alu
.shift
8839 + extra_cost
->alu
.shift_reg
8840 + extra_cost
->alu
.log_shift_reg
);
8846 *cost
= LIBCALL_COST (2);
8850 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8851 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8853 *cost
= COSTS_N_INSNS (1);
8854 if (GET_CODE (XEXP (x
, 0)) == MULT
8855 || GET_CODE (XEXP (x
, 1)) == MULT
)
8857 rtx mul_op0
, mul_op1
, sub_op
;
8860 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
8862 if (GET_CODE (XEXP (x
, 0)) == MULT
)
8864 mul_op0
= XEXP (XEXP (x
, 0), 0);
8865 mul_op1
= XEXP (XEXP (x
, 0), 1);
8866 sub_op
= XEXP (x
, 1);
8870 mul_op0
= XEXP (XEXP (x
, 1), 0);
8871 mul_op1
= XEXP (XEXP (x
, 1), 1);
8872 sub_op
= XEXP (x
, 0);
8875 /* The first operand of the multiply may be optionally
8877 if (GET_CODE (mul_op0
) == NEG
)
8878 mul_op0
= XEXP (mul_op0
, 0);
8880 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
8881 + rtx_cost (mul_op1
, code
, 0, speed_p
)
8882 + rtx_cost (sub_op
, code
, 0, speed_p
));
8888 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
8894 rtx shift_by_reg
= NULL
;
8898 *cost
= COSTS_N_INSNS (1);
8900 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
8901 if (shift_op
== NULL
)
8903 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
8904 non_shift_op
= XEXP (x
, 0);
8907 non_shift_op
= XEXP (x
, 1);
8909 if (shift_op
!= NULL
)
8911 if (shift_by_reg
!= NULL
)
8914 *cost
+= extra_cost
->alu
.arith_shift_reg
;
8915 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
8918 *cost
+= extra_cost
->alu
.arith_shift
;
8920 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
8921 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
8926 && GET_CODE (XEXP (x
, 1)) == MULT
)
8930 *cost
+= extra_cost
->mult
[0].add
;
8931 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
8932 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
8933 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
8937 if (CONST_INT_P (XEXP (x
, 0)))
8939 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
8940 INTVAL (XEXP (x
, 0)), NULL_RTX
,
8942 *cost
= COSTS_N_INSNS (insns
);
8944 *cost
+= insns
* extra_cost
->alu
.arith
;
8945 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
8952 if (GET_MODE_CLASS (mode
) == MODE_INT
8953 && GET_MODE_SIZE (mode
) < 4)
8955 /* Slightly disparage, as we might need to widen the result. */
8956 *cost
= 1 + COSTS_N_INSNS (1);
8958 *cost
+= extra_cost
->alu
.arith
;
8960 if (CONST_INT_P (XEXP (x
, 0)))
8962 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
8971 *cost
= COSTS_N_INSNS (2);
8973 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
8975 rtx op1
= XEXP (x
, 1);
8978 *cost
+= 2 * extra_cost
->alu
.arith
;
8980 if (GET_CODE (op1
) == ZERO_EXTEND
)
8981 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
8983 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
8984 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
8988 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
8991 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
8992 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
8994 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
8997 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
8998 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9001 *cost
+= (extra_cost
->alu
.arith
9002 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9003 ? extra_cost
->alu
.arith
9004 : extra_cost
->alu
.arith_shift
));
9005 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9006 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9007 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9012 *cost
+= 2 * extra_cost
->alu
.arith
;
9018 *cost
= LIBCALL_COST (2);
9022 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9023 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9025 *cost
= COSTS_N_INSNS (1);
9026 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9028 rtx mul_op0
, mul_op1
, add_op
;
9031 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9033 mul_op0
= XEXP (XEXP (x
, 0), 0);
9034 mul_op1
= XEXP (XEXP (x
, 0), 1);
9035 add_op
= XEXP (x
, 1);
9037 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9038 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9039 + rtx_cost (add_op
, code
, 0, speed_p
));
9045 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9048 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9050 *cost
= LIBCALL_COST (2);
9054 if (GET_MODE_CLASS (mode
) == MODE_INT
9055 && GET_MODE_SIZE (mode
) < 4)
9057 /* Narrow modes can be synthesized in SImode, but the range
9058 of useful sub-operations is limited. */
9059 if (CONST_INT_P (XEXP (x
, 1)))
9061 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9062 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9064 *cost
= COSTS_N_INSNS (insns
);
9066 *cost
+= insns
* extra_cost
->alu
.arith
;
9067 /* Slightly penalize a narrow operation as the result may
9069 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9073 /* Slightly penalize a narrow operation as the result may
9075 *cost
= 1 + COSTS_N_INSNS (1);
9077 *cost
+= extra_cost
->alu
.arith
;
9084 rtx shift_op
, shift_reg
;
9086 *cost
= COSTS_N_INSNS (1);
9088 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9089 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9091 /* UXTA[BH] or SXTA[BH]. */
9093 *cost
+= extra_cost
->alu
.extnd_arith
;
9094 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9096 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9101 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9102 if (shift_op
!= NULL
)
9107 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9108 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9111 *cost
+= extra_cost
->alu
.arith_shift
;
9113 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9114 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9117 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9119 rtx mul_op
= XEXP (x
, 0);
9121 *cost
= COSTS_N_INSNS (1);
9123 if (TARGET_DSP_MULTIPLY
9124 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9125 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9126 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9127 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9128 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9129 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9130 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9131 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9132 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9133 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9134 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9135 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9140 *cost
+= extra_cost
->mult
[0].extend_add
;
9141 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9142 SIGN_EXTEND
, 0, speed_p
)
9143 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9144 SIGN_EXTEND
, 0, speed_p
)
9145 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9150 *cost
+= extra_cost
->mult
[0].add
;
9151 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
9152 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
9153 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9156 if (CONST_INT_P (XEXP (x
, 1)))
9158 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9159 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9161 *cost
= COSTS_N_INSNS (insns
);
9163 *cost
+= insns
* extra_cost
->alu
.arith
;
9164 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9173 && GET_CODE (XEXP (x
, 0)) == MULT
9174 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9175 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9176 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9177 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9179 *cost
= COSTS_N_INSNS (1);
9181 *cost
+= extra_cost
->mult
[1].extend_add
;
9182 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
9183 ZERO_EXTEND
, 0, speed_p
)
9184 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
9185 ZERO_EXTEND
, 0, speed_p
)
9186 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9190 *cost
= COSTS_N_INSNS (2);
9192 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9193 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9196 *cost
+= (extra_cost
->alu
.arith
9197 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9198 ? extra_cost
->alu
.arith
9199 : extra_cost
->alu
.arith_shift
));
9201 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9203 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9208 *cost
+= 2 * extra_cost
->alu
.arith
;
9213 *cost
= LIBCALL_COST (2);
9216 case AND
: case XOR
: case IOR
:
9219 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9220 rtx op0
= XEXP (x
, 0);
9221 rtx shift_op
, shift_reg
;
9223 *cost
= COSTS_N_INSNS (1);
9227 || (code
== IOR
&& TARGET_THUMB2
)))
9228 op0
= XEXP (op0
, 0);
9231 shift_op
= shifter_op_p (op0
, &shift_reg
);
9232 if (shift_op
!= NULL
)
9237 *cost
+= extra_cost
->alu
.log_shift_reg
;
9238 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9241 *cost
+= extra_cost
->alu
.log_shift
;
9243 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9244 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9248 if (CONST_INT_P (XEXP (x
, 1)))
9250 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9251 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9254 *cost
= COSTS_N_INSNS (insns
);
9256 *cost
+= insns
* extra_cost
->alu
.logical
;
9257 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
9262 *cost
+= extra_cost
->alu
.logical
;
9263 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
9264 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9270 rtx op0
= XEXP (x
, 0);
9271 enum rtx_code subcode
= GET_CODE (op0
);
9273 *cost
= COSTS_N_INSNS (2);
9277 || (code
== IOR
&& TARGET_THUMB2
)))
9278 op0
= XEXP (op0
, 0);
9280 if (GET_CODE (op0
) == ZERO_EXTEND
)
9283 *cost
+= 2 * extra_cost
->alu
.logical
;
9285 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
9286 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9289 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9292 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9294 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
9295 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9300 *cost
+= 2 * extra_cost
->alu
.logical
;
9306 *cost
= LIBCALL_COST (2);
9310 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9311 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9313 rtx op0
= XEXP (x
, 0);
9315 *cost
= COSTS_N_INSNS (1);
9317 if (GET_CODE (op0
) == NEG
)
9318 op0
= XEXP (op0
, 0);
9321 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9323 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
9324 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
9327 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9329 *cost
= LIBCALL_COST (2);
9335 *cost
= COSTS_N_INSNS (1);
9336 if (TARGET_DSP_MULTIPLY
9337 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9338 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9339 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9340 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9341 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9342 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
9343 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
9344 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
9345 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9346 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9347 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9348 && (INTVAL (XEXP (XEXP (x
, 1), 1))
9353 *cost
+= extra_cost
->mult
[0].extend
;
9354 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
9355 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
9359 *cost
+= extra_cost
->mult
[0].simple
;
9366 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9367 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
9368 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9369 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
9371 *cost
= COSTS_N_INSNS (1);
9373 *cost
+= extra_cost
->mult
[1].extend
;
9374 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
9375 ZERO_EXTEND
, 0, speed_p
)
9376 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9377 ZERO_EXTEND
, 0, speed_p
));
9381 *cost
= LIBCALL_COST (2);
9386 *cost
= LIBCALL_COST (2);
9390 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9391 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9393 *cost
= COSTS_N_INSNS (1);
9395 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9399 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9401 *cost
= LIBCALL_COST (1);
9407 if (GET_CODE (XEXP (x
, 0)) == ABS
)
9409 *cost
= COSTS_N_INSNS (2);
9410 /* Assume the non-flag-changing variant. */
9412 *cost
+= (extra_cost
->alu
.log_shift
9413 + extra_cost
->alu
.arith_shift
);
9414 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
9418 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
9419 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
9421 *cost
= COSTS_N_INSNS (2);
9422 /* No extra cost for MOV imm and MVN imm. */
9423 /* If the comparison op is using the flags, there's no further
9424 cost, otherwise we need to add the cost of the comparison. */
9425 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
9426 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
9427 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
9429 *cost
+= (COSTS_N_INSNS (1)
9430 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
9432 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
9435 *cost
+= extra_cost
->alu
.arith
;
9439 *cost
= COSTS_N_INSNS (1);
9441 *cost
+= extra_cost
->alu
.arith
;
9445 if (GET_MODE_CLASS (mode
) == MODE_INT
9446 && GET_MODE_SIZE (mode
) < 4)
9448 /* Slightly disparage, as we might need an extend operation. */
9449 *cost
= 1 + COSTS_N_INSNS (1);
9451 *cost
+= extra_cost
->alu
.arith
;
9457 *cost
= COSTS_N_INSNS (2);
9459 *cost
+= 2 * extra_cost
->alu
.arith
;
9464 *cost
= LIBCALL_COST (1);
9471 rtx shift_reg
= NULL
;
9473 *cost
= COSTS_N_INSNS (1);
9474 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9478 if (shift_reg
!= NULL
)
9481 *cost
+= extra_cost
->alu
.log_shift_reg
;
9482 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9485 *cost
+= extra_cost
->alu
.log_shift
;
9486 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
9491 *cost
+= extra_cost
->alu
.logical
;
9496 *cost
= COSTS_N_INSNS (2);
9502 *cost
+= LIBCALL_COST (1);
9507 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9509 *cost
= COSTS_N_INSNS (4);
9512 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
9513 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
9515 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
9516 /* Assume that if one arm of the if_then_else is a register,
9517 that it will be tied with the result and eliminate the
9518 conditional insn. */
9519 if (REG_P (XEXP (x
, 1)))
9521 else if (REG_P (XEXP (x
, 2)))
9527 if (extra_cost
->alu
.non_exec_costs_exec
)
9528 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
9530 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
9533 *cost
+= op1cost
+ op2cost
;
9539 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
9543 enum machine_mode op0mode
;
9544 /* We'll mostly assume that the cost of a compare is the cost of the
9545 LHS. However, there are some notable exceptions. */
9547 /* Floating point compares are never done as side-effects. */
9548 op0mode
= GET_MODE (XEXP (x
, 0));
9549 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
9550 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9552 *cost
= COSTS_N_INSNS (1);
9554 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
9556 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
9558 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9564 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
9566 *cost
= LIBCALL_COST (2);
9570 /* DImode compares normally take two insns. */
9571 if (op0mode
== DImode
)
9573 *cost
= COSTS_N_INSNS (2);
9575 *cost
+= 2 * extra_cost
->alu
.arith
;
9579 if (op0mode
== SImode
)
9584 if (XEXP (x
, 1) == const0_rtx
9585 && !(REG_P (XEXP (x
, 0))
9586 || (GET_CODE (XEXP (x
, 0)) == SUBREG
9587 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
9589 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
9591 /* Multiply operations that set the flags are often
9592 significantly more expensive. */
9594 && GET_CODE (XEXP (x
, 0)) == MULT
9595 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
9596 *cost
+= extra_cost
->mult
[0].flag_setting
;
9599 && GET_CODE (XEXP (x
, 0)) == PLUS
9600 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
9601 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
9603 *cost
+= extra_cost
->mult
[0].flag_setting
;
9608 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9609 if (shift_op
!= NULL
)
9611 *cost
= COSTS_N_INSNS (1);
9612 if (shift_reg
!= NULL
)
9614 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9616 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9619 *cost
+= extra_cost
->alu
.arith_shift
;
9620 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9621 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
9625 *cost
= COSTS_N_INSNS (1);
9627 *cost
+= extra_cost
->alu
.arith
;
9628 if (CONST_INT_P (XEXP (x
, 1))
9629 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
9631 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
9639 *cost
= LIBCALL_COST (2);
9662 if (outer_code
== SET
)
9664 /* Is it a store-flag operation? */
9665 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
9666 && XEXP (x
, 1) == const0_rtx
)
9668 /* Thumb also needs an IT insn. */
9669 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
9672 if (XEXP (x
, 1) == const0_rtx
)
9677 /* LSR Rd, Rn, #31. */
9678 *cost
= COSTS_N_INSNS (1);
9680 *cost
+= extra_cost
->alu
.shift
;
9690 *cost
= COSTS_N_INSNS (2);
9694 /* RSBS T1, Rn, Rn, LSR #31
9696 *cost
= COSTS_N_INSNS (2);
9698 *cost
+= extra_cost
->alu
.arith_shift
;
9702 /* RSB Rd, Rn, Rn, ASR #1
9704 *cost
= COSTS_N_INSNS (2);
9706 *cost
+= (extra_cost
->alu
.arith_shift
9707 + extra_cost
->alu
.shift
);
9713 *cost
= COSTS_N_INSNS (2);
9715 *cost
+= extra_cost
->alu
.shift
;
9719 /* Remaining cases are either meaningless or would take
9720 three insns anyway. */
9721 *cost
= COSTS_N_INSNS (3);
9724 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9729 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
9730 if (CONST_INT_P (XEXP (x
, 1))
9731 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
9733 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9740 /* Not directly inside a set. If it involves the condition code
9741 register it must be the condition for a branch, cond_exec or
9742 I_T_E operation. Since the comparison is performed elsewhere
9743 this is just the control part which has no additional
9745 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
9746 && XEXP (x
, 1) == const0_rtx
)
9753 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9754 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9756 *cost
= COSTS_N_INSNS (1);
9758 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9762 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9764 *cost
= LIBCALL_COST (1);
9770 *cost
= COSTS_N_INSNS (1);
9772 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
9776 *cost
= LIBCALL_COST (1);
9780 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
9781 && MEM_P (XEXP (x
, 0)))
9783 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9786 *cost
+= COSTS_N_INSNS (1);
9791 if (GET_MODE (XEXP (x
, 0)) == SImode
)
9792 *cost
+= extra_cost
->ldst
.load
;
9794 *cost
+= extra_cost
->ldst
.load_sign_extend
;
9797 *cost
+= extra_cost
->alu
.shift
;
9802 /* Widening from less than 32-bits requires an extend operation. */
9803 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
9805 /* We have SXTB/SXTH. */
9806 *cost
= COSTS_N_INSNS (1);
9807 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9809 *cost
+= extra_cost
->alu
.extnd
;
9811 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
9813 /* Needs two shifts. */
9814 *cost
= COSTS_N_INSNS (2);
9815 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9817 *cost
+= 2 * extra_cost
->alu
.shift
;
9820 /* Widening beyond 32-bits requires one more insn. */
9823 *cost
+= COSTS_N_INSNS (1);
9825 *cost
+= extra_cost
->alu
.shift
;
9832 || GET_MODE (XEXP (x
, 0)) == SImode
9833 || GET_MODE (XEXP (x
, 0)) == QImode
)
9834 && MEM_P (XEXP (x
, 0)))
9836 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9839 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
9844 /* Widening from less than 32-bits requires an extend operation. */
9845 if (GET_MODE (XEXP (x
, 0)) == QImode
)
9847 /* UXTB can be a shorter instruction in Thumb2, but it might
9848 be slower than the AND Rd, Rn, #255 alternative. When
9849 optimizing for speed it should never be slower to use
9850 AND, and we don't really model 16-bit vs 32-bit insns
9852 *cost
= COSTS_N_INSNS (1);
9854 *cost
+= extra_cost
->alu
.logical
;
9856 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
9858 /* We have UXTB/UXTH. */
9859 *cost
= COSTS_N_INSNS (1);
9860 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9862 *cost
+= extra_cost
->alu
.extnd
;
9864 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
9866 /* Needs two shifts. It's marginally preferable to use
9867 shifts rather than two BIC instructions as the second
9868 shift may merge with a subsequent insn as a shifter
9870 *cost
= COSTS_N_INSNS (2);
9871 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9873 *cost
+= 2 * extra_cost
->alu
.shift
;
9876 /* Widening beyond 32-bits requires one more insn. */
9879 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
9886 /* CONST_INT has no mode, so we cannot tell for sure how many
9887 insns are really going to be needed. The best we can do is
9888 look at the value passed. If it fits in SImode, then assume
9889 that's the mode it will be used for. Otherwise assume it
9890 will be used in DImode. */
9891 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
9896 /* Avoid blowing up in arm_gen_constant (). */
9897 if (!(outer_code
== PLUS
9898 || outer_code
== AND
9899 || outer_code
== IOR
9900 || outer_code
== XOR
9901 || outer_code
== MINUS
))
9908 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
9909 INTVAL (x
), NULL
, NULL
,
9915 *cost
+= COSTS_N_INSNS (arm_gen_constant
9916 (outer_code
, SImode
, NULL
,
9917 trunc_int_for_mode (INTVAL (x
), SImode
),
9919 + arm_gen_constant (outer_code
, SImode
, NULL
,
9920 INTVAL (x
) >> 32, NULL
,
9932 if (arm_arch_thumb2
&& !flag_pic
)
9933 *cost
= COSTS_N_INSNS (2);
9935 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
9938 *cost
= COSTS_N_INSNS (2);
9942 *cost
+= COSTS_N_INSNS (1);
9944 *cost
+= extra_cost
->alu
.arith
;
9950 *cost
= COSTS_N_INSNS (4);
9955 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9956 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9958 if (vfp3_const_double_rtx (x
))
9960 *cost
= COSTS_N_INSNS (1);
9962 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
9968 *cost
= COSTS_N_INSNS (1);
9970 *cost
+= extra_cost
->ldst
.loadd
;
9972 *cost
+= extra_cost
->ldst
.loadf
;
9975 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
9979 *cost
= COSTS_N_INSNS (4);
9985 && TARGET_HARD_FLOAT
9986 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9987 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9988 *cost
= COSTS_N_INSNS (1);
9990 *cost
= COSTS_N_INSNS (4);
9995 *cost
= COSTS_N_INSNS (1);
9996 /* When optimizing for size, we prefer constant pool entries to
9997 MOVW/MOVT pairs, so bump the cost of these slightly. */
10003 *cost
= COSTS_N_INSNS (1);
10005 *cost
+= extra_cost
->alu
.clz
;
10009 if (XEXP (x
, 1) == const0_rtx
)
10011 *cost
= COSTS_N_INSNS (1);
10013 *cost
+= extra_cost
->alu
.log_shift
;
10014 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10017 /* Fall through. */
10021 *cost
= COSTS_N_INSNS (2);
10025 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10026 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10027 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10028 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10029 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10030 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10031 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10032 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10035 *cost
= COSTS_N_INSNS (1);
10037 *cost
+= extra_cost
->mult
[1].extend
;
10038 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10040 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10044 *cost
= LIBCALL_COST (1);
10048 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10051 /* Reading the PC is like reading any other register. Writing it
10052 is more expensive, but we take that into account elsewhere. */
10057 /* TODO: Simple zero_extract of bottom bits using AND. */
10058 /* Fall through. */
10062 && CONST_INT_P (XEXP (x
, 1))
10063 && CONST_INT_P (XEXP (x
, 2)))
10065 *cost
= COSTS_N_INSNS (1);
10067 *cost
+= extra_cost
->alu
.bfx
;
10068 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10071 /* Without UBFX/SBFX, need to resort to shift operations. */
10072 *cost
= COSTS_N_INSNS (2);
10074 *cost
+= 2 * extra_cost
->alu
.shift
;
10075 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10079 if (TARGET_HARD_FLOAT
)
10081 *cost
= COSTS_N_INSNS (1);
10083 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10084 if (!TARGET_FPU_ARMV8
10085 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10087 /* Pre v8, widening HF->DF is a two-step process, first
10088 widening to SFmode. */
10089 *cost
+= COSTS_N_INSNS (1);
10091 *cost
+= extra_cost
->fp
[0].widen
;
10093 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10097 *cost
= LIBCALL_COST (1);
10100 case FLOAT_TRUNCATE
:
10101 if (TARGET_HARD_FLOAT
)
10103 *cost
= COSTS_N_INSNS (1);
10105 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10106 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10108 /* Vector modes? */
10110 *cost
= LIBCALL_COST (1);
10115 if (TARGET_HARD_FLOAT
)
10117 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10119 *cost
= COSTS_N_INSNS (1);
10121 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
10122 /* Strip of the 'cost' of rounding towards zero. */
10123 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10124 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
10126 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10127 /* ??? Increase the cost to deal with transferring from
10128 FP -> CORE registers? */
10131 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10132 && TARGET_FPU_ARMV8
)
10134 *cost
= COSTS_N_INSNS (1);
10136 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10139 /* Vector costs? */
10141 *cost
= LIBCALL_COST (1);
10145 case UNSIGNED_FLOAT
:
10146 if (TARGET_HARD_FLOAT
)
10148 /* ??? Increase the cost to deal with transferring from CORE
10149 -> FP registers? */
10150 *cost
= COSTS_N_INSNS (1);
10152 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10155 *cost
= LIBCALL_COST (1);
10159 *cost
= COSTS_N_INSNS (1);
10163 /* Just a guess. Cost one insn per input. */
10164 *cost
= COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x
));
10168 if (mode
!= VOIDmode
)
10169 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10171 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10176 /* RTX costs when optimizing for size. */
10178 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
10179 int *total
, bool speed
)
10183 if (TARGET_OLD_RTX_COSTS
10184 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
10186 /* Old way. (Deprecated.) */
10188 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
10189 (enum rtx_code
) outer_code
, total
);
10191 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
10192 (enum rtx_code
) outer_code
, total
,
10198 if (current_tune
->insn_extra_cost
)
10199 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10200 (enum rtx_code
) outer_code
,
10201 current_tune
->insn_extra_cost
,
10203 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10204 && current_tune->insn_extra_cost != NULL */
10206 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10207 (enum rtx_code
) outer_code
,
10208 &generic_extra_costs
, total
, speed
);
10211 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10213 print_rtl_single (dump_file
, x
);
10214 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10215 *total
, result
? "final" : "partial");
10220 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10221 supported on any "slowmul" cores, so it can be ignored. */
10224 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10225 int *total
, bool speed
)
10227 enum machine_mode mode
= GET_MODE (x
);
10231 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10238 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10241 *total
= COSTS_N_INSNS (20);
10245 if (CONST_INT_P (XEXP (x
, 1)))
10247 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
10248 & (unsigned HOST_WIDE_INT
) 0xffffffff);
10249 int cost
, const_ok
= const_ok_for_arm (i
);
10250 int j
, booth_unit_size
;
10252 /* Tune as appropriate. */
10253 cost
= const_ok
? 4 : 8;
10254 booth_unit_size
= 2;
10255 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
10257 i
>>= booth_unit_size
;
10261 *total
= COSTS_N_INSNS (cost
);
10262 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
10266 *total
= COSTS_N_INSNS (20);
10270 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
10275 /* RTX cost for cores with a fast multiply unit (M variants). */
10278 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10279 int *total
, bool speed
)
10281 enum machine_mode mode
= GET_MODE (x
);
10285 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10289 /* ??? should thumb2 use different costs? */
10293 /* There is no point basing this on the tuning, since it is always the
10294 fast variant if it exists at all. */
10296 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10297 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10298 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10300 *total
= COSTS_N_INSNS(2);
10305 if (mode
== DImode
)
10307 *total
= COSTS_N_INSNS (5);
10311 if (CONST_INT_P (XEXP (x
, 1)))
10313 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
10314 & (unsigned HOST_WIDE_INT
) 0xffffffff);
10315 int cost
, const_ok
= const_ok_for_arm (i
);
10316 int j
, booth_unit_size
;
10318 /* Tune as appropriate. */
10319 cost
= const_ok
? 4 : 8;
10320 booth_unit_size
= 8;
10321 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
10323 i
>>= booth_unit_size
;
10327 *total
= COSTS_N_INSNS(cost
);
10331 if (mode
== SImode
)
10333 *total
= COSTS_N_INSNS (4);
10337 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10339 if (TARGET_HARD_FLOAT
10341 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
10343 *total
= COSTS_N_INSNS (1);
10348 /* Requires a lib call */
10349 *total
= COSTS_N_INSNS (20);
10353 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10358 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10359 so it can be ignored. */
10362 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10363 int *total
, bool speed
)
10365 enum machine_mode mode
= GET_MODE (x
);
10369 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10376 if (GET_CODE (XEXP (x
, 0)) != MULT
)
10377 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10379 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10380 will stall until the multiplication is complete. */
10381 *total
= COSTS_N_INSNS (3);
10385 /* There is no point basing this on the tuning, since it is always the
10386 fast variant if it exists at all. */
10388 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10389 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10390 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10392 *total
= COSTS_N_INSNS (2);
10397 if (mode
== DImode
)
10399 *total
= COSTS_N_INSNS (5);
10403 if (CONST_INT_P (XEXP (x
, 1)))
10405 /* If operand 1 is a constant we can more accurately
10406 calculate the cost of the multiply. The multiplier can
10407 retire 15 bits on the first cycle and a further 12 on the
10408 second. We do, of course, have to load the constant into
10409 a register first. */
10410 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
10411 /* There's a general overhead of one cycle. */
10413 unsigned HOST_WIDE_INT masked_const
;
10415 if (i
& 0x80000000)
10418 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
10420 masked_const
= i
& 0xffff8000;
10421 if (masked_const
!= 0)
10424 masked_const
= i
& 0xf8000000;
10425 if (masked_const
!= 0)
10428 *total
= COSTS_N_INSNS (cost
);
10432 if (mode
== SImode
)
10434 *total
= COSTS_N_INSNS (3);
10438 /* Requires a lib call */
10439 *total
= COSTS_N_INSNS (20);
10443 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10448 /* RTX costs for 9e (and later) cores. */
10451 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10452 int *total
, bool speed
)
10454 enum machine_mode mode
= GET_MODE (x
);
10461 *total
= COSTS_N_INSNS (3);
10465 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10473 /* There is no point basing this on the tuning, since it is always the
10474 fast variant if it exists at all. */
10476 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10477 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10478 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10480 *total
= COSTS_N_INSNS (2);
10485 if (mode
== DImode
)
10487 *total
= COSTS_N_INSNS (5);
10491 if (mode
== SImode
)
10493 *total
= COSTS_N_INSNS (2);
10497 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10499 if (TARGET_HARD_FLOAT
10501 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
10503 *total
= COSTS_N_INSNS (1);
10508 *total
= COSTS_N_INSNS (20);
10512 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10515 /* All address computations that can be done are free, but rtx cost returns
10516 the same for practically all of them. So we weight the different types
10517 of address here in the order (most pref first):
10518 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10520 arm_arm_address_cost (rtx x
)
10522 enum rtx_code c
= GET_CODE (x
);
10524 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
10526 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
10531 if (CONST_INT_P (XEXP (x
, 1)))
10534 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
10544 arm_thumb_address_cost (rtx x
)
10546 enum rtx_code c
= GET_CODE (x
);
10551 && REG_P (XEXP (x
, 0))
10552 && CONST_INT_P (XEXP (x
, 1)))
10559 arm_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
10560 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
10562 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
10565 /* Adjust cost hook for XScale. */
10567 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
10569 /* Some true dependencies can have a higher cost depending
10570 on precisely how certain input operands are used. */
10571 if (REG_NOTE_KIND(link
) == 0
10572 && recog_memoized (insn
) >= 0
10573 && recog_memoized (dep
) >= 0)
10575 int shift_opnum
= get_attr_shift (insn
);
10576 enum attr_type attr_type
= get_attr_type (dep
);
10578 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10579 operand for INSN. If we have a shifted input operand and the
10580 instruction we depend on is another ALU instruction, then we may
10581 have to account for an additional stall. */
10582 if (shift_opnum
!= 0
10583 && (attr_type
== TYPE_ALU_SHIFT_IMM
10584 || attr_type
== TYPE_ALUS_SHIFT_IMM
10585 || attr_type
== TYPE_LOGIC_SHIFT_IMM
10586 || attr_type
== TYPE_LOGICS_SHIFT_IMM
10587 || attr_type
== TYPE_ALU_SHIFT_REG
10588 || attr_type
== TYPE_ALUS_SHIFT_REG
10589 || attr_type
== TYPE_LOGIC_SHIFT_REG
10590 || attr_type
== TYPE_LOGICS_SHIFT_REG
10591 || attr_type
== TYPE_MOV_SHIFT
10592 || attr_type
== TYPE_MVN_SHIFT
10593 || attr_type
== TYPE_MOV_SHIFT_REG
10594 || attr_type
== TYPE_MVN_SHIFT_REG
))
10596 rtx shifted_operand
;
10599 /* Get the shifted operand. */
10600 extract_insn (insn
);
10601 shifted_operand
= recog_data
.operand
[shift_opnum
];
10603 /* Iterate over all the operands in DEP. If we write an operand
10604 that overlaps with SHIFTED_OPERAND, then we have increase the
10605 cost of this dependency. */
10606 extract_insn (dep
);
10607 preprocess_constraints ();
10608 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
10610 /* We can ignore strict inputs. */
10611 if (recog_data
.operand_type
[opno
] == OP_IN
)
10614 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
10626 /* Adjust cost hook for Cortex A9. */
10628 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
10630 switch (REG_NOTE_KIND (link
))
10637 case REG_DEP_OUTPUT
:
10638 if (recog_memoized (insn
) >= 0
10639 && recog_memoized (dep
) >= 0)
10641 if (GET_CODE (PATTERN (insn
)) == SET
)
10644 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
10646 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
10648 enum attr_type attr_type_insn
= get_attr_type (insn
);
10649 enum attr_type attr_type_dep
= get_attr_type (dep
);
10651 /* By default all dependencies of the form
10654 have an extra latency of 1 cycle because
10655 of the input and output dependency in this
10656 case. However this gets modeled as an true
10657 dependency and hence all these checks. */
10658 if (REG_P (SET_DEST (PATTERN (insn
)))
10659 && REG_P (SET_DEST (PATTERN (dep
)))
10660 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
10661 SET_DEST (PATTERN (dep
))))
10663 /* FMACS is a special case where the dependent
10664 instruction can be issued 3 cycles before
10665 the normal latency in case of an output
10667 if ((attr_type_insn
== TYPE_FMACS
10668 || attr_type_insn
== TYPE_FMACD
)
10669 && (attr_type_dep
== TYPE_FMACS
10670 || attr_type_dep
== TYPE_FMACD
))
10672 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
10673 *cost
= insn_default_latency (dep
) - 3;
10675 *cost
= insn_default_latency (dep
);
10680 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
10681 *cost
= insn_default_latency (dep
) + 1;
10683 *cost
= insn_default_latency (dep
);
10693 gcc_unreachable ();
10699 /* Adjust cost hook for FA726TE. */
10701 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
10703 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
10704 have penalty of 3. */
10705 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
10706 && recog_memoized (insn
) >= 0
10707 && recog_memoized (dep
) >= 0
10708 && get_attr_conds (dep
) == CONDS_SET
)
10710 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
10711 if (get_attr_conds (insn
) == CONDS_USE
10712 && get_attr_type (insn
) != TYPE_BRANCH
)
10718 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
10719 || get_attr_conds (insn
) == CONDS_USE
)
10729 /* Implement TARGET_REGISTER_MOVE_COST.
10731 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
10732 it is typically more expensive than a single memory access. We set
10733 the cost to less than two memory accesses so that floating
10734 point to integer conversion does not go through memory. */
10737 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
10738 reg_class_t from
, reg_class_t to
)
10742 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
10743 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
10745 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
10746 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
10748 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
10755 if (from
== HI_REGS
|| to
== HI_REGS
)
10762 /* Implement TARGET_MEMORY_MOVE_COST. */
10765 arm_memory_move_cost (enum machine_mode mode
, reg_class_t rclass
,
10766 bool in ATTRIBUTE_UNUSED
)
10772 if (GET_MODE_SIZE (mode
) < 4)
10775 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
10779 /* Vectorizer cost model implementation. */
10781 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10783 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
10785 int misalign ATTRIBUTE_UNUSED
)
10789 switch (type_of_cost
)
10792 return current_tune
->vec_costs
->scalar_stmt_cost
;
10795 return current_tune
->vec_costs
->scalar_load_cost
;
10798 return current_tune
->vec_costs
->scalar_store_cost
;
10801 return current_tune
->vec_costs
->vec_stmt_cost
;
10804 return current_tune
->vec_costs
->vec_align_load_cost
;
10807 return current_tune
->vec_costs
->vec_store_cost
;
10809 case vec_to_scalar
:
10810 return current_tune
->vec_costs
->vec_to_scalar_cost
;
10812 case scalar_to_vec
:
10813 return current_tune
->vec_costs
->scalar_to_vec_cost
;
10815 case unaligned_load
:
10816 return current_tune
->vec_costs
->vec_unalign_load_cost
;
10818 case unaligned_store
:
10819 return current_tune
->vec_costs
->vec_unalign_store_cost
;
10821 case cond_branch_taken
:
10822 return current_tune
->vec_costs
->cond_taken_branch_cost
;
10824 case cond_branch_not_taken
:
10825 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
10828 case vec_promote_demote
:
10829 return current_tune
->vec_costs
->vec_stmt_cost
;
10831 case vec_construct
:
10832 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
10833 return elements
/ 2 + 1;
10836 gcc_unreachable ();
10840 /* Implement targetm.vectorize.add_stmt_cost. */
10843 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
10844 struct _stmt_vec_info
*stmt_info
, int misalign
,
10845 enum vect_cost_model_location where
)
10847 unsigned *cost
= (unsigned *) data
;
10848 unsigned retval
= 0;
10850 if (flag_vect_cost_model
)
10852 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
10853 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
10855 /* Statements in an inner loop relative to the loop being
10856 vectorized are weighted more heavily. The value here is
10857 arbitrary and could potentially be improved with analysis. */
10858 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
10859 count
*= 50; /* FIXME. */
10861 retval
= (unsigned) (count
* stmt_cost
);
10862 cost
[where
] += retval
;
10868 /* Return true if and only if this insn can dual-issue only as older. */
10870 cortexa7_older_only (rtx insn
)
10872 if (recog_memoized (insn
) < 0)
10875 switch (get_attr_type (insn
))
10878 case TYPE_ALUS_REG
:
10879 case TYPE_LOGIC_REG
:
10880 case TYPE_LOGICS_REG
:
10882 case TYPE_ADCS_REG
:
10887 case TYPE_SHIFT_IMM
:
10888 case TYPE_SHIFT_REG
:
10889 case TYPE_LOAD_BYTE
:
10892 case TYPE_FFARITHS
:
10894 case TYPE_FFARITHD
:
10912 case TYPE_F_STORES
:
10919 /* Return true if and only if this insn can dual-issue as younger. */
10921 cortexa7_younger (FILE *file
, int verbose
, rtx insn
)
10923 if (recog_memoized (insn
) < 0)
10926 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
10930 switch (get_attr_type (insn
))
10933 case TYPE_ALUS_IMM
:
10934 case TYPE_LOGIC_IMM
:
10935 case TYPE_LOGICS_IMM
:
10940 case TYPE_MOV_SHIFT
:
10941 case TYPE_MOV_SHIFT_REG
:
10951 /* Look for an instruction that can dual issue only as an older
10952 instruction, and move it in front of any instructions that can
10953 dual-issue as younger, while preserving the relative order of all
10954 other instructions in the ready list. This is a hueuristic to help
10955 dual-issue in later cycles, by postponing issue of more flexible
10956 instructions. This heuristic may affect dual issue opportunities
10957 in the current cycle. */
10959 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
10963 int first_older_only
= -1, first_younger
= -1;
10967 ";; sched_reorder for cycle %d with %d insns in ready list\n",
10971 /* Traverse the ready list from the head (the instruction to issue
10972 first), and looking for the first instruction that can issue as
10973 younger and the first instruction that can dual-issue only as
10975 for (i
= *n_readyp
- 1; i
>= 0; i
--)
10977 rtx insn
= ready
[i
];
10978 if (cortexa7_older_only (insn
))
10980 first_older_only
= i
;
10982 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
10985 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
10989 /* Nothing to reorder because either no younger insn found or insn
10990 that can dual-issue only as older appears before any insn that
10991 can dual-issue as younger. */
10992 if (first_younger
== -1)
10995 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
10999 /* Nothing to reorder because no older-only insn in the ready list. */
11000 if (first_older_only
== -1)
11003 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11007 /* Move first_older_only insn before first_younger. */
11009 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11010 INSN_UID(ready
[first_older_only
]),
11011 INSN_UID(ready
[first_younger
]));
11012 rtx first_older_only_insn
= ready
[first_older_only
];
11013 for (i
= first_older_only
; i
< first_younger
; i
++)
11015 ready
[i
] = ready
[i
+1];
11018 ready
[i
] = first_older_only_insn
;
11022 /* Implement TARGET_SCHED_REORDER. */
11024 arm_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11030 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11033 /* Do nothing for other cores. */
11037 return arm_issue_rate ();
11040 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11041 It corrects the value of COST based on the relationship between
11042 INSN and DEP through the dependence LINK. It returns the new
11043 value. There is a per-core adjust_cost hook to adjust scheduler costs
11044 and the per-core hook can choose to completely override the generic
11045 adjust_cost function. Only put bits of code into arm_adjust_cost that
11046 are common across all cores. */
11048 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
11052 /* When generating Thumb-1 code, we want to place flag-setting operations
11053 close to a conditional branch which depends on them, so that we can
11054 omit the comparison. */
11056 && REG_NOTE_KIND (link
) == 0
11057 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11058 && recog_memoized (dep
) >= 0
11059 && get_attr_conds (dep
) == CONDS_SET
)
11062 if (current_tune
->sched_adjust_cost
!= NULL
)
11064 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
11068 /* XXX Is this strictly true? */
11069 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
11070 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11073 /* Call insns don't incur a stall, even if they follow a load. */
11074 if (REG_NOTE_KIND (link
) == 0
11078 if ((i_pat
= single_set (insn
)) != NULL
11079 && MEM_P (SET_SRC (i_pat
))
11080 && (d_pat
= single_set (dep
)) != NULL
11081 && MEM_P (SET_DEST (d_pat
)))
11083 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11084 /* This is a load after a store, there is no conflict if the load reads
11085 from a cached area. Assume that loads from the stack, and from the
11086 constant pool are cached, and that others will miss. This is a
11089 if ((GET_CODE (src_mem
) == SYMBOL_REF
11090 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11091 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11092 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11093 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11101 arm_max_conditional_execute (void)
11103 return max_insns_skipped
;
11107 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11110 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11112 return (optimize
> 0) ? 2 : 0;
11116 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11118 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11121 static bool fp_consts_inited
= false;
11123 static REAL_VALUE_TYPE value_fp0
;
11126 init_fp_table (void)
11130 r
= REAL_VALUE_ATOF ("0", DFmode
);
11132 fp_consts_inited
= true;
11135 /* Return TRUE if rtx X is a valid immediate FP constant. */
11137 arm_const_double_rtx (rtx x
)
11141 if (!fp_consts_inited
)
11144 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11145 if (REAL_VALUE_MINUS_ZERO (r
))
11148 if (REAL_VALUES_EQUAL (r
, value_fp0
))
11154 /* VFPv3 has a fairly wide range of representable immediates, formed from
11155 "quarter-precision" floating-point values. These can be evaluated using this
11156 formula (with ^ for exponentiation):
11160 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11161 16 <= n <= 31 and 0 <= r <= 7.
11163 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11165 - A (most-significant) is the sign bit.
11166 - BCD are the exponent (encoded as r XOR 3).
11167 - EFGH are the mantissa (encoded as n - 16).
11170 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11171 fconst[sd] instruction, or -1 if X isn't suitable. */
11173 vfp3_const_double_index (rtx x
)
11175 REAL_VALUE_TYPE r
, m
;
11176 int sign
, exponent
;
11177 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11178 unsigned HOST_WIDE_INT mask
;
11179 HOST_WIDE_INT m1
, m2
;
11180 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11182 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11185 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11187 /* We can't represent these things, so detect them first. */
11188 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11191 /* Extract sign, exponent and mantissa. */
11192 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11193 r
= real_value_abs (&r
);
11194 exponent
= REAL_EXP (&r
);
11195 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11196 highest (sign) bit, with a fixed binary point at bit point_pos.
11197 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11198 bits for the mantissa, this may fail (low bits would be lost). */
11199 real_ldexp (&m
, &r
, point_pos
- exponent
);
11200 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
11204 /* If there are bits set in the low part of the mantissa, we can't
11205 represent this value. */
11209 /* Now make it so that mantissa contains the most-significant bits, and move
11210 the point_pos to indicate that the least-significant bits have been
11212 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11213 mantissa
= mant_hi
;
11215 /* We can permit four significant bits of mantissa only, plus a high bit
11216 which is always 1. */
11217 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
11218 if ((mantissa
& mask
) != 0)
11221 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11222 mantissa
>>= point_pos
- 5;
11224 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11225 floating-point immediate zero with Neon using an integer-zero load, but
11226 that case is handled elsewhere.) */
11230 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11232 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11233 normalized significands are in the range [1, 2). (Our mantissa is shifted
11234 left 4 places at this point relative to normalized IEEE754 values). GCC
11235 internally uses [0.5, 1) (see real.c), so the exponent returned from
11236 REAL_EXP must be altered. */
11237 exponent
= 5 - exponent
;
11239 if (exponent
< 0 || exponent
> 7)
11242 /* Sign, mantissa and exponent are now in the correct form to plug into the
11243 formula described in the comment above. */
11244 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11247 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11249 vfp3_const_double_rtx (rtx x
)
11254 return vfp3_const_double_index (x
) != -1;
11257 /* Recognize immediates which can be used in various Neon instructions. Legal
11258 immediates are described by the following table (for VMVN variants, the
11259 bitwise inverse of the constant shown is recognized. In either case, VMOV
11260 is output and the correct instruction to use for a given constant is chosen
11261 by the assembler). The constant shown is replicated across all elements of
11262 the destination vector.
11264 insn elems variant constant (binary)
11265 ---- ----- ------- -----------------
11266 vmov i32 0 00000000 00000000 00000000 abcdefgh
11267 vmov i32 1 00000000 00000000 abcdefgh 00000000
11268 vmov i32 2 00000000 abcdefgh 00000000 00000000
11269 vmov i32 3 abcdefgh 00000000 00000000 00000000
11270 vmov i16 4 00000000 abcdefgh
11271 vmov i16 5 abcdefgh 00000000
11272 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11273 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11274 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11275 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11276 vmvn i16 10 00000000 abcdefgh
11277 vmvn i16 11 abcdefgh 00000000
11278 vmov i32 12 00000000 00000000 abcdefgh 11111111
11279 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11280 vmov i32 14 00000000 abcdefgh 11111111 11111111
11281 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11282 vmov i8 16 abcdefgh
11283 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11284 eeeeeeee ffffffff gggggggg hhhhhhhh
11285 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11286 vmov f32 19 00000000 00000000 00000000 00000000
11288 For case 18, B = !b. Representable values are exactly those accepted by
11289 vfp3_const_double_index, but are output as floating-point numbers rather
11292 For case 19, we will change it to vmov.i32 when assembling.
11294 Variants 0-5 (inclusive) may also be used as immediates for the second
11295 operand of VORR/VBIC instructions.
11297 The INVERSE argument causes the bitwise inverse of the given operand to be
11298 recognized instead (used for recognizing legal immediates for the VAND/VORN
11299 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11300 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11301 output, rather than the real insns vbic/vorr).
11303 INVERSE makes no difference to the recognition of float vectors.
11305 The return value is the variant of immediate as shown in the above table, or
11306 -1 if the given value doesn't match any of the listed patterns.
11309 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
11310 rtx
*modconst
, int *elementwidth
)
11312 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11314 for (i = 0; i < idx; i += (STRIDE)) \
11319 immtype = (CLASS); \
11320 elsize = (ELSIZE); \
11324 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11325 unsigned int innersize
;
11326 unsigned char bytes
[16];
11327 int immtype
= -1, matches
;
11328 unsigned int invmask
= inverse
? 0xff : 0;
11329 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11333 n_elts
= CONST_VECTOR_NUNITS (op
);
11334 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
11339 if (mode
== VOIDmode
)
11341 innersize
= GET_MODE_SIZE (mode
);
11344 /* Vectors of float constants. */
11345 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11347 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11348 REAL_VALUE_TYPE r0
;
11350 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11353 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
11355 for (i
= 1; i
< n_elts
; i
++)
11357 rtx elt
= CONST_VECTOR_ELT (op
, i
);
11358 REAL_VALUE_TYPE re
;
11360 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
11362 if (!REAL_VALUES_EQUAL (r0
, re
))
11367 *modconst
= CONST_VECTOR_ELT (op
, 0);
11372 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11378 /* Splat vector constant out into a byte vector. */
11379 for (i
= 0; i
< n_elts
; i
++)
11381 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11382 unsigned HOST_WIDE_INT elpart
;
11383 unsigned int part
, parts
;
11385 if (CONST_INT_P (el
))
11387 elpart
= INTVAL (el
);
11390 else if (CONST_DOUBLE_P (el
))
11392 elpart
= CONST_DOUBLE_LOW (el
);
11396 gcc_unreachable ();
11398 for (part
= 0; part
< parts
; part
++)
11401 for (byte
= 0; byte
< innersize
; byte
++)
11403 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11404 elpart
>>= BITS_PER_UNIT
;
11406 if (CONST_DOUBLE_P (el
))
11407 elpart
= CONST_DOUBLE_HIGH (el
);
11411 /* Sanity check. */
11412 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11416 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11417 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11419 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11420 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11422 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11423 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11425 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11426 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11428 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11430 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11432 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11433 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11435 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11436 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11438 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11439 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11441 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11442 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11444 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11446 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11448 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11449 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11451 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11452 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11454 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11455 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11457 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11458 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11460 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11462 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11463 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11471 *elementwidth
= elsize
;
11475 unsigned HOST_WIDE_INT imm
= 0;
11477 /* Un-invert bytes of recognized vector, if necessary. */
11479 for (i
= 0; i
< idx
; i
++)
11480 bytes
[i
] ^= invmask
;
11484 /* FIXME: Broken on 32-bit H_W_I hosts. */
11485 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11487 for (i
= 0; i
< 8; i
++)
11488 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11489 << (i
* BITS_PER_UNIT
);
11491 *modconst
= GEN_INT (imm
);
11495 unsigned HOST_WIDE_INT imm
= 0;
11497 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11498 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11500 *modconst
= GEN_INT (imm
);
11508 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11509 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11510 float elements), and a modified constant (whatever should be output for a
11511 VMOV) in *MODCONST. */
11514 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
11515 rtx
*modconst
, int *elementwidth
)
11519 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
11525 *modconst
= tmpconst
;
11528 *elementwidth
= tmpwidth
;
11533 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11534 the immediate is valid, write a constant suitable for using as an operand
11535 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11536 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11539 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
11540 rtx
*modconst
, int *elementwidth
)
11544 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
11546 if (retval
< 0 || retval
> 5)
11550 *modconst
= tmpconst
;
11553 *elementwidth
= tmpwidth
;
11558 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11559 the immediate is valid, write a constant suitable for using as an operand
11560 to VSHR/VSHL to *MODCONST and the corresponding element width to
11561 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
11562 because they have different limitations. */
11565 neon_immediate_valid_for_shift (rtx op
, enum machine_mode mode
,
11566 rtx
*modconst
, int *elementwidth
,
11569 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
11570 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
11571 unsigned HOST_WIDE_INT last_elt
= 0;
11572 unsigned HOST_WIDE_INT maxshift
;
11574 /* Split vector constant out into a byte vector. */
11575 for (i
= 0; i
< n_elts
; i
++)
11577 rtx el
= CONST_VECTOR_ELT (op
, i
);
11578 unsigned HOST_WIDE_INT elpart
;
11580 if (CONST_INT_P (el
))
11581 elpart
= INTVAL (el
);
11582 else if (CONST_DOUBLE_P (el
))
11585 gcc_unreachable ();
11587 if (i
!= 0 && elpart
!= last_elt
)
11593 /* Shift less than element size. */
11594 maxshift
= innersize
* 8;
11598 /* Left shift immediate value can be from 0 to <size>-1. */
11599 if (last_elt
>= maxshift
)
11604 /* Right shift immediate value can be from 1 to <size>. */
11605 if (last_elt
== 0 || last_elt
> maxshift
)
11610 *elementwidth
= innersize
* 8;
11613 *modconst
= CONST_VECTOR_ELT (op
, 0);
11618 /* Return a string suitable for output of Neon immediate logic operation
11622 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
11623 int inverse
, int quad
)
11625 int width
, is_valid
;
11626 static char templ
[40];
11628 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
11630 gcc_assert (is_valid
!= 0);
11633 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
11635 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
11640 /* Return a string suitable for output of Neon immediate shift operation
11641 (VSHR or VSHL) MNEM. */
11644 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
11645 enum machine_mode mode
, int quad
,
11648 int width
, is_valid
;
11649 static char templ
[40];
11651 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
11652 gcc_assert (is_valid
!= 0);
11655 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
11657 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
11662 /* Output a sequence of pairwise operations to implement a reduction.
11663 NOTE: We do "too much work" here, because pairwise operations work on two
11664 registers-worth of operands in one go. Unfortunately we can't exploit those
11665 extra calculations to do the full operation in fewer steps, I don't think.
11666 Although all vector elements of the result but the first are ignored, we
11667 actually calculate the same result in each of the elements. An alternative
11668 such as initially loading a vector with zero to use as each of the second
11669 operands would use up an additional register and take an extra instruction,
11670 for no particular gain. */
11673 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
11674 rtx (*reduc
) (rtx
, rtx
, rtx
))
11676 enum machine_mode inner
= GET_MODE_INNER (mode
);
11677 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
11680 for (i
= parts
/ 2; i
>= 1; i
/= 2)
11682 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
11683 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
11688 /* If VALS is a vector constant that can be loaded into a register
11689 using VDUP, generate instructions to do so and return an RTX to
11690 assign to the register. Otherwise return NULL_RTX. */
11693 neon_vdup_constant (rtx vals
)
11695 enum machine_mode mode
= GET_MODE (vals
);
11696 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
11697 int n_elts
= GET_MODE_NUNITS (mode
);
11698 bool all_same
= true;
11702 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
11705 for (i
= 0; i
< n_elts
; ++i
)
11707 x
= XVECEXP (vals
, 0, i
);
11708 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
11713 /* The elements are not all the same. We could handle repeating
11714 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
11715 {0, C, 0, C, 0, C, 0, C} which can be loaded using
11719 /* We can load this constant by using VDUP and a constant in a
11720 single ARM register. This will be cheaper than a vector
11723 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
11724 return gen_rtx_VEC_DUPLICATE (mode
, x
);
11727 /* Generate code to load VALS, which is a PARALLEL containing only
11728 constants (for vec_init) or CONST_VECTOR, efficiently into a
11729 register. Returns an RTX to copy into the register, or NULL_RTX
11730 for a PARALLEL that can not be converted into a CONST_VECTOR. */
11733 neon_make_constant (rtx vals
)
11735 enum machine_mode mode
= GET_MODE (vals
);
11737 rtx const_vec
= NULL_RTX
;
11738 int n_elts
= GET_MODE_NUNITS (mode
);
11742 if (GET_CODE (vals
) == CONST_VECTOR
)
11744 else if (GET_CODE (vals
) == PARALLEL
)
11746 /* A CONST_VECTOR must contain only CONST_INTs and
11747 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11748 Only store valid constants in a CONST_VECTOR. */
11749 for (i
= 0; i
< n_elts
; ++i
)
11751 rtx x
= XVECEXP (vals
, 0, i
);
11752 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
11755 if (n_const
== n_elts
)
11756 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
11759 gcc_unreachable ();
11761 if (const_vec
!= NULL
11762 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
11763 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
11765 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
11766 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
11767 pipeline cycle; creating the constant takes one or two ARM
11768 pipeline cycles. */
11770 else if (const_vec
!= NULL_RTX
)
11771 /* Load from constant pool. On Cortex-A8 this takes two cycles
11772 (for either double or quad vectors). We can not take advantage
11773 of single-cycle VLD1 because we need a PC-relative addressing
11777 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11778 We can not construct an initializer. */
11782 /* Initialize vector TARGET to VALS. */
11785 neon_expand_vector_init (rtx target
, rtx vals
)
11787 enum machine_mode mode
= GET_MODE (target
);
11788 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
11789 int n_elts
= GET_MODE_NUNITS (mode
);
11790 int n_var
= 0, one_var
= -1;
11791 bool all_same
= true;
11795 for (i
= 0; i
< n_elts
; ++i
)
11797 x
= XVECEXP (vals
, 0, i
);
11798 if (!CONSTANT_P (x
))
11799 ++n_var
, one_var
= i
;
11801 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
11807 rtx constant
= neon_make_constant (vals
);
11808 if (constant
!= NULL_RTX
)
11810 emit_move_insn (target
, constant
);
11815 /* Splat a single non-constant element if we can. */
11816 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
11818 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
11819 emit_insn (gen_rtx_SET (VOIDmode
, target
,
11820 gen_rtx_VEC_DUPLICATE (mode
, x
)));
11824 /* One field is non-constant. Load constant then overwrite varying
11825 field. This is more efficient than using the stack. */
11828 rtx copy
= copy_rtx (vals
);
11829 rtx index
= GEN_INT (one_var
);
11831 /* Load constant part of vector, substitute neighboring value for
11832 varying element. */
11833 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
11834 neon_expand_vector_init (target
, copy
);
11836 /* Insert variable. */
11837 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
11841 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
11844 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
11847 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
11850 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
11853 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
11856 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
11859 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
11862 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
11865 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
11868 gcc_unreachable ();
11873 /* Construct the vector in memory one field at a time
11874 and load the whole vector. */
11875 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
11876 for (i
= 0; i
< n_elts
; i
++)
11877 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
11878 i
* GET_MODE_SIZE (inner_mode
)),
11879 XVECEXP (vals
, 0, i
));
11880 emit_move_insn (target
, mem
);
11883 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
11884 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
11885 reported source locations are bogus. */
11888 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
11891 HOST_WIDE_INT lane
;
11893 gcc_assert (CONST_INT_P (operand
));
11895 lane
= INTVAL (operand
);
11897 if (lane
< low
|| lane
>= high
)
11901 /* Bounds-check lanes. */
11904 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
11906 bounds_check (operand
, low
, high
, "lane out of range");
11909 /* Bounds-check constants. */
11912 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
11914 bounds_check (operand
, low
, high
, "constant out of range");
11918 neon_element_bits (enum machine_mode mode
)
11920 if (mode
== DImode
)
11921 return GET_MODE_BITSIZE (mode
);
11923 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
11927 /* Predicates for `match_operand' and `match_operator'. */
11929 /* Return TRUE if OP is a valid coprocessor memory address pattern.
11930 WB is true if full writeback address modes are allowed and is false
11931 if limited writeback address modes (POST_INC and PRE_DEC) are
11935 arm_coproc_mem_operand (rtx op
, bool wb
)
11939 /* Reject eliminable registers. */
11940 if (! (reload_in_progress
|| reload_completed
)
11941 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
11942 || reg_mentioned_p (arg_pointer_rtx
, op
)
11943 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
11944 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
11945 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
11946 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
11949 /* Constants are converted into offsets from labels. */
11953 ind
= XEXP (op
, 0);
11955 if (reload_completed
11956 && (GET_CODE (ind
) == LABEL_REF
11957 || (GET_CODE (ind
) == CONST
11958 && GET_CODE (XEXP (ind
, 0)) == PLUS
11959 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
11960 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
11963 /* Match: (mem (reg)). */
11965 return arm_address_register_rtx_p (ind
, 0);
11967 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
11968 acceptable in any case (subject to verification by
11969 arm_address_register_rtx_p). We need WB to be true to accept
11970 PRE_INC and POST_DEC. */
11971 if (GET_CODE (ind
) == POST_INC
11972 || GET_CODE (ind
) == PRE_DEC
11974 && (GET_CODE (ind
) == PRE_INC
11975 || GET_CODE (ind
) == POST_DEC
)))
11976 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
11979 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
11980 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
11981 && GET_CODE (XEXP (ind
, 1)) == PLUS
11982 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
11983 ind
= XEXP (ind
, 1);
11988 if (GET_CODE (ind
) == PLUS
11989 && REG_P (XEXP (ind
, 0))
11990 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
11991 && CONST_INT_P (XEXP (ind
, 1))
11992 && INTVAL (XEXP (ind
, 1)) > -1024
11993 && INTVAL (XEXP (ind
, 1)) < 1024
11994 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12000 /* Return TRUE if OP is a memory operand which we can load or store a vector
12001 to/from. TYPE is one of the following values:
12002 0 - Vector load/stor (vldr)
12003 1 - Core registers (ldm)
12004 2 - Element/structure loads (vld1)
12007 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12011 /* Reject eliminable registers. */
12012 if (! (reload_in_progress
|| reload_completed
)
12013 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12014 || reg_mentioned_p (arg_pointer_rtx
, op
)
12015 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12016 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12017 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12018 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12021 /* Constants are converted into offsets from labels. */
12025 ind
= XEXP (op
, 0);
12027 if (reload_completed
12028 && (GET_CODE (ind
) == LABEL_REF
12029 || (GET_CODE (ind
) == CONST
12030 && GET_CODE (XEXP (ind
, 0)) == PLUS
12031 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12032 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12035 /* Match: (mem (reg)). */
12037 return arm_address_register_rtx_p (ind
, 0);
12039 /* Allow post-increment with Neon registers. */
12040 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12041 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12042 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12044 /* FIXME: vld1 allows register post-modify. */
12050 && GET_CODE (ind
) == PLUS
12051 && REG_P (XEXP (ind
, 0))
12052 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12053 && CONST_INT_P (XEXP (ind
, 1))
12054 && INTVAL (XEXP (ind
, 1)) > -1024
12055 /* For quad modes, we restrict the constant offset to be slightly less
12056 than what the instruction format permits. We have no such constraint
12057 on double mode offsets. (This must match arm_legitimate_index_p.) */
12058 && (INTVAL (XEXP (ind
, 1))
12059 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12060 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12066 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12069 neon_struct_mem_operand (rtx op
)
12073 /* Reject eliminable registers. */
12074 if (! (reload_in_progress
|| reload_completed
)
12075 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12076 || reg_mentioned_p (arg_pointer_rtx
, op
)
12077 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12078 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12079 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12080 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12083 /* Constants are converted into offsets from labels. */
12087 ind
= XEXP (op
, 0);
12089 if (reload_completed
12090 && (GET_CODE (ind
) == LABEL_REF
12091 || (GET_CODE (ind
) == CONST
12092 && GET_CODE (XEXP (ind
, 0)) == PLUS
12093 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12094 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12097 /* Match: (mem (reg)). */
12099 return arm_address_register_rtx_p (ind
, 0);
12101 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12102 if (GET_CODE (ind
) == POST_INC
12103 || GET_CODE (ind
) == PRE_DEC
)
12104 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12109 /* Return true if X is a register that will be eliminated later on. */
12111 arm_eliminable_register (rtx x
)
12113 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12114 || REGNO (x
) == ARG_POINTER_REGNUM
12115 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12116 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12119 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12120 coprocessor registers. Otherwise return NO_REGS. */
12123 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
12125 if (mode
== HFmode
)
12127 if (!TARGET_NEON_FP16
)
12128 return GENERAL_REGS
;
12129 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12131 return GENERAL_REGS
;
12134 /* The neon move patterns handle all legitimate vector and struct
12137 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12138 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12139 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12140 || VALID_NEON_STRUCT_MODE (mode
)))
12143 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12146 return GENERAL_REGS
;
12149 /* Values which must be returned in the most-significant end of the return
12153 arm_return_in_msb (const_tree valtype
)
12155 return (TARGET_AAPCS_BASED
12156 && BYTES_BIG_ENDIAN
12157 && (AGGREGATE_TYPE_P (valtype
)
12158 || TREE_CODE (valtype
) == COMPLEX_TYPE
12159 || FIXED_POINT_TYPE_P (valtype
)));
12162 /* Return TRUE if X references a SYMBOL_REF. */
12164 symbol_mentioned_p (rtx x
)
12169 if (GET_CODE (x
) == SYMBOL_REF
)
12172 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12173 are constant offsets, not symbols. */
12174 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12177 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12179 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12185 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12186 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12189 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12196 /* Return TRUE if X references a LABEL_REF. */
12198 label_mentioned_p (rtx x
)
12203 if (GET_CODE (x
) == LABEL_REF
)
12206 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12207 instruction, but they are constant offsets, not symbols. */
12208 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12211 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12212 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12218 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12219 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12222 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12230 tls_mentioned_p (rtx x
)
12232 switch (GET_CODE (x
))
12235 return tls_mentioned_p (XEXP (x
, 0));
12238 if (XINT (x
, 1) == UNSPEC_TLS
)
12246 /* Must not copy any rtx that uses a pc-relative address. */
12249 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
12251 if (GET_CODE (*x
) == UNSPEC
12252 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
12253 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
12259 arm_cannot_copy_insn_p (rtx insn
)
12261 /* The tls call insn cannot be copied, as it is paired with a data
12263 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12266 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
12270 minmax_code (rtx x
)
12272 enum rtx_code code
= GET_CODE (x
);
12285 gcc_unreachable ();
12289 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12292 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12293 int *mask
, bool *signed_sat
)
12295 /* The high bound must be a power of two minus one. */
12296 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12300 /* The low bound is either zero (for usat) or one less than the
12301 negation of the high bound (for ssat). */
12302 if (INTVAL (lo_bound
) == 0)
12307 *signed_sat
= false;
12312 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12317 *signed_sat
= true;
12325 /* Return 1 if memory locations are adjacent. */
12327 adjacent_mem_locations (rtx a
, rtx b
)
12329 /* We don't guarantee to preserve the order of these memory refs. */
12330 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12333 if ((REG_P (XEXP (a
, 0))
12334 || (GET_CODE (XEXP (a
, 0)) == PLUS
12335 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12336 && (REG_P (XEXP (b
, 0))
12337 || (GET_CODE (XEXP (b
, 0)) == PLUS
12338 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12340 HOST_WIDE_INT val0
= 0, val1
= 0;
12344 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12346 reg0
= XEXP (XEXP (a
, 0), 0);
12347 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12350 reg0
= XEXP (a
, 0);
12352 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12354 reg1
= XEXP (XEXP (b
, 0), 0);
12355 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12358 reg1
= XEXP (b
, 0);
12360 /* Don't accept any offset that will require multiple
12361 instructions to handle, since this would cause the
12362 arith_adjacentmem pattern to output an overlong sequence. */
12363 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12366 /* Don't allow an eliminable register: register elimination can make
12367 the offset too large. */
12368 if (arm_eliminable_register (reg0
))
12371 val_diff
= val1
- val0
;
12375 /* If the target has load delay slots, then there's no benefit
12376 to using an ldm instruction unless the offset is zero and
12377 we are optimizing for size. */
12378 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12379 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12380 && (val_diff
== 4 || val_diff
== -4));
12383 return ((REGNO (reg0
) == REGNO (reg1
))
12384 && (val_diff
== 4 || val_diff
== -4));
12390 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12391 for load operations, false for store operations. CONSECUTIVE is true
12392 if the register numbers in the operation must be consecutive in the register
12393 bank. RETURN_PC is true if value is to be loaded in PC.
12394 The pattern we are trying to match for load is:
12395 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12396 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12399 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12402 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12403 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12404 3. If consecutive is TRUE, then for kth register being loaded,
12405 REGNO (R_dk) = REGNO (R_d0) + k.
12406 The pattern for store is similar. */
12408 ldm_stm_operation_p (rtx op
, bool load
, enum machine_mode mode
,
12409 bool consecutive
, bool return_pc
)
12411 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12412 rtx reg
, mem
, addr
;
12414 unsigned first_regno
;
12415 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12417 bool addr_reg_in_reglist
= false;
12418 bool update
= false;
12423 /* If not in SImode, then registers must be consecutive
12424 (e.g., VLDM instructions for DFmode). */
12425 gcc_assert ((mode
== SImode
) || consecutive
);
12426 /* Setting return_pc for stores is illegal. */
12427 gcc_assert (!return_pc
|| load
);
12429 /* Set up the increments and the regs per val based on the mode. */
12430 reg_increment
= GET_MODE_SIZE (mode
);
12431 regs_per_val
= reg_increment
/ 4;
12432 offset_adj
= return_pc
? 1 : 0;
12435 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12436 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12439 /* Check if this is a write-back. */
12440 elt
= XVECEXP (op
, 0, offset_adj
);
12441 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12447 /* The offset adjustment must be the number of registers being
12448 popped times the size of a single register. */
12449 if (!REG_P (SET_DEST (elt
))
12450 || !REG_P (XEXP (SET_SRC (elt
), 0))
12451 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12452 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12453 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12454 ((count
- 1 - offset_adj
) * reg_increment
))
12458 i
= i
+ offset_adj
;
12459 base
= base
+ offset_adj
;
12460 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12461 success depends on the type: VLDM can do just one reg,
12462 LDM must do at least two. */
12463 if ((count
<= i
) && (mode
== SImode
))
12466 elt
= XVECEXP (op
, 0, i
- 1);
12467 if (GET_CODE (elt
) != SET
)
12472 reg
= SET_DEST (elt
);
12473 mem
= SET_SRC (elt
);
12477 reg
= SET_SRC (elt
);
12478 mem
= SET_DEST (elt
);
12481 if (!REG_P (reg
) || !MEM_P (mem
))
12484 regno
= REGNO (reg
);
12485 first_regno
= regno
;
12486 addr
= XEXP (mem
, 0);
12487 if (GET_CODE (addr
) == PLUS
)
12489 if (!CONST_INT_P (XEXP (addr
, 1)))
12492 offset
= INTVAL (XEXP (addr
, 1));
12493 addr
= XEXP (addr
, 0);
12499 /* Don't allow SP to be loaded unless it is also the base register. It
12500 guarantees that SP is reset correctly when an LDM instruction
12501 is interrupted. Otherwise, we might end up with a corrupt stack. */
12502 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12505 for (; i
< count
; i
++)
12507 elt
= XVECEXP (op
, 0, i
);
12508 if (GET_CODE (elt
) != SET
)
12513 reg
= SET_DEST (elt
);
12514 mem
= SET_SRC (elt
);
12518 reg
= SET_SRC (elt
);
12519 mem
= SET_DEST (elt
);
12523 || GET_MODE (reg
) != mode
12524 || REGNO (reg
) <= regno
12527 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12528 /* Don't allow SP to be loaded unless it is also the base register. It
12529 guarantees that SP is reset correctly when an LDM instruction
12530 is interrupted. Otherwise, we might end up with a corrupt stack. */
12531 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12533 || GET_MODE (mem
) != mode
12534 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12535 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12536 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
12537 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
12538 offset
+ (i
- base
) * reg_increment
))
12539 && (!REG_P (XEXP (mem
, 0))
12540 || offset
+ (i
- base
) * reg_increment
!= 0)))
12543 regno
= REGNO (reg
);
12544 if (regno
== REGNO (addr
))
12545 addr_reg_in_reglist
= true;
12550 if (update
&& addr_reg_in_reglist
)
12553 /* For Thumb-1, address register is always modified - either by write-back
12554 or by explicit load. If the pattern does not describe an update,
12555 then the address register must be in the list of loaded registers. */
12557 return update
|| addr_reg_in_reglist
;
12563 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12564 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12565 instruction. ADD_OFFSET is nonzero if the base address register needs
12566 to be modified with an add instruction before we can use it. */
12569 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
12570 int nops
, HOST_WIDE_INT add_offset
)
12572 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12573 if the offset isn't small enough. The reason 2 ldrs are faster
12574 is because these ARMs are able to do more than one cache access
12575 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12576 whilst the ARM8 has a double bandwidth cache. This means that
12577 these cores can do both an instruction fetch and a data fetch in
12578 a single cycle, so the trick of calculating the address into a
12579 scratch register (one of the result regs) and then doing a load
12580 multiple actually becomes slower (and no smaller in code size).
12581 That is the transformation
12583 ldr rd1, [rbase + offset]
12584 ldr rd2, [rbase + offset + 4]
12588 add rd1, rbase, offset
12589 ldmia rd1, {rd1, rd2}
12591 produces worse code -- '3 cycles + any stalls on rd2' instead of
12592 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12593 access per cycle, the first sequence could never complete in less
12594 than 6 cycles, whereas the ldm sequence would only take 5 and
12595 would make better use of sequential accesses if not hitting the
12598 We cheat here and test 'arm_ld_sched' which we currently know to
12599 only be true for the ARM8, ARM9 and StrongARM. If this ever
12600 changes, then the test below needs to be reworked. */
12601 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
12604 /* XScale has load-store double instructions, but they have stricter
12605 alignment requirements than load-store multiple, so we cannot
12608 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12609 the pipeline until completion.
12617 An ldr instruction takes 1-3 cycles, but does not block the
12626 Best case ldr will always win. However, the more ldr instructions
12627 we issue, the less likely we are to be able to schedule them well.
12628 Using ldr instructions also increases code size.
12630 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12631 for counts of 3 or 4 regs. */
12632 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
12637 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12638 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12639 an array ORDER which describes the sequence to use when accessing the
12640 offsets that produces an ascending order. In this sequence, each
12641 offset must be larger by exactly 4 than the previous one. ORDER[0]
12642 must have been filled in with the lowest offset by the caller.
12643 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12644 we use to verify that ORDER produces an ascending order of registers.
12645 Return true if it was possible to construct such an order, false if
12649 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
12650 int *unsorted_regs
)
12653 for (i
= 1; i
< nops
; i
++)
12657 order
[i
] = order
[i
- 1];
12658 for (j
= 0; j
< nops
; j
++)
12659 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
12661 /* We must find exactly one offset that is higher than the
12662 previous one by 4. */
12663 if (order
[i
] != order
[i
- 1])
12667 if (order
[i
] == order
[i
- 1])
12669 /* The register numbers must be ascending. */
12670 if (unsorted_regs
!= NULL
12671 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
12677 /* Used to determine in a peephole whether a sequence of load
12678 instructions can be changed into a load-multiple instruction.
12679 NOPS is the number of separate load instructions we are examining. The
12680 first NOPS entries in OPERANDS are the destination registers, the
12681 next NOPS entries are memory operands. If this function is
12682 successful, *BASE is set to the common base register of the memory
12683 accesses; *LOAD_OFFSET is set to the first memory location's offset
12684 from that base register.
12685 REGS is an array filled in with the destination register numbers.
12686 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
12687 insn numbers to an ascending order of stores. If CHECK_REGS is true,
12688 the sequence of registers in REGS matches the loads from ascending memory
12689 locations, and the function verifies that the register numbers are
12690 themselves ascending. If CHECK_REGS is false, the register numbers
12691 are stored in the order they are found in the operands. */
12693 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
12694 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
12696 int unsorted_regs
[MAX_LDM_STM_OPS
];
12697 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
12698 int order
[MAX_LDM_STM_OPS
];
12699 rtx base_reg_rtx
= NULL
;
12703 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12704 easily extended if required. */
12705 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
12707 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
12709 /* Loop over the operands and check that the memory references are
12710 suitable (i.e. immediate offsets from the same base register). At
12711 the same time, extract the target register, and the memory
12713 for (i
= 0; i
< nops
; i
++)
12718 /* Convert a subreg of a mem into the mem itself. */
12719 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
12720 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
12722 gcc_assert (MEM_P (operands
[nops
+ i
]));
12724 /* Don't reorder volatile memory references; it doesn't seem worth
12725 looking for the case where the order is ok anyway. */
12726 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
12729 offset
= const0_rtx
;
12731 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
12732 || (GET_CODE (reg
) == SUBREG
12733 && REG_P (reg
= SUBREG_REG (reg
))))
12734 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
12735 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
12736 || (GET_CODE (reg
) == SUBREG
12737 && REG_P (reg
= SUBREG_REG (reg
))))
12738 && (CONST_INT_P (offset
12739 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
12743 base_reg
= REGNO (reg
);
12744 base_reg_rtx
= reg
;
12745 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
12748 else if (base_reg
!= (int) REGNO (reg
))
12749 /* Not addressed from the same base register. */
12752 unsorted_regs
[i
] = (REG_P (operands
[i
])
12753 ? REGNO (operands
[i
])
12754 : REGNO (SUBREG_REG (operands
[i
])));
12756 /* If it isn't an integer register, or if it overwrites the
12757 base register but isn't the last insn in the list, then
12758 we can't do this. */
12759 if (unsorted_regs
[i
] < 0
12760 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
12761 || unsorted_regs
[i
] > 14
12762 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
12765 /* Don't allow SP to be loaded unless it is also the base
12766 register. It guarantees that SP is reset correctly when
12767 an LDM instruction is interrupted. Otherwise, we might
12768 end up with a corrupt stack. */
12769 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
12772 unsorted_offsets
[i
] = INTVAL (offset
);
12773 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
12777 /* Not a suitable memory address. */
12781 /* All the useful information has now been extracted from the
12782 operands into unsorted_regs and unsorted_offsets; additionally,
12783 order[0] has been set to the lowest offset in the list. Sort
12784 the offsets into order, verifying that they are adjacent, and
12785 check that the register numbers are ascending. */
12786 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
12787 check_regs
? unsorted_regs
: NULL
))
12791 memcpy (saved_order
, order
, sizeof order
);
12797 for (i
= 0; i
< nops
; i
++)
12798 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
12800 *load_offset
= unsorted_offsets
[order
[0]];
12804 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
12807 if (unsorted_offsets
[order
[0]] == 0)
12808 ldm_case
= 1; /* ldmia */
12809 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
12810 ldm_case
= 2; /* ldmib */
12811 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
12812 ldm_case
= 3; /* ldmda */
12813 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
12814 ldm_case
= 4; /* ldmdb */
12815 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
12816 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
12821 if (!multiple_operation_profitable_p (false, nops
,
12823 ? unsorted_offsets
[order
[0]] : 0))
12829 /* Used to determine in a peephole whether a sequence of store instructions can
12830 be changed into a store-multiple instruction.
12831 NOPS is the number of separate store instructions we are examining.
12832 NOPS_TOTAL is the total number of instructions recognized by the peephole
12834 The first NOPS entries in OPERANDS are the source registers, the next
12835 NOPS entries are memory operands. If this function is successful, *BASE is
12836 set to the common base register of the memory accesses; *LOAD_OFFSET is set
12837 to the first memory location's offset from that base register. REGS is an
12838 array filled in with the source register numbers, REG_RTXS (if nonnull) is
12839 likewise filled with the corresponding rtx's.
12840 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
12841 numbers to an ascending order of stores.
12842 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
12843 from ascending memory locations, and the function verifies that the register
12844 numbers are themselves ascending. If CHECK_REGS is false, the register
12845 numbers are stored in the order they are found in the operands. */
12847 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
12848 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
12849 HOST_WIDE_INT
*load_offset
, bool check_regs
)
12851 int unsorted_regs
[MAX_LDM_STM_OPS
];
12852 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
12853 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
12854 int order
[MAX_LDM_STM_OPS
];
12856 rtx base_reg_rtx
= NULL
;
12859 /* Write back of base register is currently only supported for Thumb 1. */
12860 int base_writeback
= TARGET_THUMB1
;
12862 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12863 easily extended if required. */
12864 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
12866 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
12868 /* Loop over the operands and check that the memory references are
12869 suitable (i.e. immediate offsets from the same base register). At
12870 the same time, extract the target register, and the memory
12872 for (i
= 0; i
< nops
; i
++)
12877 /* Convert a subreg of a mem into the mem itself. */
12878 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
12879 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
12881 gcc_assert (MEM_P (operands
[nops
+ i
]));
12883 /* Don't reorder volatile memory references; it doesn't seem worth
12884 looking for the case where the order is ok anyway. */
12885 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
12888 offset
= const0_rtx
;
12890 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
12891 || (GET_CODE (reg
) == SUBREG
12892 && REG_P (reg
= SUBREG_REG (reg
))))
12893 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
12894 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
12895 || (GET_CODE (reg
) == SUBREG
12896 && REG_P (reg
= SUBREG_REG (reg
))))
12897 && (CONST_INT_P (offset
12898 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
12900 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
12901 ? operands
[i
] : SUBREG_REG (operands
[i
]));
12902 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
12906 base_reg
= REGNO (reg
);
12907 base_reg_rtx
= reg
;
12908 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
12911 else if (base_reg
!= (int) REGNO (reg
))
12912 /* Not addressed from the same base register. */
12915 /* If it isn't an integer register, then we can't do this. */
12916 if (unsorted_regs
[i
] < 0
12917 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
12918 /* The effects are unpredictable if the base register is
12919 both updated and stored. */
12920 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
12921 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
12922 || unsorted_regs
[i
] > 14)
12925 unsorted_offsets
[i
] = INTVAL (offset
);
12926 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
12930 /* Not a suitable memory address. */
12934 /* All the useful information has now been extracted from the
12935 operands into unsorted_regs and unsorted_offsets; additionally,
12936 order[0] has been set to the lowest offset in the list. Sort
12937 the offsets into order, verifying that they are adjacent, and
12938 check that the register numbers are ascending. */
12939 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
12940 check_regs
? unsorted_regs
: NULL
))
12944 memcpy (saved_order
, order
, sizeof order
);
12950 for (i
= 0; i
< nops
; i
++)
12952 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
12954 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
12957 *load_offset
= unsorted_offsets
[order
[0]];
12961 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
12964 if (unsorted_offsets
[order
[0]] == 0)
12965 stm_case
= 1; /* stmia */
12966 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
12967 stm_case
= 2; /* stmib */
12968 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
12969 stm_case
= 3; /* stmda */
12970 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
12971 stm_case
= 4; /* stmdb */
12975 if (!multiple_operation_profitable_p (false, nops
, 0))
12981 /* Routines for use in generating RTL. */
12983 /* Generate a load-multiple instruction. COUNT is the number of loads in
12984 the instruction; REGS and MEMS are arrays containing the operands.
12985 BASEREG is the base register to be used in addressing the memory operands.
12986 WBACK_OFFSET is nonzero if the instruction should update the base
12990 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
12991 HOST_WIDE_INT wback_offset
)
12996 if (!multiple_operation_profitable_p (false, count
, 0))
13002 for (i
= 0; i
< count
; i
++)
13003 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13005 if (wback_offset
!= 0)
13006 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13008 seq
= get_insns ();
13014 result
= gen_rtx_PARALLEL (VOIDmode
,
13015 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13016 if (wback_offset
!= 0)
13018 XVECEXP (result
, 0, 0)
13019 = gen_rtx_SET (VOIDmode
, basereg
,
13020 plus_constant (Pmode
, basereg
, wback_offset
));
13025 for (j
= 0; i
< count
; i
++, j
++)
13026 XVECEXP (result
, 0, i
)
13027 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13032 /* Generate a store-multiple instruction. COUNT is the number of stores in
13033 the instruction; REGS and MEMS are arrays containing the operands.
13034 BASEREG is the base register to be used in addressing the memory operands.
13035 WBACK_OFFSET is nonzero if the instruction should update the base
13039 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13040 HOST_WIDE_INT wback_offset
)
13045 if (GET_CODE (basereg
) == PLUS
)
13046 basereg
= XEXP (basereg
, 0);
13048 if (!multiple_operation_profitable_p (false, count
, 0))
13054 for (i
= 0; i
< count
; i
++)
13055 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13057 if (wback_offset
!= 0)
13058 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13060 seq
= get_insns ();
13066 result
= gen_rtx_PARALLEL (VOIDmode
,
13067 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13068 if (wback_offset
!= 0)
13070 XVECEXP (result
, 0, 0)
13071 = gen_rtx_SET (VOIDmode
, basereg
,
13072 plus_constant (Pmode
, basereg
, wback_offset
));
13077 for (j
= 0; i
< count
; i
++, j
++)
13078 XVECEXP (result
, 0, i
)
13079 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13084 /* Generate either a load-multiple or a store-multiple instruction. This
13085 function can be used in situations where we can start with a single MEM
13086 rtx and adjust its address upwards.
13087 COUNT is the number of operations in the instruction, not counting a
13088 possible update of the base register. REGS is an array containing the
13090 BASEREG is the base register to be used in addressing the memory operands,
13091 which are constructed from BASEMEM.
13092 WRITE_BACK specifies whether the generated instruction should include an
13093 update of the base register.
13094 OFFSETP is used to pass an offset to and from this function; this offset
13095 is not used when constructing the address (instead BASEMEM should have an
13096 appropriate offset in its address), it is used only for setting
13097 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13100 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13101 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13103 rtx mems
[MAX_LDM_STM_OPS
];
13104 HOST_WIDE_INT offset
= *offsetp
;
13107 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13109 if (GET_CODE (basereg
) == PLUS
)
13110 basereg
= XEXP (basereg
, 0);
13112 for (i
= 0; i
< count
; i
++)
13114 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13115 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13123 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13124 write_back
? 4 * count
: 0);
13126 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13127 write_back
? 4 * count
: 0);
13131 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13132 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13134 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13139 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13140 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13142 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13146 /* Called from a peephole2 expander to turn a sequence of loads into an
13147 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13148 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13149 is true if we can reorder the registers because they are used commutatively
13151 Returns true iff we could generate a new instruction. */
13154 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13156 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13157 rtx mems
[MAX_LDM_STM_OPS
];
13158 int i
, j
, base_reg
;
13160 HOST_WIDE_INT offset
;
13161 int write_back
= FALSE
;
13165 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13166 &base_reg
, &offset
, !sort_regs
);
13172 for (i
= 0; i
< nops
- 1; i
++)
13173 for (j
= i
+ 1; j
< nops
; j
++)
13174 if (regs
[i
] > regs
[j
])
13180 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13184 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13185 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13191 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13192 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13194 if (!TARGET_THUMB1
)
13196 base_reg
= regs
[0];
13197 base_reg_rtx
= newbase
;
13201 for (i
= 0; i
< nops
; i
++)
13203 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13204 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13207 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13208 write_back
? offset
+ i
* 4 : 0));
13212 /* Called from a peephole2 expander to turn a sequence of stores into an
13213 STM instruction. OPERANDS are the operands found by the peephole matcher;
13214 NOPS indicates how many separate stores we are trying to combine.
13215 Returns true iff we could generate a new instruction. */
13218 gen_stm_seq (rtx
*operands
, int nops
)
13221 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13222 rtx mems
[MAX_LDM_STM_OPS
];
13225 HOST_WIDE_INT offset
;
13226 int write_back
= FALSE
;
13229 bool base_reg_dies
;
13231 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13232 mem_order
, &base_reg
, &offset
, true);
13237 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13239 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13242 gcc_assert (base_reg_dies
);
13248 gcc_assert (base_reg_dies
);
13249 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13253 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13255 for (i
= 0; i
< nops
; i
++)
13257 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13258 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13261 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13262 write_back
? offset
+ i
* 4 : 0));
13266 /* Called from a peephole2 expander to turn a sequence of stores that are
13267 preceded by constant loads into an STM instruction. OPERANDS are the
13268 operands found by the peephole matcher; NOPS indicates how many
13269 separate stores we are trying to combine; there are 2 * NOPS
13270 instructions in the peephole.
13271 Returns true iff we could generate a new instruction. */
13274 gen_const_stm_seq (rtx
*operands
, int nops
)
13276 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13277 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13278 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13279 rtx mems
[MAX_LDM_STM_OPS
];
13282 HOST_WIDE_INT offset
;
13283 int write_back
= FALSE
;
13286 bool base_reg_dies
;
13288 HARD_REG_SET allocated
;
13290 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13291 mem_order
, &base_reg
, &offset
, false);
13296 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13298 /* If the same register is used more than once, try to find a free
13300 CLEAR_HARD_REG_SET (allocated
);
13301 for (i
= 0; i
< nops
; i
++)
13303 for (j
= i
+ 1; j
< nops
; j
++)
13304 if (regs
[i
] == regs
[j
])
13306 rtx t
= peep2_find_free_register (0, nops
* 2,
13307 TARGET_THUMB1
? "l" : "r",
13308 SImode
, &allocated
);
13312 regs
[i
] = REGNO (t
);
13316 /* Compute an ordering that maps the register numbers to an ascending
13319 for (i
= 0; i
< nops
; i
++)
13320 if (regs
[i
] < regs
[reg_order
[0]])
13323 for (i
= 1; i
< nops
; i
++)
13325 int this_order
= reg_order
[i
- 1];
13326 for (j
= 0; j
< nops
; j
++)
13327 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13328 && (this_order
== reg_order
[i
- 1]
13329 || regs
[j
] < regs
[this_order
]))
13331 reg_order
[i
] = this_order
;
13334 /* Ensure that registers that must be live after the instruction end
13335 up with the correct value. */
13336 for (i
= 0; i
< nops
; i
++)
13338 int this_order
= reg_order
[i
];
13339 if ((this_order
!= mem_order
[i
]
13340 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13341 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13345 /* Load the constants. */
13346 for (i
= 0; i
< nops
; i
++)
13348 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13349 sorted_regs
[i
] = regs
[reg_order
[i
]];
13350 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13353 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13355 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13358 gcc_assert (base_reg_dies
);
13364 gcc_assert (base_reg_dies
);
13365 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13369 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13371 for (i
= 0; i
< nops
; i
++)
13373 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13374 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13377 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13378 write_back
? offset
+ i
* 4 : 0));
13382 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13383 unaligned copies on processors which support unaligned semantics for those
13384 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13385 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13386 An interleave factor of 1 (the minimum) will perform no interleaving.
13387 Load/store multiple are used for aligned addresses where possible. */
13390 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13391 HOST_WIDE_INT length
,
13392 unsigned int interleave_factor
)
13394 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13395 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13396 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13397 HOST_WIDE_INT i
, j
;
13398 HOST_WIDE_INT remaining
= length
, words
;
13399 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13401 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13402 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13403 HOST_WIDE_INT srcoffset
, dstoffset
;
13404 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13407 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13409 /* Use hard registers if we have aligned source or destination so we can use
13410 load/store multiple with contiguous registers. */
13411 if (dst_aligned
|| src_aligned
)
13412 for (i
= 0; i
< interleave_factor
; i
++)
13413 regs
[i
] = gen_rtx_REG (SImode
, i
);
13415 for (i
= 0; i
< interleave_factor
; i
++)
13416 regs
[i
] = gen_reg_rtx (SImode
);
13418 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13419 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13421 srcoffset
= dstoffset
= 0;
13423 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13424 For copying the last bytes we want to subtract this offset again. */
13425 src_autoinc
= dst_autoinc
= 0;
13427 for (i
= 0; i
< interleave_factor
; i
++)
13430 /* Copy BLOCK_SIZE_BYTES chunks. */
13432 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13435 if (src_aligned
&& interleave_factor
> 1)
13437 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13438 TRUE
, srcbase
, &srcoffset
));
13439 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13443 for (j
= 0; j
< interleave_factor
; j
++)
13445 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13447 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13448 srcoffset
+ j
* UNITS_PER_WORD
);
13449 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13451 srcoffset
+= block_size_bytes
;
13455 if (dst_aligned
&& interleave_factor
> 1)
13457 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13458 TRUE
, dstbase
, &dstoffset
));
13459 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13463 for (j
= 0; j
< interleave_factor
; j
++)
13465 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13467 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13468 dstoffset
+ j
* UNITS_PER_WORD
);
13469 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13471 dstoffset
+= block_size_bytes
;
13474 remaining
-= block_size_bytes
;
13477 /* Copy any whole words left (note these aren't interleaved with any
13478 subsequent halfword/byte load/stores in the interests of simplicity). */
13480 words
= remaining
/ UNITS_PER_WORD
;
13482 gcc_assert (words
< interleave_factor
);
13484 if (src_aligned
&& words
> 1)
13486 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13488 src_autoinc
+= UNITS_PER_WORD
* words
;
13492 for (j
= 0; j
< words
; j
++)
13494 addr
= plus_constant (Pmode
, src
,
13495 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13496 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13497 srcoffset
+ j
* UNITS_PER_WORD
);
13498 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13500 srcoffset
+= words
* UNITS_PER_WORD
;
13503 if (dst_aligned
&& words
> 1)
13505 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
13507 dst_autoinc
+= words
* UNITS_PER_WORD
;
13511 for (j
= 0; j
< words
; j
++)
13513 addr
= plus_constant (Pmode
, dst
,
13514 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
13515 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13516 dstoffset
+ j
* UNITS_PER_WORD
);
13517 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13519 dstoffset
+= words
* UNITS_PER_WORD
;
13522 remaining
-= words
* UNITS_PER_WORD
;
13524 gcc_assert (remaining
< 4);
13526 /* Copy a halfword if necessary. */
13528 if (remaining
>= 2)
13530 halfword_tmp
= gen_reg_rtx (SImode
);
13532 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13533 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
13534 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
13536 /* Either write out immediately, or delay until we've loaded the last
13537 byte, depending on interleave factor. */
13538 if (interleave_factor
== 1)
13540 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13541 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13542 emit_insn (gen_unaligned_storehi (mem
,
13543 gen_lowpart (HImode
, halfword_tmp
)));
13544 halfword_tmp
= NULL
;
13552 gcc_assert (remaining
< 2);
13554 /* Copy last byte. */
13556 if ((remaining
& 1) != 0)
13558 byte_tmp
= gen_reg_rtx (SImode
);
13560 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13561 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
13562 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
13564 if (interleave_factor
== 1)
13566 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13567 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13568 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13577 /* Store last halfword if we haven't done so already. */
13581 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13582 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13583 emit_insn (gen_unaligned_storehi (mem
,
13584 gen_lowpart (HImode
, halfword_tmp
)));
13588 /* Likewise for last byte. */
13592 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13593 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13594 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13598 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
13601 /* From mips_adjust_block_mem:
13603 Helper function for doing a loop-based block operation on memory
13604 reference MEM. Each iteration of the loop will operate on LENGTH
13607 Create a new base register for use within the loop and point it to
13608 the start of MEM. Create a new memory reference that uses this
13609 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13612 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
13615 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
13617 /* Although the new mem does not refer to a known location,
13618 it does keep up to LENGTH bytes of alignment. */
13619 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
13620 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
13623 /* From mips_block_move_loop:
13625 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13626 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13627 the memory regions do not overlap. */
13630 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
13631 unsigned int interleave_factor
,
13632 HOST_WIDE_INT bytes_per_iter
)
13634 rtx label
, src_reg
, dest_reg
, final_src
, test
;
13635 HOST_WIDE_INT leftover
;
13637 leftover
= length
% bytes_per_iter
;
13638 length
-= leftover
;
13640 /* Create registers and memory references for use within the loop. */
13641 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
13642 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
13644 /* Calculate the value that SRC_REG should have after the last iteration of
13646 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
13647 0, 0, OPTAB_WIDEN
);
13649 /* Emit the start of the loop. */
13650 label
= gen_label_rtx ();
13651 emit_label (label
);
13653 /* Emit the loop body. */
13654 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
13655 interleave_factor
);
13657 /* Move on to the next block. */
13658 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
13659 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
13661 /* Emit the loop condition. */
13662 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
13663 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
13665 /* Mop up any left-over bytes. */
13667 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
13670 /* Emit a block move when either the source or destination is unaligned (not
13671 aligned to a four-byte boundary). This may need further tuning depending on
13672 core type, optimize_size setting, etc. */
13675 arm_movmemqi_unaligned (rtx
*operands
)
13677 HOST_WIDE_INT length
= INTVAL (operands
[2]);
13681 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
13682 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
13683 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
13684 size of code if optimizing for size. We'll use ldm/stm if src_aligned
13685 or dst_aligned though: allow more interleaving in those cases since the
13686 resulting code can be smaller. */
13687 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
13688 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
13691 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
13692 interleave_factor
, bytes_per_iter
);
13694 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
13695 interleave_factor
);
13699 /* Note that the loop created by arm_block_move_unaligned_loop may be
13700 subject to loop unrolling, which makes tuning this condition a little
13703 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
13705 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
13712 arm_gen_movmemqi (rtx
*operands
)
13714 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
13715 HOST_WIDE_INT srcoffset
, dstoffset
;
13717 rtx src
, dst
, srcbase
, dstbase
;
13718 rtx part_bytes_reg
= NULL
;
13721 if (!CONST_INT_P (operands
[2])
13722 || !CONST_INT_P (operands
[3])
13723 || INTVAL (operands
[2]) > 64)
13726 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
13727 return arm_movmemqi_unaligned (operands
);
13729 if (INTVAL (operands
[3]) & 3)
13732 dstbase
= operands
[0];
13733 srcbase
= operands
[1];
13735 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
13736 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
13738 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
13739 out_words_to_go
= INTVAL (operands
[2]) / 4;
13740 last_bytes
= INTVAL (operands
[2]) & 3;
13741 dstoffset
= srcoffset
= 0;
13743 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
13744 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
13746 for (i
= 0; in_words_to_go
>= 2; i
+=4)
13748 if (in_words_to_go
> 4)
13749 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
13750 TRUE
, srcbase
, &srcoffset
));
13752 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
13753 src
, FALSE
, srcbase
,
13756 if (out_words_to_go
)
13758 if (out_words_to_go
> 4)
13759 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
13760 TRUE
, dstbase
, &dstoffset
));
13761 else if (out_words_to_go
!= 1)
13762 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
13763 out_words_to_go
, dst
,
13766 dstbase
, &dstoffset
));
13769 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
13770 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
13771 if (last_bytes
!= 0)
13773 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
13779 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
13780 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
13783 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
13784 if (out_words_to_go
)
13788 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
13789 sreg
= copy_to_reg (mem
);
13791 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
13792 emit_move_insn (mem
, sreg
);
13795 gcc_assert (!in_words_to_go
); /* Sanity check */
13798 if (in_words_to_go
)
13800 gcc_assert (in_words_to_go
> 0);
13802 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
13803 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
13806 gcc_assert (!last_bytes
|| part_bytes_reg
);
13808 if (BYTES_BIG_ENDIAN
&& last_bytes
)
13810 rtx tmp
= gen_reg_rtx (SImode
);
13812 /* The bytes we want are in the top end of the word. */
13813 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
13814 GEN_INT (8 * (4 - last_bytes
))));
13815 part_bytes_reg
= tmp
;
13819 mem
= adjust_automodify_address (dstbase
, QImode
,
13820 plus_constant (Pmode
, dst
,
13822 dstoffset
+ last_bytes
- 1);
13823 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
13827 tmp
= gen_reg_rtx (SImode
);
13828 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
13829 part_bytes_reg
= tmp
;
13836 if (last_bytes
> 1)
13838 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
13839 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
13843 rtx tmp
= gen_reg_rtx (SImode
);
13844 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
13845 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
13846 part_bytes_reg
= tmp
;
13853 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
13854 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
13861 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
13864 next_consecutive_mem (rtx mem
)
13866 enum machine_mode mode
= GET_MODE (mem
);
13867 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
13868 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
13870 return adjust_automodify_address (mem
, mode
, addr
, offset
);
13873 /* Copy using LDRD/STRD instructions whenever possible.
13874 Returns true upon success. */
13876 gen_movmem_ldrd_strd (rtx
*operands
)
13878 unsigned HOST_WIDE_INT len
;
13879 HOST_WIDE_INT align
;
13880 rtx src
, dst
, base
;
13882 bool src_aligned
, dst_aligned
;
13883 bool src_volatile
, dst_volatile
;
13885 gcc_assert (CONST_INT_P (operands
[2]));
13886 gcc_assert (CONST_INT_P (operands
[3]));
13888 len
= UINTVAL (operands
[2]);
13892 /* Maximum alignment we can assume for both src and dst buffers. */
13893 align
= INTVAL (operands
[3]);
13895 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
13898 /* Place src and dst addresses in registers
13899 and update the corresponding mem rtx. */
13901 dst_volatile
= MEM_VOLATILE_P (dst
);
13902 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
13903 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
13904 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
13907 src_volatile
= MEM_VOLATILE_P (src
);
13908 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
13909 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
13910 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
13912 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
13915 if (src_volatile
|| dst_volatile
)
13918 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
13919 if (!(dst_aligned
|| src_aligned
))
13920 return arm_gen_movmemqi (operands
);
13922 src
= adjust_address (src
, DImode
, 0);
13923 dst
= adjust_address (dst
, DImode
, 0);
13927 reg0
= gen_reg_rtx (DImode
);
13929 emit_move_insn (reg0
, src
);
13931 emit_insn (gen_unaligned_loaddi (reg0
, src
));
13934 emit_move_insn (dst
, reg0
);
13936 emit_insn (gen_unaligned_storedi (dst
, reg0
));
13938 src
= next_consecutive_mem (src
);
13939 dst
= next_consecutive_mem (dst
);
13942 gcc_assert (len
< 8);
13945 /* More than a word but less than a double-word to copy. Copy a word. */
13946 reg0
= gen_reg_rtx (SImode
);
13947 src
= adjust_address (src
, SImode
, 0);
13948 dst
= adjust_address (dst
, SImode
, 0);
13950 emit_move_insn (reg0
, src
);
13952 emit_insn (gen_unaligned_loadsi (reg0
, src
));
13955 emit_move_insn (dst
, reg0
);
13957 emit_insn (gen_unaligned_storesi (dst
, reg0
));
13959 src
= next_consecutive_mem (src
);
13960 dst
= next_consecutive_mem (dst
);
13967 /* Copy the remaining bytes. */
13970 dst
= adjust_address (dst
, HImode
, 0);
13971 src
= adjust_address (src
, HImode
, 0);
13972 reg0
= gen_reg_rtx (SImode
);
13974 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
13976 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
13979 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
13981 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
13983 src
= next_consecutive_mem (src
);
13984 dst
= next_consecutive_mem (dst
);
13989 dst
= adjust_address (dst
, QImode
, 0);
13990 src
= adjust_address (src
, QImode
, 0);
13991 reg0
= gen_reg_rtx (QImode
);
13992 emit_move_insn (reg0
, src
);
13993 emit_move_insn (dst
, reg0
);
13997 /* Select a dominance comparison mode if possible for a test of the general
13998 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
13999 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14000 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14001 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14002 In all cases OP will be either EQ or NE, but we don't need to know which
14003 here. If we are unable to support a dominance comparison we return
14004 CC mode. This will then fail to match for the RTL expressions that
14005 generate this call. */
14007 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14009 enum rtx_code cond1
, cond2
;
14012 /* Currently we will probably get the wrong result if the individual
14013 comparisons are not simple. This also ensures that it is safe to
14014 reverse a comparison if necessary. */
14015 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14017 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14021 /* The if_then_else variant of this tests the second condition if the
14022 first passes, but is true if the first fails. Reverse the first
14023 condition to get a true "inclusive-or" expression. */
14024 if (cond_or
== DOM_CC_NX_OR_Y
)
14025 cond1
= reverse_condition (cond1
);
14027 /* If the comparisons are not equal, and one doesn't dominate the other,
14028 then we can't do this. */
14030 && !comparison_dominates_p (cond1
, cond2
)
14031 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14036 enum rtx_code temp
= cond1
;
14044 if (cond_or
== DOM_CC_X_AND_Y
)
14049 case EQ
: return CC_DEQmode
;
14050 case LE
: return CC_DLEmode
;
14051 case LEU
: return CC_DLEUmode
;
14052 case GE
: return CC_DGEmode
;
14053 case GEU
: return CC_DGEUmode
;
14054 default: gcc_unreachable ();
14058 if (cond_or
== DOM_CC_X_AND_Y
)
14070 gcc_unreachable ();
14074 if (cond_or
== DOM_CC_X_AND_Y
)
14086 gcc_unreachable ();
14090 if (cond_or
== DOM_CC_X_AND_Y
)
14091 return CC_DLTUmode
;
14096 return CC_DLTUmode
;
14098 return CC_DLEUmode
;
14102 gcc_unreachable ();
14106 if (cond_or
== DOM_CC_X_AND_Y
)
14107 return CC_DGTUmode
;
14112 return CC_DGTUmode
;
14114 return CC_DGEUmode
;
14118 gcc_unreachable ();
14121 /* The remaining cases only occur when both comparisons are the
14124 gcc_assert (cond1
== cond2
);
14128 gcc_assert (cond1
== cond2
);
14132 gcc_assert (cond1
== cond2
);
14136 gcc_assert (cond1
== cond2
);
14137 return CC_DLEUmode
;
14140 gcc_assert (cond1
== cond2
);
14141 return CC_DGEUmode
;
14144 gcc_unreachable ();
14149 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14151 /* All floating point compares return CCFP if it is an equality
14152 comparison, and CCFPE otherwise. */
14153 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14176 gcc_unreachable ();
14180 /* A compare with a shifted operand. Because of canonicalization, the
14181 comparison will have to be swapped when we emit the assembler. */
14182 if (GET_MODE (y
) == SImode
14183 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14184 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14185 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14186 || GET_CODE (x
) == ROTATERT
))
14189 /* This operation is performed swapped, but since we only rely on the Z
14190 flag we don't need an additional mode. */
14191 if (GET_MODE (y
) == SImode
14192 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14193 && GET_CODE (x
) == NEG
14194 && (op
== EQ
|| op
== NE
))
14197 /* This is a special case that is used by combine to allow a
14198 comparison of a shifted byte load to be split into a zero-extend
14199 followed by a comparison of the shifted integer (only valid for
14200 equalities and unsigned inequalities). */
14201 if (GET_MODE (x
) == SImode
14202 && GET_CODE (x
) == ASHIFT
14203 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14204 && GET_CODE (XEXP (x
, 0)) == SUBREG
14205 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14206 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14207 && (op
== EQ
|| op
== NE
14208 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14209 && CONST_INT_P (y
))
14212 /* A construct for a conditional compare, if the false arm contains
14213 0, then both conditions must be true, otherwise either condition
14214 must be true. Not all conditions are possible, so CCmode is
14215 returned if it can't be done. */
14216 if (GET_CODE (x
) == IF_THEN_ELSE
14217 && (XEXP (x
, 2) == const0_rtx
14218 || XEXP (x
, 2) == const1_rtx
)
14219 && COMPARISON_P (XEXP (x
, 0))
14220 && COMPARISON_P (XEXP (x
, 1)))
14221 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14222 INTVAL (XEXP (x
, 2)));
14224 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14225 if (GET_CODE (x
) == AND
14226 && (op
== EQ
|| op
== NE
)
14227 && COMPARISON_P (XEXP (x
, 0))
14228 && COMPARISON_P (XEXP (x
, 1)))
14229 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14232 if (GET_CODE (x
) == IOR
14233 && (op
== EQ
|| op
== NE
)
14234 && COMPARISON_P (XEXP (x
, 0))
14235 && COMPARISON_P (XEXP (x
, 1)))
14236 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14239 /* An operation (on Thumb) where we want to test for a single bit.
14240 This is done by shifting that bit up into the top bit of a
14241 scratch register; we can then branch on the sign bit. */
14243 && GET_MODE (x
) == SImode
14244 && (op
== EQ
|| op
== NE
)
14245 && GET_CODE (x
) == ZERO_EXTRACT
14246 && XEXP (x
, 1) == const1_rtx
)
14249 /* An operation that sets the condition codes as a side-effect, the
14250 V flag is not set correctly, so we can only use comparisons where
14251 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14253 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14254 if (GET_MODE (x
) == SImode
14256 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14257 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14258 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14259 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14260 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14261 || GET_CODE (x
) == LSHIFTRT
14262 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14263 || GET_CODE (x
) == ROTATERT
14264 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14265 return CC_NOOVmode
;
14267 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14270 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14271 && GET_CODE (x
) == PLUS
14272 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14275 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14281 /* A DImode comparison against zero can be implemented by
14282 or'ing the two halves together. */
14283 if (y
== const0_rtx
)
14286 /* We can do an equality test in three Thumb instructions. */
14296 /* DImode unsigned comparisons can be implemented by cmp +
14297 cmpeq without a scratch register. Not worth doing in
14308 /* DImode signed and unsigned comparisons can be implemented
14309 by cmp + sbcs with a scratch register, but that does not
14310 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14311 gcc_assert (op
!= EQ
&& op
!= NE
);
14315 gcc_unreachable ();
14319 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14320 return GET_MODE (x
);
14325 /* X and Y are two things to compare using CODE. Emit the compare insn and
14326 return the rtx for register 0 in the proper mode. FP means this is a
14327 floating point compare: I don't think that it is needed on the arm. */
14329 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14331 enum machine_mode mode
;
14333 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14335 /* We might have X as a constant, Y as a register because of the predicates
14336 used for cmpdi. If so, force X to a register here. */
14337 if (dimode_comparison
&& !REG_P (x
))
14338 x
= force_reg (DImode
, x
);
14340 mode
= SELECT_CC_MODE (code
, x
, y
);
14341 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14343 if (dimode_comparison
14344 && mode
!= CC_CZmode
)
14348 /* To compare two non-zero values for equality, XOR them and
14349 then compare against zero. Not used for ARM mode; there
14350 CC_CZmode is cheaper. */
14351 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14353 gcc_assert (!reload_completed
);
14354 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14358 /* A scratch register is required. */
14359 if (reload_completed
)
14360 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14362 scratch
= gen_rtx_SCRATCH (SImode
);
14364 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14365 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14366 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
14369 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14374 /* Generate a sequence of insns that will generate the correct return
14375 address mask depending on the physical architecture that the program
14378 arm_gen_return_addr_mask (void)
14380 rtx reg
= gen_reg_rtx (Pmode
);
14382 emit_insn (gen_return_addr_mask (reg
));
14387 arm_reload_in_hi (rtx
*operands
)
14389 rtx ref
= operands
[1];
14391 HOST_WIDE_INT offset
= 0;
14393 if (GET_CODE (ref
) == SUBREG
)
14395 offset
= SUBREG_BYTE (ref
);
14396 ref
= SUBREG_REG (ref
);
14401 /* We have a pseudo which has been spilt onto the stack; there
14402 are two cases here: the first where there is a simple
14403 stack-slot replacement and a second where the stack-slot is
14404 out of range, or is used as a subreg. */
14405 if (reg_equiv_mem (REGNO (ref
)))
14407 ref
= reg_equiv_mem (REGNO (ref
));
14408 base
= find_replacement (&XEXP (ref
, 0));
14411 /* The slot is out of range, or was dressed up in a SUBREG. */
14412 base
= reg_equiv_address (REGNO (ref
));
14415 base
= find_replacement (&XEXP (ref
, 0));
14417 /* Handle the case where the address is too complex to be offset by 1. */
14418 if (GET_CODE (base
) == MINUS
14419 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14421 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14423 emit_set_insn (base_plus
, base
);
14426 else if (GET_CODE (base
) == PLUS
)
14428 /* The addend must be CONST_INT, or we would have dealt with it above. */
14429 HOST_WIDE_INT hi
, lo
;
14431 offset
+= INTVAL (XEXP (base
, 1));
14432 base
= XEXP (base
, 0);
14434 /* Rework the address into a legal sequence of insns. */
14435 /* Valid range for lo is -4095 -> 4095 */
14438 : -((-offset
) & 0xfff));
14440 /* Corner case, if lo is the max offset then we would be out of range
14441 once we have added the additional 1 below, so bump the msb into the
14442 pre-loading insn(s). */
14446 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14447 ^ (HOST_WIDE_INT
) 0x80000000)
14448 - (HOST_WIDE_INT
) 0x80000000);
14450 gcc_assert (hi
+ lo
== offset
);
14454 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14456 /* Get the base address; addsi3 knows how to handle constants
14457 that require more than one insn. */
14458 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14464 /* Operands[2] may overlap operands[0] (though it won't overlap
14465 operands[1]), that's why we asked for a DImode reg -- so we can
14466 use the bit that does not overlap. */
14467 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14468 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14470 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14472 emit_insn (gen_zero_extendqisi2 (scratch
,
14473 gen_rtx_MEM (QImode
,
14474 plus_constant (Pmode
, base
,
14476 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14477 gen_rtx_MEM (QImode
,
14478 plus_constant (Pmode
, base
,
14480 if (!BYTES_BIG_ENDIAN
)
14481 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14482 gen_rtx_IOR (SImode
,
14485 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14489 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14490 gen_rtx_IOR (SImode
,
14491 gen_rtx_ASHIFT (SImode
, scratch
,
14493 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14496 /* Handle storing a half-word to memory during reload by synthesizing as two
14497 byte stores. Take care not to clobber the input values until after we
14498 have moved them somewhere safe. This code assumes that if the DImode
14499 scratch in operands[2] overlaps either the input value or output address
14500 in some way, then that value must die in this insn (we absolutely need
14501 two scratch registers for some corner cases). */
14503 arm_reload_out_hi (rtx
*operands
)
14505 rtx ref
= operands
[0];
14506 rtx outval
= operands
[1];
14508 HOST_WIDE_INT offset
= 0;
14510 if (GET_CODE (ref
) == SUBREG
)
14512 offset
= SUBREG_BYTE (ref
);
14513 ref
= SUBREG_REG (ref
);
14518 /* We have a pseudo which has been spilt onto the stack; there
14519 are two cases here: the first where there is a simple
14520 stack-slot replacement and a second where the stack-slot is
14521 out of range, or is used as a subreg. */
14522 if (reg_equiv_mem (REGNO (ref
)))
14524 ref
= reg_equiv_mem (REGNO (ref
));
14525 base
= find_replacement (&XEXP (ref
, 0));
14528 /* The slot is out of range, or was dressed up in a SUBREG. */
14529 base
= reg_equiv_address (REGNO (ref
));
14532 base
= find_replacement (&XEXP (ref
, 0));
14534 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14536 /* Handle the case where the address is too complex to be offset by 1. */
14537 if (GET_CODE (base
) == MINUS
14538 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14540 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14542 /* Be careful not to destroy OUTVAL. */
14543 if (reg_overlap_mentioned_p (base_plus
, outval
))
14545 /* Updating base_plus might destroy outval, see if we can
14546 swap the scratch and base_plus. */
14547 if (!reg_overlap_mentioned_p (scratch
, outval
))
14550 scratch
= base_plus
;
14555 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14557 /* Be conservative and copy OUTVAL into the scratch now,
14558 this should only be necessary if outval is a subreg
14559 of something larger than a word. */
14560 /* XXX Might this clobber base? I can't see how it can,
14561 since scratch is known to overlap with OUTVAL, and
14562 must be wider than a word. */
14563 emit_insn (gen_movhi (scratch_hi
, outval
));
14564 outval
= scratch_hi
;
14568 emit_set_insn (base_plus
, base
);
14571 else if (GET_CODE (base
) == PLUS
)
14573 /* The addend must be CONST_INT, or we would have dealt with it above. */
14574 HOST_WIDE_INT hi
, lo
;
14576 offset
+= INTVAL (XEXP (base
, 1));
14577 base
= XEXP (base
, 0);
14579 /* Rework the address into a legal sequence of insns. */
14580 /* Valid range for lo is -4095 -> 4095 */
14583 : -((-offset
) & 0xfff));
14585 /* Corner case, if lo is the max offset then we would be out of range
14586 once we have added the additional 1 below, so bump the msb into the
14587 pre-loading insn(s). */
14591 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14592 ^ (HOST_WIDE_INT
) 0x80000000)
14593 - (HOST_WIDE_INT
) 0x80000000);
14595 gcc_assert (hi
+ lo
== offset
);
14599 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14601 /* Be careful not to destroy OUTVAL. */
14602 if (reg_overlap_mentioned_p (base_plus
, outval
))
14604 /* Updating base_plus might destroy outval, see if we
14605 can swap the scratch and base_plus. */
14606 if (!reg_overlap_mentioned_p (scratch
, outval
))
14609 scratch
= base_plus
;
14614 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14616 /* Be conservative and copy outval into scratch now,
14617 this should only be necessary if outval is a
14618 subreg of something larger than a word. */
14619 /* XXX Might this clobber base? I can't see how it
14620 can, since scratch is known to overlap with
14622 emit_insn (gen_movhi (scratch_hi
, outval
));
14623 outval
= scratch_hi
;
14627 /* Get the base address; addsi3 knows how to handle constants
14628 that require more than one insn. */
14629 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14635 if (BYTES_BIG_ENDIAN
)
14637 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
14638 plus_constant (Pmode
, base
,
14640 gen_lowpart (QImode
, outval
)));
14641 emit_insn (gen_lshrsi3 (scratch
,
14642 gen_rtx_SUBREG (SImode
, outval
, 0),
14644 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
14646 gen_lowpart (QImode
, scratch
)));
14650 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
14652 gen_lowpart (QImode
, outval
)));
14653 emit_insn (gen_lshrsi3 (scratch
,
14654 gen_rtx_SUBREG (SImode
, outval
, 0),
14656 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
14657 plus_constant (Pmode
, base
,
14659 gen_lowpart (QImode
, scratch
)));
14663 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
14664 (padded to the size of a word) should be passed in a register. */
14667 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
14669 if (TARGET_AAPCS_BASED
)
14670 return must_pass_in_stack_var_size (mode
, type
);
14672 return must_pass_in_stack_var_size_or_pad (mode
, type
);
14676 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
14677 Return true if an argument passed on the stack should be padded upwards,
14678 i.e. if the least-significant byte has useful data.
14679 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
14680 aggregate types are placed in the lowest memory address. */
14683 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
14685 if (!TARGET_AAPCS_BASED
)
14686 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
14688 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
14695 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
14696 Return !BYTES_BIG_ENDIAN if the least significant byte of the
14697 register has useful data, and return the opposite if the most
14698 significant byte does. */
14701 arm_pad_reg_upward (enum machine_mode mode
,
14702 tree type
, int first ATTRIBUTE_UNUSED
)
14704 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
14706 /* For AAPCS, small aggregates, small fixed-point types,
14707 and small complex types are always padded upwards. */
14710 if ((AGGREGATE_TYPE_P (type
)
14711 || TREE_CODE (type
) == COMPLEX_TYPE
14712 || FIXED_POINT_TYPE_P (type
))
14713 && int_size_in_bytes (type
) <= 4)
14718 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
14719 && GET_MODE_SIZE (mode
) <= 4)
14724 /* Otherwise, use default padding. */
14725 return !BYTES_BIG_ENDIAN
;
14728 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
14729 assuming that the address in the base register is word aligned. */
14731 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
14733 HOST_WIDE_INT max_offset
;
14735 /* Offset must be a multiple of 4 in Thumb mode. */
14736 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
14741 else if (TARGET_ARM
)
14746 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
14749 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
14750 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
14751 Assumes that the address in the base register RN is word aligned. Pattern
14752 guarantees that both memory accesses use the same base register,
14753 the offsets are constants within the range, and the gap between the offsets is 4.
14754 If preload complete then check that registers are legal. WBACK indicates whether
14755 address is updated. LOAD indicates whether memory access is load or store. */
14757 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
14758 bool wback
, bool load
)
14760 unsigned int t
, t2
, n
;
14762 if (!reload_completed
)
14765 if (!offset_ok_for_ldrd_strd (offset
))
14772 if ((TARGET_THUMB2
)
14773 && ((wback
&& (n
== t
|| n
== t2
))
14774 || (t
== SP_REGNUM
)
14775 || (t
== PC_REGNUM
)
14776 || (t2
== SP_REGNUM
)
14777 || (t2
== PC_REGNUM
)
14778 || (!load
&& (n
== PC_REGNUM
))
14779 || (load
&& (t
== t2
))
14780 /* Triggers Cortex-M3 LDRD errata. */
14781 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
14785 && ((wback
&& (n
== t
|| n
== t2
))
14786 || (t2
== PC_REGNUM
)
14787 || (t
% 2 != 0) /* First destination register is not even. */
14789 /* PC can be used as base register (for offset addressing only),
14790 but it is depricated. */
14791 || (n
== PC_REGNUM
)))
14797 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
14798 operand ADDR is an immediate offset from the base register and is
14799 not volatile, in which case it sets BASE and OFFSET
14802 mem_ok_for_ldrd_strd (rtx addr
, rtx
*base
, rtx
*offset
)
14804 /* TODO: Handle more general memory operand patterns, such as
14805 PRE_DEC and PRE_INC. */
14807 /* Convert a subreg of mem into mem itself. */
14808 if (GET_CODE (addr
) == SUBREG
)
14809 addr
= alter_subreg (&addr
, true);
14811 gcc_assert (MEM_P (addr
));
14813 /* Don't modify volatile memory accesses. */
14814 if (MEM_VOLATILE_P (addr
))
14817 *offset
= const0_rtx
;
14819 addr
= XEXP (addr
, 0);
14825 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
14827 *base
= XEXP (addr
, 0);
14828 *offset
= XEXP (addr
, 1);
14829 return (REG_P (*base
) && CONST_INT_P (*offset
));
/* Exchange two rtx values in place.  Wrapped in do { } while (0) so the
   macro behaves as a single statement; used below to reorder LDRD/STRD
   operands.  */
14835 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
14837 /* Called from a peephole2 to replace two word-size accesses with a
14838 single LDRD/STRD instruction. Returns true iff we can generate a
14839 new instruction sequence. That is, both accesses use the same base
14840 register and the gap between constant offsets is 4. This function
14841 may reorder its operands to match ldrd/strd RTL templates.
14842 OPERANDS are the operands found by the peephole matcher;
14843 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
14844 corresponding memory operands. LOAD indicaates whether the access
14845 is load or store. CONST_STORE indicates a store of constant
14846 integer values held in OPERANDS[4,5] and assumes that the pattern
14847 is of length 4 insn, for the purpose of checking dead registers.
14848 COMMUTE indicates that register operands may be reordered. */
14850 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
14851 bool const_store
, bool commute
)
14854 HOST_WIDE_INT offsets
[2], offset
;
14855 rtx base
= NULL_RTX
;
14856 rtx cur_base
, cur_offset
, tmp
;
14858 HARD_REG_SET regset
;
14860 gcc_assert (!const_store
|| !load
);
14861 /* Check that the memory references are immediate offsets from the
14862 same base register. Extract the base register, the destination
14863 registers, and the corresponding memory offsets. */
14864 for (i
= 0; i
< nops
; i
++)
14866 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
14871 else if (REGNO (base
) != REGNO (cur_base
))
14874 offsets
[i
] = INTVAL (cur_offset
);
14875 if (GET_CODE (operands
[i
]) == SUBREG
)
14877 tmp
= SUBREG_REG (operands
[i
]);
14878 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
14883 /* Make sure there is no dependency between the individual loads. */
14884 if (load
&& REGNO (operands
[0]) == REGNO (base
))
14885 return false; /* RAW */
14887 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
14888 return false; /* WAW */
14890 /* If the same input register is used in both stores
14891 when storing different constants, try to find a free register.
14892 For example, the code
14897 can be transformed into
14900 in Thumb mode assuming that r1 is free. */
14902 && REGNO (operands
[0]) == REGNO (operands
[1])
14903 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
14907 CLEAR_HARD_REG_SET (regset
);
14908 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
14909 if (tmp
== NULL_RTX
)
14912 /* Use the new register in the first load to ensure that
14913 if the original input register is not dead after peephole,
14914 then it will have the correct constant value. */
14917 else if (TARGET_ARM
)
14920 int regno
= REGNO (operands
[0]);
14921 if (!peep2_reg_dead_p (4, operands
[0]))
14923 /* When the input register is even and is not dead after the
14924 pattern, it has to hold the second constant but we cannot
14925 form a legal STRD in ARM mode with this register as the second
14927 if (regno
% 2 == 0)
14930 /* Is regno-1 free? */
14931 SET_HARD_REG_SET (regset
);
14932 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
14933 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
14934 if (tmp
== NULL_RTX
)
14941 /* Find a DImode register. */
14942 CLEAR_HARD_REG_SET (regset
);
14943 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
14944 if (tmp
!= NULL_RTX
)
14946 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
14947 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
14951 /* Can we use the input register to form a DI register? */
14952 SET_HARD_REG_SET (regset
);
14953 CLEAR_HARD_REG_BIT(regset
,
14954 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
14955 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
14956 if (tmp
== NULL_RTX
)
14958 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
14962 gcc_assert (operands
[0] != NULL_RTX
);
14963 gcc_assert (operands
[1] != NULL_RTX
);
14964 gcc_assert (REGNO (operands
[0]) % 2 == 0);
14965 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
14969 /* Make sure the instructions are ordered with lower memory access first. */
14970 if (offsets
[0] > offsets
[1])
14972 gap
= offsets
[0] - offsets
[1];
14973 offset
= offsets
[1];
14975 /* Swap the instructions such that lower memory is accessed first. */
14976 SWAP_RTX (operands
[0], operands
[1]);
14977 SWAP_RTX (operands
[2], operands
[3]);
14979 SWAP_RTX (operands
[4], operands
[5]);
14983 gap
= offsets
[1] - offsets
[0];
14984 offset
= offsets
[0];
14987 /* Make sure accesses are to consecutive memory locations. */
14991 /* Make sure we generate legal instructions. */
14992 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
14996 /* In Thumb state, where registers are almost unconstrained, there
14997 is little hope to fix it. */
15001 if (load
&& commute
)
15003 /* Try reordering registers. */
15004 SWAP_RTX (operands
[0], operands
[1]);
15005 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15012 /* If input registers are dead after this pattern, they can be
15013 reordered or replaced by other registers that are free in the
15014 current pattern. */
15015 if (!peep2_reg_dead_p (4, operands
[0])
15016 || !peep2_reg_dead_p (4, operands
[1]))
15019 /* Try to reorder the input registers. */
15020 /* For example, the code
15025 can be transformed into
15030 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15033 SWAP_RTX (operands
[0], operands
[1]);
15037 /* Try to find a free DI register. */
15038 CLEAR_HARD_REG_SET (regset
);
15039 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15040 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15043 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15044 if (tmp
== NULL_RTX
)
15047 /* DREG must be an even-numbered register in DImode.
15048 Split it into SI registers. */
15049 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15050 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15051 gcc_assert (operands
[0] != NULL_RTX
);
15052 gcc_assert (operands
[1] != NULL_RTX
);
15053 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15054 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15056 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15069 /* Print a symbolic form of X to the debug file, F. */
15071 arm_print_value (FILE *f
, rtx x
)
15073 switch (GET_CODE (x
))
15076 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15080 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15088 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15090 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15091 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15099 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15103 fprintf (f
, "`%s'", XSTR (x
, 0));
15107 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15111 arm_print_value (f
, XEXP (x
, 0));
15115 arm_print_value (f
, XEXP (x
, 0));
15117 arm_print_value (f
, XEXP (x
, 1));
15125 fprintf (f
, "????");
15130 /* Routines for manipulation of the constant pool. */
15132 /* Arm instructions cannot load a large constant directly into a
15133 register; they have to come from a pc relative load. The constant
15134 must therefore be placed in the addressable range of the pc
15135 relative load. Depending on the precise pc relative load
15136 instruction the range is somewhere between 256 bytes and 4k. This
15137 means that we often have to dump a constant inside a function, and
15138 generate code to branch around it.
15140 It is important to minimize this, since the branches will slow
15141 things down and make the code larger.
15143 Normally we can hide the table after an existing unconditional
15144 branch so that there is no interruption of the flow, but in the
15145 worst case the code looks like this:
15163 We fix this by performing a scan after scheduling, which notices
15164 which instructions need to have their operands fetched from the
15165 constant table and builds the table.
15167 The algorithm starts by building a table of all the constants that
15168 need fixing up and all the natural barriers in the function (places
15169 where a constant table can be dropped without breaking the flow).
15170 For each fixup we note how far the pc-relative replacement will be
15171 able to reach and the offset of the instruction into the function.
15173 Having built the table we then group the fixes together to form
15174 tables that are as large as possible (subject to addressing
15175 constraints) and emit each table of constants after the last
15176 barrier that is within range of all the instructions in the group.
15177 If a group does not contain a barrier, then we forcibly create one
15178 by inserting a jump instruction into the flow. Once the table has
15179 been inserted, the insns are then modified to reference the
15180 relevant entry in the pool.
15182 Possible enhancements to the algorithm (not implemented) are:
15184 1) For some processors and object formats, there may be benefit in
15185 aligning the pools to the start of cache lines; this alignment
15186 would need to be taken into account when calculating addressability
15189 /* These typedefs are located at the start of this file, so that
15190 they can be used in the prototypes there. This comment is to
15191 remind readers of that fact so that the following structures
15192 can be understood more easily.
15194 typedef struct minipool_node Mnode;
15195 typedef struct minipool_fixup Mfix; */
15197 struct minipool_node
15199 /* Doubly linked chain of entries. */
15202 /* The maximum offset into the code that this entry can be placed. While
15203 pushing fixes for forward references, all entries are sorted in order
15204 of increasing max_address. */
15205 HOST_WIDE_INT max_address
;
15206 /* Similarly for an entry inserted for a backwards ref. */
15207 HOST_WIDE_INT min_address
;
15208 /* The number of fixes referencing this entry. This can become zero
15209 if we "unpush" an entry. In this case we ignore the entry when we
15210 come to emit the code. */
15212 /* The offset from the start of the minipool. */
15213 HOST_WIDE_INT offset
;
15214 /* The value in table. */
15216 /* The mode of value. */
15217 enum machine_mode mode
;
15218 /* The size of the value. With iWMMXt enabled
15219 sizes > 4 also imply an alignment of 8-bytes. */
15223 struct minipool_fixup
15227 HOST_WIDE_INT address
;
15229 enum machine_mode mode
;
15233 HOST_WIDE_INT forwards
;
15234 HOST_WIDE_INT backwards
;
15237 /* Fixes less than a word need padding out to a word boundary. */
15238 #define MINIPOOL_FIX_SIZE(mode) \
15239 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15241 static Mnode
* minipool_vector_head
;
15242 static Mnode
* minipool_vector_tail
;
15243 static rtx minipool_vector_label
;
15244 static int minipool_pad
;
15246 /* The linked list of all minipool fixes required for this function. */
15247 Mfix
* minipool_fix_head
;
15248 Mfix
* minipool_fix_tail
;
15249 /* The fix entry for the current minipool, once it has been placed. */
15250 Mfix
* minipool_barrier
;
15252 /* Determines if INSN is the start of a jump table. Returns the end
15253 of the TABLE or NULL_RTX. */
15255 is_jump_table (rtx insn
)
15259 if (jump_to_label_p (insn
)
15260 && ((table
= next_active_insn (JUMP_LABEL (insn
)))
15261 == next_active_insn (insn
))
15263 && JUMP_TABLE_DATA_P (table
))
/* Default: jump tables live in the read-only data section unless the
   target configuration says otherwise.  */
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
15273 static HOST_WIDE_INT
15274 get_jump_table_size (rtx insn
)
15276 /* ADDR_VECs only take room if read-only data does into the text
15278 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15280 rtx body
= PATTERN (insn
);
15281 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15282 HOST_WIDE_INT size
;
15283 HOST_WIDE_INT modesize
;
15285 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15286 size
= modesize
* XVECLEN (body
, elt
);
15290 /* Round up size of TBB table to a halfword boundary. */
15291 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
15294 /* No padding necessary for TBH. */
15297 /* Add two bytes for alignment on Thumb. */
15302 gcc_unreachable ();
15310 /* Return the maximum amount of padding that will be inserted before
15313 static HOST_WIDE_INT
15314 get_label_padding (rtx label
)
15316 HOST_WIDE_INT align
, min_insn_size
;
15318 align
= 1 << label_to_alignment (label
);
15319 min_insn_size
= TARGET_THUMB
? 2 : 4;
15320 return align
> min_insn_size
? align
- min_insn_size
: 0;
15323 /* Move a minipool fix MP from its current location to before MAX_MP.
15324 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15325 constraints may need updating. */
15327 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15328 HOST_WIDE_INT max_address
)
15330 /* The code below assumes these are different. */
15331 gcc_assert (mp
!= max_mp
);
15333 if (max_mp
== NULL
)
15335 if (max_address
< mp
->max_address
)
15336 mp
->max_address
= max_address
;
15340 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15341 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15343 mp
->max_address
= max_address
;
15345 /* Unlink MP from its current position. Since max_mp is non-null,
15346 mp->prev must be non-null. */
15347 mp
->prev
->next
= mp
->next
;
15348 if (mp
->next
!= NULL
)
15349 mp
->next
->prev
= mp
->prev
;
15351 minipool_vector_tail
= mp
->prev
;
15353 /* Re-insert it before MAX_MP. */
15355 mp
->prev
= max_mp
->prev
;
15358 if (mp
->prev
!= NULL
)
15359 mp
->prev
->next
= mp
;
15361 minipool_vector_head
= mp
;
15364 /* Save the new entry. */
15367 /* Scan over the preceding entries and adjust their addresses as
15369 while (mp
->prev
!= NULL
15370 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15372 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15379 /* Add a constant to the minipool for a forward reference. Returns the
15380 node added or NULL if the constant will not fit in this pool. */
15382 add_minipool_forward_ref (Mfix
*fix
)
15384 /* If set, max_mp is the first pool_entry that has a lower
15385 constraint than the one we are trying to add. */
15386 Mnode
* max_mp
= NULL
;
15387 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15390 /* If the minipool starts before the end of FIX->INSN then this FIX
15391 can not be placed into the current pool. Furthermore, adding the
15392 new constant pool entry may cause the pool to start FIX_SIZE bytes
15394 if (minipool_vector_head
&&
15395 (fix
->address
+ get_attr_length (fix
->insn
)
15396 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15399 /* Scan the pool to see if a constant with the same value has
15400 already been added. While we are doing this, also note the
15401 location where we must insert the constant if it doesn't already
15403 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15405 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15406 && fix
->mode
== mp
->mode
15407 && (!LABEL_P (fix
->value
)
15408 || (CODE_LABEL_NUMBER (fix
->value
)
15409 == CODE_LABEL_NUMBER (mp
->value
)))
15410 && rtx_equal_p (fix
->value
, mp
->value
))
15412 /* More than one fix references this entry. */
15414 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15417 /* Note the insertion point if necessary. */
15419 && mp
->max_address
> max_address
)
15422 /* If we are inserting an 8-bytes aligned quantity and
15423 we have not already found an insertion point, then
15424 make sure that all such 8-byte aligned quantities are
15425 placed at the start of the pool. */
15426 if (ARM_DOUBLEWORD_ALIGN
15428 && fix
->fix_size
>= 8
15429 && mp
->fix_size
< 8)
15432 max_address
= mp
->max_address
;
15436 /* The value is not currently in the minipool, so we need to create
15437 a new entry for it. If MAX_MP is NULL, the entry will be put on
15438 the end of the list since the placement is less constrained than
15439 any existing entry. Otherwise, we insert the new fix before
15440 MAX_MP and, if necessary, adjust the constraints on the other
15443 mp
->fix_size
= fix
->fix_size
;
15444 mp
->mode
= fix
->mode
;
15445 mp
->value
= fix
->value
;
15447 /* Not yet required for a backwards ref. */
15448 mp
->min_address
= -65536;
15450 if (max_mp
== NULL
)
15452 mp
->max_address
= max_address
;
15454 mp
->prev
= minipool_vector_tail
;
15456 if (mp
->prev
== NULL
)
15458 minipool_vector_head
= mp
;
15459 minipool_vector_label
= gen_label_rtx ();
15462 mp
->prev
->next
= mp
;
15464 minipool_vector_tail
= mp
;
15468 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15469 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15471 mp
->max_address
= max_address
;
15474 mp
->prev
= max_mp
->prev
;
15476 if (mp
->prev
!= NULL
)
15477 mp
->prev
->next
= mp
;
15479 minipool_vector_head
= mp
;
15482 /* Save the new entry. */
15485 /* Scan over the preceding entries and adjust their addresses as
15487 while (mp
->prev
!= NULL
15488 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15490 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15498 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15499 HOST_WIDE_INT min_address
)
15501 HOST_WIDE_INT offset
;
15503 /* The code below assumes these are different. */
15504 gcc_assert (mp
!= min_mp
);
15506 if (min_mp
== NULL
)
15508 if (min_address
> mp
->min_address
)
15509 mp
->min_address
= min_address
;
15513 /* We will adjust this below if it is too loose. */
15514 mp
->min_address
= min_address
;
15516 /* Unlink MP from its current position. Since min_mp is non-null,
15517 mp->next must be non-null. */
15518 mp
->next
->prev
= mp
->prev
;
15519 if (mp
->prev
!= NULL
)
15520 mp
->prev
->next
= mp
->next
;
15522 minipool_vector_head
= mp
->next
;
15524 /* Reinsert it after MIN_MP. */
15526 mp
->next
= min_mp
->next
;
15528 if (mp
->next
!= NULL
)
15529 mp
->next
->prev
= mp
;
15531 minipool_vector_tail
= mp
;
15537 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15539 mp
->offset
= offset
;
15540 if (mp
->refcount
> 0)
15541 offset
+= mp
->fix_size
;
15543 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
15544 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
15550 /* Add a constant to the minipool for a backward reference. Returns the
15551 node added or NULL if the constant will not fit in this pool.
15553 Note that the code for insertion for a backwards reference can be
15554 somewhat confusing because the calculated offsets for each fix do
15555 not take into account the size of the pool (which is still under
15558 add_minipool_backward_ref (Mfix
*fix
)
15560 /* If set, min_mp is the last pool_entry that has a lower constraint
15561 than the one we are trying to add. */
15562 Mnode
*min_mp
= NULL
;
15563 /* This can be negative, since it is only a constraint. */
15564 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
15567 /* If we can't reach the current pool from this insn, or if we can't
15568 insert this entry at the end of the pool without pushing other
15569 fixes out of range, then we don't try. This ensures that we
15570 can't fail later on. */
15571 if (min_address
>= minipool_barrier
->address
15572 || (minipool_vector_tail
->min_address
+ fix
->fix_size
15573 >= minipool_barrier
->address
))
15576 /* Scan the pool to see if a constant with the same value has
15577 already been added. While we are doing this, also note the
15578 location where we must insert the constant if it doesn't already
15580 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
15582 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15583 && fix
->mode
== mp
->mode
15584 && (!LABEL_P (fix
->value
)
15585 || (CODE_LABEL_NUMBER (fix
->value
)
15586 == CODE_LABEL_NUMBER (mp
->value
)))
15587 && rtx_equal_p (fix
->value
, mp
->value
)
15588 /* Check that there is enough slack to move this entry to the
15589 end of the table (this is conservative). */
15590 && (mp
->max_address
15591 > (minipool_barrier
->address
15592 + minipool_vector_tail
->offset
15593 + minipool_vector_tail
->fix_size
)))
15596 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
15599 if (min_mp
!= NULL
)
15600 mp
->min_address
+= fix
->fix_size
;
15603 /* Note the insertion point if necessary. */
15604 if (mp
->min_address
< min_address
)
15606 /* For now, we do not allow the insertion of 8-byte alignment
15607 requiring nodes anywhere but at the start of the pool. */
15608 if (ARM_DOUBLEWORD_ALIGN
15609 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
15614 else if (mp
->max_address
15615 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
15617 /* Inserting before this entry would push the fix beyond
15618 its maximum address (which can happen if we have
15619 re-located a forwards fix); force the new fix to come
15621 if (ARM_DOUBLEWORD_ALIGN
15622 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
15627 min_address
= mp
->min_address
+ fix
->fix_size
;
15630 /* Do not insert a non-8-byte aligned quantity before 8-byte
15631 aligned quantities. */
15632 else if (ARM_DOUBLEWORD_ALIGN
15633 && fix
->fix_size
< 8
15634 && mp
->fix_size
>= 8)
15637 min_address
= mp
->min_address
+ fix
->fix_size
;
15642 /* We need to create a new entry. */
15644 mp
->fix_size
= fix
->fix_size
;
15645 mp
->mode
= fix
->mode
;
15646 mp
->value
= fix
->value
;
15648 mp
->max_address
= minipool_barrier
->address
+ 65536;
15650 mp
->min_address
= min_address
;
15652 if (min_mp
== NULL
)
15655 mp
->next
= minipool_vector_head
;
15657 if (mp
->next
== NULL
)
15659 minipool_vector_tail
= mp
;
15660 minipool_vector_label
= gen_label_rtx ();
15663 mp
->next
->prev
= mp
;
15665 minipool_vector_head
= mp
;
15669 mp
->next
= min_mp
->next
;
15673 if (mp
->next
!= NULL
)
15674 mp
->next
->prev
= mp
;
15676 minipool_vector_tail
= mp
;
15679 /* Save the new entry. */
15687 /* Scan over the following entries and adjust their offsets. */
15688 while (mp
->next
!= NULL
)
15690 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
15691 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
15694 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
15696 mp
->next
->offset
= mp
->offset
;
15705 assign_minipool_offsets (Mfix
*barrier
)
15707 HOST_WIDE_INT offset
= 0;
15710 minipool_barrier
= barrier
;
15712 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15714 mp
->offset
= offset
;
15716 if (mp
->refcount
> 0)
15717 offset
+= mp
->fix_size
;
15721 /* Output the literal table */
15723 dump_minipool (rtx scan
)
15729 if (ARM_DOUBLEWORD_ALIGN
)
15730 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15731 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
15738 fprintf (dump_file
,
15739 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
15740 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
15742 scan
= emit_label_after (gen_label_rtx (), scan
);
15743 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
15744 scan
= emit_label_after (minipool_vector_label
, scan
);
15746 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
15748 if (mp
->refcount
> 0)
15752 fprintf (dump_file
,
15753 ";; Offset %u, min %ld, max %ld ",
15754 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
15755 (unsigned long) mp
->max_address
);
15756 arm_print_value (dump_file
, mp
->value
);
15757 fputc ('\n', dump_file
);
15760 switch (mp
->fix_size
)
15762 #ifdef HAVE_consttable_1
15764 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
15768 #ifdef HAVE_consttable_2
15770 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
15774 #ifdef HAVE_consttable_4
15776 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
15780 #ifdef HAVE_consttable_8
15782 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
15786 #ifdef HAVE_consttable_16
15788 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
15793 gcc_unreachable ();
15801 minipool_vector_head
= minipool_vector_tail
= NULL
;
15802 scan
= emit_insn_after (gen_consttable_end (), scan
);
15803 scan
= emit_barrier_after (scan
);
15806 /* Return the cost of forcibly inserting a barrier after INSN. */
15808 arm_barrier_cost (rtx insn
)
15810 /* Basing the location of the pool on the loop depth is preferable,
15811 but at the moment, the basic block information seems to be
15812 corrupt by this stage of the compilation. */
15813 int base_cost
= 50;
15814 rtx next
= next_nonnote_insn (insn
);
15816 if (next
!= NULL
&& LABEL_P (next
))
15819 switch (GET_CODE (insn
))
15822 /* It will always be better to place the table before the label, rather
15831 return base_cost
- 10;
15834 return base_cost
+ 10;
15838 /* Find the best place in the insn stream in the range
15839 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
15840 Create the barrier by inserting a jump and add a new fix entry for
15843 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
15845 HOST_WIDE_INT count
= 0;
15847 rtx from
= fix
->insn
;
15848 /* The instruction after which we will insert the jump. */
15849 rtx selected
= NULL
;
15851 /* The address at which the jump instruction will be placed. */
15852 HOST_WIDE_INT selected_address
;
15854 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
15855 rtx label
= gen_label_rtx ();
15857 selected_cost
= arm_barrier_cost (from
);
15858 selected_address
= fix
->address
;
15860 while (from
&& count
< max_count
)
15865 /* This code shouldn't have been called if there was a natural barrier
15867 gcc_assert (!BARRIER_P (from
));
15869 /* Count the length of this insn. This must stay in sync with the
15870 code that pushes minipool fixes. */
15871 if (LABEL_P (from
))
15872 count
+= get_label_padding (from
);
15874 count
+= get_attr_length (from
);
15876 /* If there is a jump table, add its length. */
15877 tmp
= is_jump_table (from
);
15880 count
+= get_jump_table_size (tmp
);
15882 /* Jump tables aren't in a basic block, so base the cost on
15883 the dispatch insn. If we select this location, we will
15884 still put the pool after the table. */
15885 new_cost
= arm_barrier_cost (from
);
15887 if (count
< max_count
15888 && (!selected
|| new_cost
<= selected_cost
))
15891 selected_cost
= new_cost
;
15892 selected_address
= fix
->address
+ count
;
15895 /* Continue after the dispatch table. */
15896 from
= NEXT_INSN (tmp
);
15900 new_cost
= arm_barrier_cost (from
);
15902 if (count
< max_count
15903 && (!selected
|| new_cost
<= selected_cost
))
15906 selected_cost
= new_cost
;
15907 selected_address
= fix
->address
+ count
;
15910 from
= NEXT_INSN (from
);
15913 /* Make sure that we found a place to insert the jump. */
15914 gcc_assert (selected
);
15916 /* Make sure we do not split a call and its corresponding
15917 CALL_ARG_LOCATION note. */
15918 if (CALL_P (selected
))
15920 rtx next
= NEXT_INSN (selected
);
15921 if (next
&& NOTE_P (next
)
15922 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
15926 /* Create a new JUMP_INSN that branches around a barrier. */
15927 from
= emit_jump_insn_after (gen_jump (label
), selected
);
15928 JUMP_LABEL (from
) = label
;
15929 barrier
= emit_barrier_after (from
);
15930 emit_label_after (label
, barrier
);
15932 /* Create a minipool barrier entry for the new barrier. */
15933 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
15934 new_fix
->insn
= barrier
;
15935 new_fix
->address
= selected_address
;
15936 new_fix
->next
= fix
->next
;
15937 fix
->next
= new_fix
;
15942 /* Record that there is a natural barrier in the insn stream at
15945 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
15947 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
15950 fix
->address
= address
;
15953 if (minipool_fix_head
!= NULL
)
15954 minipool_fix_tail
->next
= fix
;
15956 minipool_fix_head
= fix
;
15958 minipool_fix_tail
= fix
;
15961 /* Record INSN, which will need fixing up to load a value from the
15962 minipool. ADDRESS is the offset of the insn since the start of the
15963 function; LOC is a pointer to the part of the insn which requires
15964 fixing; VALUE is the constant that must be loaded, which is of type
15967 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
15968 enum machine_mode mode
, rtx value
)
15970 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
15973 fix
->address
= address
;
15976 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
15977 fix
->value
= value
;
15978 fix
->forwards
= get_attr_pool_range (insn
);
15979 fix
->backwards
= get_attr_neg_pool_range (insn
);
15980 fix
->minipool
= NULL
;
15982 /* If an insn doesn't have a range defined for it, then it isn't
15983 expecting to be reworked by this code. Better to stop now than
15984 to generate duff assembly code. */
15985 gcc_assert (fix
->forwards
|| fix
->backwards
);
15987 /* If an entry requires 8-byte alignment then assume all constant pools
15988 require 4 bytes of padding. Trying to do this later on a per-pool
15989 basis is awkward because existing pool entries have to be modified. */
15990 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
15995 fprintf (dump_file
,
15996 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
15997 GET_MODE_NAME (mode
),
15998 INSN_UID (insn
), (unsigned long) address
,
15999 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16000 arm_print_value (dump_file
, fix
->value
);
16001 fprintf (dump_file
, "\n");
16004 /* Add it to the chain of fixes. */
16007 if (minipool_fix_head
!= NULL
)
16008 minipool_fix_tail
->next
= fix
;
16010 minipool_fix_head
= fix
;
16012 minipool_fix_tail
= fix
;
16015 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16016 Returns the number of insns needed, or 99 if we don't know how to
16019 arm_const_double_inline_cost (rtx val
)
16021 rtx lowpart
, highpart
;
16022 enum machine_mode mode
;
16024 mode
= GET_MODE (val
);
16026 if (mode
== VOIDmode
)
16029 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16031 lowpart
= gen_lowpart (SImode
, val
);
16032 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16034 gcc_assert (CONST_INT_P (lowpart
));
16035 gcc_assert (CONST_INT_P (highpart
));
16037 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16038 NULL_RTX
, NULL_RTX
, 0, 0)
16039 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16040 NULL_RTX
, NULL_RTX
, 0, 0));
16043 /* Return true if it is worthwhile to split a 64-bit constant into two
16044 32-bit operations. This is the case if optimizing for size, or
16045 if we have load delay slots, or if one 32-bit part can be done with
16046 a single data operation. */
16048 arm_const_double_by_parts (rtx val
)
16050 enum machine_mode mode
= GET_MODE (val
);
16053 if (optimize_size
|| arm_ld_sched
)
16056 if (mode
== VOIDmode
)
16059 part
= gen_highpart_mode (SImode
, mode
, val
);
16061 gcc_assert (CONST_INT_P (part
));
16063 if (const_ok_for_arm (INTVAL (part
))
16064 || const_ok_for_arm (~INTVAL (part
)))
16067 part
= gen_lowpart (SImode
, val
);
16069 gcc_assert (CONST_INT_P (part
));
16071 if (const_ok_for_arm (INTVAL (part
))
16072 || const_ok_for_arm (~INTVAL (part
)))
16078 /* Return true if it is possible to inline both the high and low parts
16079 of a 64-bit constant into 32-bit data processing instructions. */
16081 arm_const_double_by_immediates (rtx val
)
16083 enum machine_mode mode
= GET_MODE (val
);
16086 if (mode
== VOIDmode
)
16089 part
= gen_highpart_mode (SImode
, mode
, val
);
16091 gcc_assert (CONST_INT_P (part
));
16093 if (!const_ok_for_arm (INTVAL (part
)))
16096 part
= gen_lowpart (SImode
, val
);
16098 gcc_assert (CONST_INT_P (part
));
16100 if (!const_ok_for_arm (INTVAL (part
)))
16106 /* Scan INSN and note any of its operands that need fixing.
16107 If DO_PUSHES is false we do not actually push any of the fixups
16110 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
16114 extract_insn (insn
);
16116 if (!constrain_operands (1))
16117 fatal_insn_not_found (insn
);
16119 if (recog_data
.n_alternatives
== 0)
16122 /* Fill in recog_op_alt with information about the constraints of
16124 preprocess_constraints ();
16126 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16128 /* Things we need to fix can only occur in inputs. */
16129 if (recog_data
.operand_type
[opno
] != OP_IN
)
16132 /* If this alternative is a memory reference, then any mention
16133 of constants in this alternative is really to fool reload
16134 into allowing us to accept one there. We need to fix them up
16135 now so that we output the right code. */
16136 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
16138 rtx op
= recog_data
.operand
[opno
];
16140 if (CONSTANT_P (op
))
16143 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16144 recog_data
.operand_mode
[opno
], op
);
16146 else if (MEM_P (op
)
16147 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16148 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16152 rtx cop
= avoid_constant_pool_reference (op
);
16154 /* Casting the address of something to a mode narrower
16155 than a word can cause avoid_constant_pool_reference()
16156 to return the pool reference itself. That's no good to
16157 us here. Lets just hope that we can use the
16158 constant pool value directly. */
16160 cop
= get_pool_constant (XEXP (op
, 0));
16162 push_minipool_fix (insn
, address
,
16163 recog_data
.operand_loc
[opno
],
16164 recog_data
.operand_mode
[opno
], cop
);
16174 /* Rewrite move insn into subtract of 0 if the condition codes will
16175 be useful in next conditional jump insn. */
16178 thumb1_reorg (void)
16185 rtx pat
, op0
, set
= NULL
;
16186 rtx prev
, insn
= BB_END (bb
);
16187 bool insn_clobbered
= false;
16189 while (insn
!= BB_HEAD (bb
) && DEBUG_INSN_P (insn
))
16190 insn
= PREV_INSN (insn
);
16192 /* Find the last cbranchsi4_insn in basic block BB. */
16193 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
16196 /* Get the register with which we are comparing. */
16197 pat
= PATTERN (insn
);
16198 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
16200 /* Find the first flag setting insn before INSN in basic block BB. */
16201 gcc_assert (insn
!= BB_HEAD (bb
));
16202 for (prev
= PREV_INSN (insn
);
16204 && prev
!= BB_HEAD (bb
)
16206 || DEBUG_INSN_P (prev
)
16207 || ((set
= single_set (prev
)) != NULL
16208 && get_attr_conds (prev
) == CONDS_NOCOND
)));
16209 prev
= PREV_INSN (prev
))
16211 if (reg_set_p (op0
, prev
))
16212 insn_clobbered
= true;
16215 /* Skip if op0 is clobbered by insn other than prev. */
16216 if (insn_clobbered
)
16222 dest
= SET_DEST (set
);
16223 src
= SET_SRC (set
);
16224 if (!low_register_operand (dest
, SImode
)
16225 || !low_register_operand (src
, SImode
))
16228 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16229 in INSN. Both src and dest of the move insn are checked. */
16230 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
16232 dest
= copy_rtx (dest
);
16233 src
= copy_rtx (src
);
16234 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
16235 PATTERN (prev
) = gen_rtx_SET (VOIDmode
, dest
, src
);
16236 INSN_CODE (prev
) = -1;
16237 /* Set test register in INSN to dest. */
16238 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
16239 INSN_CODE (insn
) = -1;
16244 /* Convert instructions to their cc-clobbering variant if possible, since
16245 that allows us to use smaller encodings. */
16248 thumb2_reorg (void)
16253 INIT_REG_SET (&live
);
16255 /* We are freeing block_for_insn in the toplev to keep compatibility
16256 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16257 compute_bb_for_insn ();
16264 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
16265 df_simulate_initialize_backwards (bb
, &live
);
16266 FOR_BB_INSNS_REVERSE (bb
, insn
)
16268 if (NONJUMP_INSN_P (insn
)
16269 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
16270 && GET_CODE (PATTERN (insn
)) == SET
)
16272 enum {SKIP
, CONV
, SWAP_CONV
} action
= SKIP
;
16273 rtx pat
= PATTERN (insn
);
16274 rtx dst
= XEXP (pat
, 0);
16275 rtx src
= XEXP (pat
, 1);
16276 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
16278 if (!OBJECT_P (src
))
16279 op0
= XEXP (src
, 0);
16281 if (BINARY_P (src
))
16282 op1
= XEXP (src
, 1);
16284 if (low_register_operand (dst
, SImode
))
16286 switch (GET_CODE (src
))
16289 /* Adding two registers and storing the result
16290 in the first source is already a 16-bit
16292 if (rtx_equal_p (dst
, op0
)
16293 && register_operand (op1
, SImode
))
16296 if (low_register_operand (op0
, SImode
))
16298 /* ADDS <Rd>,<Rn>,<Rm> */
16299 if (low_register_operand (op1
, SImode
))
16301 /* ADDS <Rdn>,#<imm8> */
16302 /* SUBS <Rdn>,#<imm8> */
16303 else if (rtx_equal_p (dst
, op0
)
16304 && CONST_INT_P (op1
)
16305 && IN_RANGE (INTVAL (op1
), -255, 255))
16307 /* ADDS <Rd>,<Rn>,#<imm3> */
16308 /* SUBS <Rd>,<Rn>,#<imm3> */
16309 else if (CONST_INT_P (op1
)
16310 && IN_RANGE (INTVAL (op1
), -7, 7))
16313 /* ADCS <Rd>, <Rn> */
16314 else if (GET_CODE (XEXP (src
, 0)) == PLUS
16315 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
16316 && low_register_operand (XEXP (XEXP (src
, 0), 1),
16318 && COMPARISON_P (op1
)
16319 && cc_register (XEXP (op1
, 0), VOIDmode
)
16320 && maybe_get_arm_condition_code (op1
) == ARM_CS
16321 && XEXP (op1
, 1) == const0_rtx
)
16326 /* RSBS <Rd>,<Rn>,#0
16327 Not handled here: see NEG below. */
16328 /* SUBS <Rd>,<Rn>,#<imm3>
16330 Not handled here: see PLUS above. */
16331 /* SUBS <Rd>,<Rn>,<Rm> */
16332 if (low_register_operand (op0
, SImode
)
16333 && low_register_operand (op1
, SImode
))
16338 /* MULS <Rdm>,<Rn>,<Rdm>
16339 As an exception to the rule, this is only used
16340 when optimizing for size since MULS is slow on all
16341 known implementations. We do not even want to use
16342 MULS in cold code, if optimizing for speed, so we
16343 test the global flag here. */
16344 if (!optimize_size
)
16346 /* else fall through. */
16350 /* ANDS <Rdn>,<Rm> */
16351 if (rtx_equal_p (dst
, op0
)
16352 && low_register_operand (op1
, SImode
))
16354 else if (rtx_equal_p (dst
, op1
)
16355 && low_register_operand (op0
, SImode
))
16356 action
= SWAP_CONV
;
16362 /* ASRS <Rdn>,<Rm> */
16363 /* LSRS <Rdn>,<Rm> */
16364 /* LSLS <Rdn>,<Rm> */
16365 if (rtx_equal_p (dst
, op0
)
16366 && low_register_operand (op1
, SImode
))
16368 /* ASRS <Rd>,<Rm>,#<imm5> */
16369 /* LSRS <Rd>,<Rm>,#<imm5> */
16370 /* LSLS <Rd>,<Rm>,#<imm5> */
16371 else if (low_register_operand (op0
, SImode
)
16372 && CONST_INT_P (op1
)
16373 && IN_RANGE (INTVAL (op1
), 0, 31))
16378 /* RORS <Rdn>,<Rm> */
16379 if (rtx_equal_p (dst
, op0
)
16380 && low_register_operand (op1
, SImode
))
16386 /* MVNS <Rd>,<Rm> */
16387 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16388 if (low_register_operand (op0
, SImode
))
16393 /* MOVS <Rd>,#<imm8> */
16394 if (CONST_INT_P (src
)
16395 && IN_RANGE (INTVAL (src
), 0, 255))
16400 /* MOVS and MOV<c> with registers have different
16401 encodings, so are not relevant here. */
16409 if (action
!= SKIP
)
16411 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
16412 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
16415 if (action
== SWAP_CONV
)
16417 src
= copy_rtx (src
);
16418 XEXP (src
, 0) = op1
;
16419 XEXP (src
, 1) = op0
;
16420 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
16421 vec
= gen_rtvec (2, pat
, clobber
);
16423 else /* action == CONV */
16424 vec
= gen_rtvec (2, pat
, clobber
);
16426 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
16427 INSN_CODE (insn
) = -1;
16431 if (NONDEBUG_INSN_P (insn
))
16432 df_simulate_one_insn_backwards (bb
, insn
, &live
);
16436 CLEAR_REG_SET (&live
);
16439 /* Gcc puts the pool in the wrong place for ARM, since we can only
16440 load addresses a limited distance around the pc. We do some
16441 special munging to move the constant pool values to the correct
16442 point in the code. */
16447 HOST_WIDE_INT address
= 0;
16452 else if (TARGET_THUMB2
)
16455 /* Ensure all insns that must be split have been split at this point.
16456 Otherwise, the pool placement code below may compute incorrect
16457 insn lengths. Note that when optimizing, all insns have already
16458 been split at this point. */
16460 split_all_insns_noflow ();
16462 minipool_fix_head
= minipool_fix_tail
= NULL
;
16464 /* The first insn must always be a note, or the code below won't
16465 scan it properly. */
16466 insn
= get_insns ();
16467 gcc_assert (NOTE_P (insn
));
16470 /* Scan all the insns and record the operands that will need fixing. */
16471 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
16473 if (BARRIER_P (insn
))
16474 push_minipool_barrier (insn
, address
);
16475 else if (INSN_P (insn
))
16479 note_invalid_constants (insn
, address
, true);
16480 address
+= get_attr_length (insn
);
16482 /* If the insn is a vector jump, add the size of the table
16483 and skip the table. */
16484 if ((table
= is_jump_table (insn
)) != NULL
)
16486 address
+= get_jump_table_size (table
);
16490 else if (LABEL_P (insn
))
16491 /* Add the worst-case padding due to alignment. We don't add
16492 the _current_ padding because the minipool insertions
16493 themselves might change it. */
16494 address
+= get_label_padding (insn
);
16497 fix
= minipool_fix_head
;
16499 /* Now scan the fixups and perform the required changes. */
16504 Mfix
* last_added_fix
;
16505 Mfix
* last_barrier
= NULL
;
16508 /* Skip any further barriers before the next fix. */
16509 while (fix
&& BARRIER_P (fix
->insn
))
16512 /* No more fixes. */
16516 last_added_fix
= NULL
;
16518 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
16520 if (BARRIER_P (ftmp
->insn
))
16522 if (ftmp
->address
>= minipool_vector_head
->max_address
)
16525 last_barrier
= ftmp
;
16527 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
16530 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
16533 /* If we found a barrier, drop back to that; any fixes that we
16534 could have reached but come after the barrier will now go in
16535 the next mini-pool. */
16536 if (last_barrier
!= NULL
)
16538 /* Reduce the refcount for those fixes that won't go into this
16540 for (fdel
= last_barrier
->next
;
16541 fdel
&& fdel
!= ftmp
;
16544 fdel
->minipool
->refcount
--;
16545 fdel
->minipool
= NULL
;
16548 ftmp
= last_barrier
;
16552 /* ftmp is first fix that we can't fit into this pool and
16553 there no natural barriers that we could use. Insert a
16554 new barrier in the code somewhere between the previous
16555 fix and this one, and arrange to jump around it. */
16556 HOST_WIDE_INT max_address
;
16558 /* The last item on the list of fixes must be a barrier, so
16559 we can never run off the end of the list of fixes without
16560 last_barrier being set. */
16563 max_address
= minipool_vector_head
->max_address
;
16564 /* Check that there isn't another fix that is in range that
16565 we couldn't fit into this pool because the pool was
16566 already too large: we need to put the pool before such an
16567 instruction. The pool itself may come just after the
16568 fix because create_fix_barrier also allows space for a
16569 jump instruction. */
16570 if (ftmp
->address
< max_address
)
16571 max_address
= ftmp
->address
+ 1;
16573 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
16576 assign_minipool_offsets (last_barrier
);
16580 if (!BARRIER_P (ftmp
->insn
)
16581 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
16588 /* Scan over the fixes we have identified for this pool, fixing them
16589 up and adding the constants to the pool itself. */
16590 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
16591 this_fix
= this_fix
->next
)
16592 if (!BARRIER_P (this_fix
->insn
))
16595 = plus_constant (Pmode
,
16596 gen_rtx_LABEL_REF (VOIDmode
,
16597 minipool_vector_label
),
16598 this_fix
->minipool
->offset
);
16599 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
16602 dump_minipool (last_barrier
->insn
);
16606 /* From now on we must synthesize any constants that we can't handle
16607 directly. This can happen if the RTL gets split during final
16608 instruction generation. */
16609 after_arm_reorg
= 1;
16611 /* Free the minipool memory. */
16612 obstack_free (&minipool_obstack
, minipool_startobj
);
16615 /* Routines to output assembly language. */
16617 /* If the rtx is the correct value then return the string of the number.
16618 In this way we can ensure that valid double constants are generated even
16619 when cross compiling. */
16621 fp_immediate_constant (rtx x
)
16625 if (!fp_consts_inited
)
16628 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
16630 gcc_assert (REAL_VALUES_EQUAL (r
, value_fp0
));
16634 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
16635 static const char *
16636 fp_const_from_val (REAL_VALUE_TYPE
*r
)
16638 if (!fp_consts_inited
)
16641 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
16645 /* OPERANDS[0] is the entire list of insns that constitute pop,
16646 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
16647 is in the list, UPDATE is true iff the list contains explicit
16648 update of base register. */
16650 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
16656 const char *conditional
;
16657 int num_saves
= XVECLEN (operands
[0], 0);
16658 unsigned int regno
;
16659 unsigned int regno_base
= REGNO (operands
[1]);
16662 offset
+= update
? 1 : 0;
16663 offset
+= return_pc
? 1 : 0;
16665 /* Is the base register in the list? */
16666 for (i
= offset
; i
< num_saves
; i
++)
16668 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
16669 /* If SP is in the list, then the base register must be SP. */
16670 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
16671 /* If base register is in the list, there must be no explicit update. */
16672 if (regno
== regno_base
)
16673 gcc_assert (!update
);
16676 conditional
= reverse
? "%?%D0" : "%?%d0";
16677 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
16679 /* Output pop (not stmfd) because it has a shorter encoding. */
16680 gcc_assert (update
);
16681 sprintf (pattern
, "pop%s\t{", conditional
);
16685 /* Output ldmfd when the base register is SP, otherwise output ldmia.
16686 It's just a convention, their semantics are identical. */
16687 if (regno_base
== SP_REGNUM
)
16688 sprintf (pattern
, "ldm%sfd\t", conditional
);
16689 else if (TARGET_UNIFIED_ASM
)
16690 sprintf (pattern
, "ldmia%s\t", conditional
);
16692 sprintf (pattern
, "ldm%sia\t", conditional
);
16694 strcat (pattern
, reg_names
[regno_base
]);
16696 strcat (pattern
, "!, {");
16698 strcat (pattern
, ", {");
16701 /* Output the first destination register. */
16703 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
16705 /* Output the rest of the destination registers. */
16706 for (i
= offset
+ 1; i
< num_saves
; i
++)
16708 strcat (pattern
, ", ");
16710 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
16713 strcat (pattern
, "}");
16715 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
16716 strcat (pattern
, "^");
16718 output_asm_insn (pattern
, &cond
);
16722 /* Output the assembly for a store multiple. */
16725 vfp_output_fstmd (rtx
* operands
)
16732 strcpy (pattern
, "fstmfdd%?\t%m0!, {%P1");
16733 p
= strlen (pattern
);
16735 gcc_assert (REG_P (operands
[1]));
16737 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
16738 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
16740 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
16742 strcpy (&pattern
[p
], "}");
16744 output_asm_insn (pattern
, operands
);
16749 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
16750 number of bytes pushed. */
16753 vfp_emit_fstmd (int base_reg
, int count
)
16760 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
16761 register pairs are stored by a store multiple insn. We avoid this
16762 by pushing an extra pair. */
16763 if (count
== 2 && !arm_arch6
)
16765 if (base_reg
== LAST_VFP_REGNUM
- 3)
16770 /* FSTMD may not store more than 16 doubleword registers at once. Split
16771 larger stores into multiple parts (up to a maximum of two, in
16776 /* NOTE: base_reg is an internal register number, so each D register
16778 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
16779 saved
+= vfp_emit_fstmd (base_reg
, 16);
16783 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
16784 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
16786 reg
= gen_rtx_REG (DFmode
, base_reg
);
16789 XVECEXP (par
, 0, 0)
16790 = gen_rtx_SET (VOIDmode
,
16793 gen_rtx_PRE_MODIFY (Pmode
,
16796 (Pmode
, stack_pointer_rtx
,
16799 gen_rtx_UNSPEC (BLKmode
,
16800 gen_rtvec (1, reg
),
16801 UNSPEC_PUSH_MULT
));
16803 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
16804 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
16805 RTX_FRAME_RELATED_P (tmp
) = 1;
16806 XVECEXP (dwarf
, 0, 0) = tmp
;
16808 tmp
= gen_rtx_SET (VOIDmode
,
16809 gen_frame_mem (DFmode
, stack_pointer_rtx
),
16811 RTX_FRAME_RELATED_P (tmp
) = 1;
16812 XVECEXP (dwarf
, 0, 1) = tmp
;
16814 for (i
= 1; i
< count
; i
++)
16816 reg
= gen_rtx_REG (DFmode
, base_reg
);
16818 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
16820 tmp
= gen_rtx_SET (VOIDmode
,
16821 gen_frame_mem (DFmode
,
16822 plus_constant (Pmode
,
16826 RTX_FRAME_RELATED_P (tmp
) = 1;
16827 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
16830 par
= emit_insn (par
);
16831 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
16832 RTX_FRAME_RELATED_P (par
) = 1;
16837 /* Emit a call instruction with pattern PAT. ADDR is the address of
16838 the call target. */
16841 arm_emit_call_insn (rtx pat
, rtx addr
)
16845 insn
= emit_call_insn (pat
);
16847 /* The PIC register is live on entry to VxWorks PIC PLT entries.
16848 If the call might use such an entry, add a use of the PIC register
16849 to the instruction's CALL_INSN_FUNCTION_USAGE. */
16850 if (TARGET_VXWORKS_RTP
16852 && GET_CODE (addr
) == SYMBOL_REF
16853 && (SYMBOL_REF_DECL (addr
)
16854 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
16855 : !SYMBOL_REF_LOCAL_P (addr
)))
16857 require_pic_register ();
16858 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
16862 /* Output a 'call' insn. */
16864 output_call (rtx
*operands
)
16866 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
16868 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
16869 if (REGNO (operands
[0]) == LR_REGNUM
)
16871 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
16872 output_asm_insn ("mov%?\t%0, %|lr", operands
);
16875 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
16877 if (TARGET_INTERWORK
|| arm_arch4t
)
16878 output_asm_insn ("bx%?\t%0", operands
);
16880 output_asm_insn ("mov%?\t%|pc, %0", operands
);
16885 /* Output a 'call' insn that is a reference in memory. This is
16886 disabled for ARMv5 and we prefer a blx instead because otherwise
16887 there's a significant performance overhead. */
16889 output_call_mem (rtx
*operands
)
16891 gcc_assert (!arm_arch5
);
16892 if (TARGET_INTERWORK
)
16894 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
16895 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
16896 output_asm_insn ("bx%?\t%|ip", operands
);
16898 else if (regno_use_in (LR_REGNUM
, operands
[0]))
16900 /* LR is used in the memory address. We load the address in the
16901 first instruction. It's safe to use IP as the target of the
16902 load since the call will kill it anyway. */
16903 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
16904 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
16906 output_asm_insn ("bx%?\t%|ip", operands
);
16908 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
16912 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
16913 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
16920 /* Output a move from arm registers to arm registers of a long double
16921 OPERANDS[0] is the destination.
16922 OPERANDS[1] is the source. */
16924 output_mov_long_double_arm_from_arm (rtx
*operands
)
16926 /* We have to be careful here because the two might overlap. */
16927 int dest_start
= REGNO (operands
[0]);
16928 int src_start
= REGNO (operands
[1]);
16932 if (dest_start
< src_start
)
16934 for (i
= 0; i
< 3; i
++)
16936 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
16937 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
16938 output_asm_insn ("mov%?\t%0, %1", ops
);
16943 for (i
= 2; i
>= 0; i
--)
16945 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
16946 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
16947 output_asm_insn ("mov%?\t%0, %1", ops
);
16955 arm_emit_movpair (rtx dest
, rtx src
)
16957 /* If the src is an immediate, simplify it. */
16958 if (CONST_INT_P (src
))
16960 HOST_WIDE_INT val
= INTVAL (src
);
16961 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
16962 if ((val
>> 16) & 0x0000ffff)
16963 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
16965 GEN_INT ((val
>> 16) & 0x0000ffff));
16968 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
16969 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
16972 /* Output a move between double words. It must be REG<-MEM
16975 output_move_double (rtx
*operands
, bool emit
, int *count
)
16977 enum rtx_code code0
= GET_CODE (operands
[0]);
16978 enum rtx_code code1
= GET_CODE (operands
[1]);
16983 /* The only case when this might happen is when
16984 you are looking at the length of a DImode instruction
16985 that has an invalid constant in it. */
16986 if (code0
== REG
&& code1
!= MEM
)
16988 gcc_assert (!emit
);
16995 unsigned int reg0
= REGNO (operands
[0]);
16997 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
16999 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17001 switch (GET_CODE (XEXP (operands
[1], 0)))
17008 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17009 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17011 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17016 gcc_assert (TARGET_LDRD
);
17018 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
17025 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
17027 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
17035 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
17037 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
17042 gcc_assert (TARGET_LDRD
);
17044 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
17049 /* Autoicrement addressing modes should never have overlapping
17050 base and destination registers, and overlapping index registers
17051 are already prohibited, so this doesn't need to worry about
17053 otherops
[0] = operands
[0];
17054 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17055 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17057 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17059 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17061 /* Registers overlap so split out the increment. */
17064 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17065 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
17072 /* Use a single insn if we can.
17073 FIXME: IWMMXT allows offsets larger than ldrd can
17074 handle, fix these up with a pair of ldr. */
17076 || !CONST_INT_P (otherops
[2])
17077 || (INTVAL (otherops
[2]) > -256
17078 && INTVAL (otherops
[2]) < 256))
17081 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
17087 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17088 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17098 /* Use a single insn if we can.
17099 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17100 fix these up with a pair of ldr. */
17102 || !CONST_INT_P (otherops
[2])
17103 || (INTVAL (otherops
[2]) > -256
17104 && INTVAL (otherops
[2]) < 256))
17107 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
17113 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17114 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
17124 /* We might be able to use ldrd %0, %1 here. However the range is
17125 different to ldr/adr, and it is broken on some ARMv7-M
17126 implementations. */
17127 /* Use the second register of the pair to avoid problematic
17129 otherops
[1] = operands
[1];
17131 output_asm_insn ("adr%?\t%0, %1", otherops
);
17132 operands
[1] = otherops
[0];
17136 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17138 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
17145 /* ??? This needs checking for thumb2. */
17147 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
17148 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
17150 otherops
[0] = operands
[0];
17151 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
17152 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
17154 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
17156 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17158 switch ((int) INTVAL (otherops
[2]))
17162 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
17168 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
17174 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
17178 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
17179 operands
[1] = otherops
[0];
17181 && (REG_P (otherops
[2])
17183 || (CONST_INT_P (otherops
[2])
17184 && INTVAL (otherops
[2]) > -256
17185 && INTVAL (otherops
[2]) < 256)))
17187 if (reg_overlap_mentioned_p (operands
[0],
17191 /* Swap base and index registers over to
17192 avoid a conflict. */
17194 otherops
[1] = otherops
[2];
17197 /* If both registers conflict, it will usually
17198 have been fixed by a splitter. */
17199 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
17200 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
17204 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17205 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17212 otherops
[0] = operands
[0];
17214 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
17219 if (CONST_INT_P (otherops
[2]))
17223 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
17224 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
17226 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17232 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17238 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
17245 return "ldr%(d%)\t%0, [%1]";
17247 return "ldm%(ia%)\t%1, %M0";
17251 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
17252 /* Take care of overlapping base/data reg. */
17253 if (reg_mentioned_p (operands
[0], operands
[1]))
17257 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17258 output_asm_insn ("ldr%?\t%0, %1", operands
);
17268 output_asm_insn ("ldr%?\t%0, %1", operands
);
17269 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17279 /* Constraints should ensure this. */
17280 gcc_assert (code0
== MEM
&& code1
== REG
);
17281 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
17282 || (TARGET_ARM
&& TARGET_LDRD
));
17284 switch (GET_CODE (XEXP (operands
[0], 0)))
17290 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
17292 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
17297 gcc_assert (TARGET_LDRD
);
17299 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
17306 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
17308 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
17316 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
17318 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
17323 gcc_assert (TARGET_LDRD
);
17325 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
17330 otherops
[0] = operands
[1];
17331 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
17332 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
17334 /* IWMMXT allows offsets larger than ldrd can handle,
17335 fix these up with a pair of ldr. */
17337 && CONST_INT_P (otherops
[2])
17338 && (INTVAL(otherops
[2]) <= -256
17339 || INTVAL(otherops
[2]) >= 256))
17341 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17345 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
17346 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17355 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17356 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
17362 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17365 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
17370 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
17375 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
17376 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17378 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
17382 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
17389 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
17396 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
17401 && (REG_P (otherops
[2])
17403 || (CONST_INT_P (otherops
[2])
17404 && INTVAL (otherops
[2]) > -256
17405 && INTVAL (otherops
[2]) < 256)))
17407 otherops
[0] = operands
[1];
17408 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
17410 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
17416 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
17417 otherops
[1] = operands
[1];
17420 output_asm_insn ("str%?\t%1, %0", operands
);
17421 output_asm_insn ("str%?\t%H1, %0", otherops
);
17431 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17432 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17435 output_move_quad (rtx
*operands
)
17437 if (REG_P (operands
[0]))
17439 /* Load, or reg->reg move. */
17441 if (MEM_P (operands
[1]))
17443 switch (GET_CODE (XEXP (operands
[1], 0)))
17446 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17451 output_asm_insn ("adr%?\t%0, %1", operands
);
17452 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
17456 gcc_unreachable ();
17464 gcc_assert (REG_P (operands
[1]));
17466 dest
= REGNO (operands
[0]);
17467 src
= REGNO (operands
[1]);
17469 /* This seems pretty dumb, but hopefully GCC won't try to do it
17472 for (i
= 0; i
< 4; i
++)
17474 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
17475 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
17476 output_asm_insn ("mov%?\t%0, %1", ops
);
17479 for (i
= 3; i
>= 0; i
--)
17481 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
17482 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
17483 output_asm_insn ("mov%?\t%0, %1", ops
);
17489 gcc_assert (MEM_P (operands
[0]));
17490 gcc_assert (REG_P (operands
[1]));
17491 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
17493 switch (GET_CODE (XEXP (operands
[0], 0)))
17496 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
17500 gcc_unreachable ();
17507 /* Output a VFP load or store instruction. */
17510 output_move_vfp (rtx
*operands
)
17512 rtx reg
, mem
, addr
, ops
[2];
17513 int load
= REG_P (operands
[0]);
17514 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
17515 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
17518 enum machine_mode mode
;
17520 reg
= operands
[!load
];
17521 mem
= operands
[load
];
17523 mode
= GET_MODE (reg
);
17525 gcc_assert (REG_P (reg
));
17526 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
17527 gcc_assert (mode
== SFmode
17531 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
17532 gcc_assert (MEM_P (mem
));
17534 addr
= XEXP (mem
, 0);
17536 switch (GET_CODE (addr
))
17539 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
17540 ops
[0] = XEXP (addr
, 0);
17545 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
17546 ops
[0] = XEXP (addr
, 0);
17551 templ
= "f%s%c%%?\t%%%s0, %%1%s";
17557 sprintf (buff
, templ
,
17558 load
? "ld" : "st",
17561 integer_p
? "\t%@ int" : "");
17562 output_asm_insn (buff
, ops
);
17567 /* Output a Neon double-word or quad-word load or store, or a load
17568 or store for larger structure modes.
17570 WARNING: The ordering of elements is weird in big-endian mode,
17571 because the EABI requires that vectors stored in memory appear
17572 as though they were stored by a VSTM, as required by the EABI.
17573 GCC RTL defines element ordering based on in-memory order.
17574 This can be different from the architectural ordering of elements
17575 within a NEON register. The intrinsics defined in arm_neon.h use the
17576 NEON register element ordering, not the GCC RTL element ordering.
17578 For example, the in-memory ordering of a big-endian a quadword
17579 vector with 16-bit elements when stored from register pair {d0,d1}
17580 will be (lowest address first, d0[N] is NEON register element N):
17582 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
17584 When necessary, quadword registers (dN, dN+1) are moved to ARM
17585 registers from rN in the order:
17587 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
17589 So that STM/LDM can be used on vectors in ARM registers, and the
17590 same memory layout will result as if VSTM/VLDM were used.
17592 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
17593 possible, which allows use of appropriate alignment tags.
17594 Note that the choice of "64" is independent of the actual vector
17595 element size; this size simply ensures that the behavior is
17596 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
17598 Due to limitations of those instructions, use of VST1.64/VLD1.64
17599 is not possible if:
17600 - the address contains PRE_DEC, or
17601 - the mode refers to more than 4 double-word registers
17603 In those cases, it would be possible to replace VSTM/VLDM by a
17604 sequence of instructions; this is not currently implemented since
17605 this is not certain to actually improve performance. */
17608 output_move_neon (rtx
*operands
)
17610 rtx reg
, mem
, addr
, ops
[2];
17611 int regno
, nregs
, load
= REG_P (operands
[0]);
17614 enum machine_mode mode
;
17616 reg
= operands
[!load
];
17617 mem
= operands
[load
];
17619 mode
= GET_MODE (reg
);
17621 gcc_assert (REG_P (reg
));
17622 regno
= REGNO (reg
);
17623 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
17624 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
17625 || NEON_REGNO_OK_FOR_QUAD (regno
));
17626 gcc_assert (VALID_NEON_DREG_MODE (mode
)
17627 || VALID_NEON_QREG_MODE (mode
)
17628 || VALID_NEON_STRUCT_MODE (mode
));
17629 gcc_assert (MEM_P (mem
));
17631 addr
= XEXP (mem
, 0);
17633 /* Strip off const from addresses like (const (plus (...))). */
17634 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
17635 addr
= XEXP (addr
, 0);
17637 switch (GET_CODE (addr
))
17640 /* We have to use vldm / vstm for too-large modes. */
17643 templ
= "v%smia%%?\t%%0!, %%h1";
17644 ops
[0] = XEXP (addr
, 0);
17648 templ
= "v%s1.64\t%%h1, %%A0";
17655 /* We have to use vldm / vstm in this case, since there is no
17656 pre-decrement form of the vld1 / vst1 instructions. */
17657 templ
= "v%smdb%%?\t%%0!, %%h1";
17658 ops
[0] = XEXP (addr
, 0);
17663 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
17664 gcc_unreachable ();
17671 for (i
= 0; i
< nregs
; i
++)
17673 /* We're only using DImode here because it's a convenient size. */
17674 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
17675 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
17676 if (reg_overlap_mentioned_p (ops
[0], mem
))
17678 gcc_assert (overlap
== -1);
17683 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
17684 output_asm_insn (buff
, ops
);
17689 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
17690 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
17691 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
17692 output_asm_insn (buff
, ops
);
17699 /* We have to use vldm / vstm for too-large modes. */
17701 templ
= "v%smia%%?\t%%m0, %%h1";
17703 templ
= "v%s1.64\t%%h1, %%A0";
17709 sprintf (buff
, templ
, load
? "ld" : "st");
17710 output_asm_insn (buff
, ops
);
17715 /* Compute and return the length of neon_mov<mode>, where <mode> is
17716 one of VSTRUCT modes: EI, OI, CI or XI. */
17718 arm_attr_length_move_neon (rtx insn
)
17720 rtx reg
, mem
, addr
;
17722 enum machine_mode mode
;
17724 extract_insn_cached (insn
);
17726 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
17728 mode
= GET_MODE (recog_data
.operand
[0]);
17739 gcc_unreachable ();
17743 load
= REG_P (recog_data
.operand
[0]);
17744 reg
= recog_data
.operand
[!load
];
17745 mem
= recog_data
.operand
[load
];
17747 gcc_assert (MEM_P (mem
));
17749 mode
= GET_MODE (reg
);
17750 addr
= XEXP (mem
, 0);
17752 /* Strip off const from addresses like (const (plus (...))). */
17753 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
17754 addr
= XEXP (addr
, 0);
17756 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
17758 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
17765 /* Return nonzero if the offset in the address is an immediate. Otherwise,
17769 arm_address_offset_is_imm (rtx insn
)
17773 extract_insn_cached (insn
);
17775 if (REG_P (recog_data
.operand
[0]))
17778 mem
= recog_data
.operand
[0];
17780 gcc_assert (MEM_P (mem
));
17782 addr
= XEXP (mem
, 0);
17785 || (GET_CODE (addr
) == PLUS
17786 && REG_P (XEXP (addr
, 0))
17787 && CONST_INT_P (XEXP (addr
, 1))))
17793 /* Output an ADD r, s, #n where n may be too big for one instruction.
17794 If adding zero to one register, output nothing. */
17796 output_add_immediate (rtx
*operands
)
17798 HOST_WIDE_INT n
= INTVAL (operands
[2]);
17800 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
17803 output_multi_immediate (operands
,
17804 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
17807 output_multi_immediate (operands
,
17808 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
17815 /* Output a multiple immediate operation.
17816 OPERANDS is the vector of operands referred to in the output patterns.
17817 INSTR1 is the output pattern to use for the first constant.
17818 INSTR2 is the output pattern to use for subsequent constants.
17819 IMMED_OP is the index of the constant slot in OPERANDS.
17820 N is the constant value. */
17821 static const char *
17822 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
17823 int immed_op
, HOST_WIDE_INT n
)
17825 #if HOST_BITS_PER_WIDE_INT > 32
17831 /* Quick and easy output. */
17832 operands
[immed_op
] = const0_rtx
;
17833 output_asm_insn (instr1
, operands
);
17838 const char * instr
= instr1
;
17840 /* Note that n is never zero here (which would give no output). */
17841 for (i
= 0; i
< 32; i
+= 2)
17845 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
17846 output_asm_insn (instr
, operands
);
17856 /* Return the name of a shifter operation. */
17857 static const char *
17858 arm_shift_nmem(enum rtx_code code
)
17863 return ARM_LSL_NAME
;
17879 /* Return the appropriate ARM instruction for the operation code.
17880 The returned result should not be overwritten. OP is the rtx of the
17881 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
17884 arithmetic_instr (rtx op
, int shift_first_arg
)
17886 switch (GET_CODE (op
))
17892 return shift_first_arg
? "rsb" : "sub";
17907 return arm_shift_nmem(GET_CODE(op
));
17910 gcc_unreachable ();
17914 /* Ensure valid constant shifts and return the appropriate shift mnemonic
17915 for the operation code. The returned result should not be overwritten.
17916 OP is the rtx code of the shift.
17917 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
17919 static const char *
17920 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
17923 enum rtx_code code
= GET_CODE (op
);
17928 if (!CONST_INT_P (XEXP (op
, 1)))
17930 output_operand_lossage ("invalid shift operand");
17935 *amountp
= 32 - INTVAL (XEXP (op
, 1));
17943 mnem
= arm_shift_nmem(code
);
17944 if (CONST_INT_P (XEXP (op
, 1)))
17946 *amountp
= INTVAL (XEXP (op
, 1));
17948 else if (REG_P (XEXP (op
, 1)))
17955 output_operand_lossage ("invalid shift operand");
17961 /* We never have to worry about the amount being other than a
17962 power of 2, since this case can never be reloaded from a reg. */
17963 if (!CONST_INT_P (XEXP (op
, 1)))
17965 output_operand_lossage ("invalid shift operand");
17969 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
17971 /* Amount must be a power of two. */
17972 if (*amountp
& (*amountp
- 1))
17974 output_operand_lossage ("invalid shift operand");
17978 *amountp
= int_log2 (*amountp
);
17979 return ARM_LSL_NAME
;
17982 output_operand_lossage ("invalid shift operand");
17986 /* This is not 100% correct, but follows from the desire to merge
17987 multiplication by a power of 2 with the recognizer for a
17988 shift. >=32 is not a valid shift for "lsl", so we must try and
17989 output a shift that produces the correct arithmetical result.
17990 Using lsr #32 is identical except for the fact that the carry bit
17991 is not set correctly if we set the flags; but we never use the
17992 carry bit from such an operation, so we can ignore that. */
17993 if (code
== ROTATERT
)
17994 /* Rotate is just modulo 32. */
17996 else if (*amountp
!= (*amountp
& 31))
17998 if (code
== ASHIFT
)
18003 /* Shifts of 0 are no-ops. */
18010 /* Obtain the shift from the POWER of two. */
18012 static HOST_WIDE_INT
18013 int_log2 (HOST_WIDE_INT power
)
18015 HOST_WIDE_INT shift
= 0;
18017 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
18019 gcc_assert (shift
<= 31);
18026 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18027 because /bin/as is horribly restrictive. The judgement about
18028 whether or not each character is 'printable' (and can be output as
18029 is) or not (and must be printed with an octal escape) must be made
18030 with reference to the *host* character set -- the situation is
18031 similar to that discussed in the comments above pp_c_char in
18032 c-pretty-print.c. */
18034 #define MAX_ASCII_LEN 51
18037 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18040 int len_so_far
= 0;
18042 fputs ("\t.ascii\t\"", stream
);
18044 for (i
= 0; i
< len
; i
++)
18048 if (len_so_far
>= MAX_ASCII_LEN
)
18050 fputs ("\"\n\t.ascii\t\"", stream
);
18056 if (c
== '\\' || c
== '\"')
18058 putc ('\\', stream
);
18066 fprintf (stream
, "\\%03o", c
);
18071 fputs ("\"\n", stream
);
18074 /* Compute the register save mask for registers 0 through 12
18075 inclusive. This code is used by arm_compute_save_reg_mask. */
18077 static unsigned long
18078 arm_compute_save_reg0_reg12_mask (void)
18080 unsigned long func_type
= arm_current_func_type ();
18081 unsigned long save_reg_mask
= 0;
18084 if (IS_INTERRUPT (func_type
))
18086 unsigned int max_reg
;
18087 /* Interrupt functions must not corrupt any registers,
18088 even call clobbered ones. If this is a leaf function
18089 we can just examine the registers used by the RTL, but
18090 otherwise we have to assume that whatever function is
18091 called might clobber anything, and so we have to save
18092 all the call-clobbered registers as well. */
18093 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18094 /* FIQ handlers have registers r8 - r12 banked, so
18095 we only need to check r0 - r7, Normal ISRs only
18096 bank r14 and r15, so we must check up to r12.
18097 r13 is the stack pointer which is always preserved,
18098 so we do not need to consider it here. */
18103 for (reg
= 0; reg
<= max_reg
; reg
++)
18104 if (df_regs_ever_live_p (reg
)
18105 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18106 save_reg_mask
|= (1 << reg
);
18108 /* Also save the pic base register if necessary. */
18110 && !TARGET_SINGLE_PIC_BASE
18111 && arm_pic_register
!= INVALID_REGNUM
18112 && crtl
->uses_pic_offset_table
)
18113 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18115 else if (IS_VOLATILE(func_type
))
18117 /* For noreturn functions we historically omitted register saves
18118 altogether. However this really messes up debugging. As a
18119 compromise save just the frame pointers. Combined with the link
18120 register saved elsewhere this should be sufficient to get
18122 if (frame_pointer_needed
)
18123 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18124 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
18125 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18126 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
18127 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
18131 /* In the normal case we only need to save those registers
18132 which are call saved and which are used by this function. */
18133 for (reg
= 0; reg
<= 11; reg
++)
18134 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
18135 save_reg_mask
|= (1 << reg
);
18137 /* Handle the frame pointer as a special case. */
18138 if (frame_pointer_needed
)
18139 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18141 /* If we aren't loading the PIC register,
18142 don't stack it even though it may be live. */
18144 && !TARGET_SINGLE_PIC_BASE
18145 && arm_pic_register
!= INVALID_REGNUM
18146 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
18147 || crtl
->uses_pic_offset_table
))
18148 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18150 /* The prologue will copy SP into R0, so save it. */
18151 if (IS_STACKALIGN (func_type
))
18152 save_reg_mask
|= 1;
18155 /* Save registers so the exception handler can modify them. */
18156 if (crtl
->calls_eh_return
)
18162 reg
= EH_RETURN_DATA_REGNO (i
);
18163 if (reg
== INVALID_REGNUM
)
18165 save_reg_mask
|= 1 << reg
;
18169 return save_reg_mask
;
18172 /* Return true if r3 is live at the start of the function. */
18175 arm_r3_live_at_start_p (void)
18177 /* Just look at cfg info, which is still close enough to correct at this
18178 point. This gives false positives for broken functions that might use
18179 uninitialized data that happens to be allocated in r3, but who cares? */
18180 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 3);
18183 /* Compute the number of bytes used to store the static chain register on the
18184 stack, above the stack frame. We need to know this accurately to get the
18185 alignment of the rest of the stack frame correct. */
18188 arm_compute_static_chain_stack_bytes (void)
18190 /* See the defining assertion in arm_expand_prologue. */
18191 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
18192 && IS_NESTED (arm_current_func_type ())
18193 && arm_r3_live_at_start_p ()
18194 && crtl
->args
.pretend_args_size
== 0)
18200 /* Compute a bit mask of which registers need to be
18201 saved on the stack for the current function.
18202 This is used by arm_get_frame_offsets, which may add extra registers. */
18204 static unsigned long
18205 arm_compute_save_reg_mask (void)
18207 unsigned int save_reg_mask
= 0;
18208 unsigned long func_type
= arm_current_func_type ();
18211 if (IS_NAKED (func_type
))
18212 /* This should never really happen. */
18215 /* If we are creating a stack frame, then we must save the frame pointer,
18216 IP (which will hold the old stack pointer), LR and the PC. */
18217 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
18219 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
18222 | (1 << PC_REGNUM
);
18224 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
18226 /* Decide if we need to save the link register.
18227 Interrupt routines have their own banked link register,
18228 so they never need to save it.
18229 Otherwise if we do not use the link register we do not need to save
18230 it. If we are pushing other registers onto the stack however, we
18231 can save an instruction in the epilogue by pushing the link register
18232 now and then popping it back into the PC. This incurs extra memory
18233 accesses though, so we only do it when optimizing for size, and only
18234 if we know that we will not need a fancy return sequence. */
18235 if (df_regs_ever_live_p (LR_REGNUM
)
18238 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
18239 && !crtl
->calls_eh_return
))
18240 save_reg_mask
|= 1 << LR_REGNUM
;
18242 if (cfun
->machine
->lr_save_eliminated
)
18243 save_reg_mask
&= ~ (1 << LR_REGNUM
);
18245 if (TARGET_REALLY_IWMMXT
18246 && ((bit_count (save_reg_mask
)
18247 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
18248 arm_compute_static_chain_stack_bytes())
18251 /* The total number of registers that are going to be pushed
18252 onto the stack is odd. We need to ensure that the stack
18253 is 64-bit aligned before we start to save iWMMXt registers,
18254 and also before we start to create locals. (A local variable
18255 might be a double or long long which we will load/store using
18256 an iWMMXt instruction). Therefore we need to push another
18257 ARM register, so that the stack will be 64-bit aligned. We
18258 try to avoid using the arg registers (r0 -r3) as they might be
18259 used to pass values in a tail call. */
18260 for (reg
= 4; reg
<= 12; reg
++)
18261 if ((save_reg_mask
& (1 << reg
)) == 0)
18265 save_reg_mask
|= (1 << reg
);
18268 cfun
->machine
->sibcall_blocked
= 1;
18269 save_reg_mask
|= (1 << 3);
18273 /* We may need to push an additional register for use initializing the
18274 PIC base register. */
18275 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
18276 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
18278 reg
= thumb_find_work_register (1 << 4);
18279 if (!call_used_regs
[reg
])
18280 save_reg_mask
|= (1 << reg
);
18283 return save_reg_mask
;
18287 /* Compute a bit mask of which registers need to be
18288 saved on the stack for the current function. */
18289 static unsigned long
18290 thumb1_compute_save_reg_mask (void)
18292 unsigned long mask
;
18296 for (reg
= 0; reg
< 12; reg
++)
18297 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
18301 && !TARGET_SINGLE_PIC_BASE
18302 && arm_pic_register
!= INVALID_REGNUM
18303 && crtl
->uses_pic_offset_table
)
18304 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18306 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18307 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
18308 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18310 /* LR will also be pushed if any lo regs are pushed. */
18311 if (mask
& 0xff || thumb_force_lr_save ())
18312 mask
|= (1 << LR_REGNUM
);
18314 /* Make sure we have a low work register if we need one.
18315 We will need one if we are going to push a high register,
18316 but we are not currently intending to push a low register. */
18317 if ((mask
& 0xff) == 0
18318 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
18320 /* Use thumb_find_work_register to choose which register
18321 we will use. If the register is live then we will
18322 have to push it. Use LAST_LO_REGNUM as our fallback
18323 choice for the register to select. */
18324 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
18325 /* Make sure the register returned by thumb_find_work_register is
18326 not part of the return value. */
18327 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
18328 reg
= LAST_LO_REGNUM
;
18330 if (! call_used_regs
[reg
])
18334 /* The 504 below is 8 bytes less than 512 because there are two possible
18335 alignment words. We can't tell here if they will be present or not so we
18336 have to play it safe and assume that they are. */
18337 if ((CALLER_INTERWORKING_SLOT_SIZE
+
18338 ROUND_UP_WORD (get_frame_size ()) +
18339 crtl
->outgoing_args_size
) >= 504)
18341 /* This is the same as the code in thumb1_expand_prologue() which
18342 determines which register to use for stack decrement. */
18343 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
18344 if (mask
& (1 << reg
))
18347 if (reg
> LAST_LO_REGNUM
)
18349 /* Make sure we have a register available for stack decrement. */
18350 mask
|= 1 << LAST_LO_REGNUM
;
18358 /* Return the number of bytes required to save VFP registers. */
18360 arm_get_vfp_saved_size (void)
18362 unsigned int regno
;
18367 /* Space for saved VFP registers. */
18368 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
18371 for (regno
= FIRST_VFP_REGNUM
;
18372 regno
< LAST_VFP_REGNUM
;
18375 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
18376 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
18380 /* Workaround ARM10 VFPr1 bug. */
18381 if (count
== 2 && !arm_arch6
)
18383 saved
+= count
* 8;
18392 if (count
== 2 && !arm_arch6
)
18394 saved
+= count
* 8;
18401 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18402 everything bar the final return instruction. If simple_return is true,
18403 then do not output epilogue, because it has already been emitted in RTL. */
18405 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
18406 bool simple_return
)
18408 char conditional
[10];
18411 unsigned long live_regs_mask
;
18412 unsigned long func_type
;
18413 arm_stack_offsets
*offsets
;
18415 func_type
= arm_current_func_type ();
18417 if (IS_NAKED (func_type
))
18420 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
18422 /* If this function was declared non-returning, and we have
18423 found a tail call, then we have to trust that the called
18424 function won't return. */
18429 /* Otherwise, trap an attempted return by aborting. */
18431 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
18433 assemble_external_libcall (ops
[1]);
18434 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
18440 gcc_assert (!cfun
->calls_alloca
|| really_return
);
18442 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
18444 cfun
->machine
->return_used_this_function
= 1;
18446 offsets
= arm_get_frame_offsets ();
18447 live_regs_mask
= offsets
->saved_regs_mask
;
18449 if (!simple_return
&& live_regs_mask
)
18451 const char * return_reg
;
18453 /* If we do not have any special requirements for function exit
18454 (e.g. interworking) then we can load the return address
18455 directly into the PC. Otherwise we must load it into LR. */
18457 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
18458 return_reg
= reg_names
[PC_REGNUM
];
18460 return_reg
= reg_names
[LR_REGNUM
];
18462 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
18464 /* There are three possible reasons for the IP register
18465 being saved. 1) a stack frame was created, in which case
18466 IP contains the old stack pointer, or 2) an ISR routine
18467 corrupted it, or 3) it was saved to align the stack on
18468 iWMMXt. In case 1, restore IP into SP, otherwise just
18470 if (frame_pointer_needed
)
18472 live_regs_mask
&= ~ (1 << IP_REGNUM
);
18473 live_regs_mask
|= (1 << SP_REGNUM
);
18476 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
18479 /* On some ARM architectures it is faster to use LDR rather than
18480 LDM to load a single register. On other architectures, the
18481 cost is the same. In 26 bit mode, or for exception handlers,
18482 we have to use LDM to load the PC so that the CPSR is also
18484 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
18485 if (live_regs_mask
== (1U << reg
))
18488 if (reg
<= LAST_ARM_REGNUM
18489 && (reg
!= LR_REGNUM
18491 || ! IS_INTERRUPT (func_type
)))
18493 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
18494 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
18501 /* Generate the load multiple instruction to restore the
18502 registers. Note we can get here, even if
18503 frame_pointer_needed is true, but only if sp already
18504 points to the base of the saved core registers. */
18505 if (live_regs_mask
& (1 << SP_REGNUM
))
18507 unsigned HOST_WIDE_INT stack_adjust
;
18509 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
18510 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
18512 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
18513 if (TARGET_UNIFIED_ASM
)
18514 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
18516 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
18519 /* If we can't use ldmib (SA110 bug),
18520 then try to pop r3 instead. */
18522 live_regs_mask
|= 1 << 3;
18524 if (TARGET_UNIFIED_ASM
)
18525 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
18527 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
18531 if (TARGET_UNIFIED_ASM
)
18532 sprintf (instr
, "pop%s\t{", conditional
);
18534 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
18536 p
= instr
+ strlen (instr
);
18538 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
18539 if (live_regs_mask
& (1 << reg
))
18541 int l
= strlen (reg_names
[reg
]);
18547 memcpy (p
, ", ", 2);
18551 memcpy (p
, "%|", 2);
18552 memcpy (p
+ 2, reg_names
[reg
], l
);
18556 if (live_regs_mask
& (1 << LR_REGNUM
))
18558 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
18559 /* If returning from an interrupt, restore the CPSR. */
18560 if (IS_INTERRUPT (func_type
))
18567 output_asm_insn (instr
, & operand
);
18569 /* See if we need to generate an extra instruction to
18570 perform the actual function return. */
18572 && func_type
!= ARM_FT_INTERWORKED
18573 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
18575 /* The return has already been handled
18576 by loading the LR into the PC. */
18583 switch ((int) ARM_FUNC_TYPE (func_type
))
18587 /* ??? This is wrong for unified assembly syntax. */
18588 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
18591 case ARM_FT_INTERWORKED
:
18592 sprintf (instr
, "bx%s\t%%|lr", conditional
);
18595 case ARM_FT_EXCEPTION
:
18596 /* ??? This is wrong for unified assembly syntax. */
18597 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
18601 /* Use bx if it's available. */
18602 if (arm_arch5
|| arm_arch4t
)
18603 sprintf (instr
, "bx%s\t%%|lr", conditional
);
18605 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
18609 output_asm_insn (instr
, & operand
);
18615 /* Write the function name into the code section, directly preceding
18616 the function prologue.
18618 Code will be output similar to this:
18620 .ascii "arm_poke_function_name", 0
18623 .word 0xff000000 + (t1 - t0)
18624 arm_poke_function_name
18626 stmfd sp!, {fp, ip, lr, pc}
18629 When performing a stack backtrace, code can inspect the value
18630 of 'pc' stored at 'fp' + 0. If the trace function then looks
18631 at location pc - 12 and the top 8 bits are set, then we know
18632 that there is a function name embedded immediately preceding this
18633 location and has length ((pc[-3]) & 0xff000000).
18635 We assume that pc is declared as a pointer to an unsigned long.
18637 It is of no benefit to output the function name if we are assembling
18638 a leaf function. These function types will not contain a stack
18639 backtrace structure, therefore it is not possible to determine the
18642 arm_poke_function_name (FILE *stream
, const char *name
)
18644 unsigned long alignlength
;
18645 unsigned long length
;
18648 length
= strlen (name
) + 1;
18649 alignlength
= ROUND_UP_WORD (length
);
18651 ASM_OUTPUT_ASCII (stream
, name
, length
);
18652 ASM_OUTPUT_ALIGN (stream
, 2);
18653 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
18654 assemble_aligned_integer (UNITS_PER_WORD
, x
);
18657 /* Place some comments into the assembler stream
18658 describing the current function. */
18660 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
18662 unsigned long func_type
;
18664 /* ??? Do we want to print some of the below anyway? */
18668 /* Sanity check. */
18669 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
18671 func_type
= arm_current_func_type ();
18673 switch ((int) ARM_FUNC_TYPE (func_type
))
18676 case ARM_FT_NORMAL
:
18678 case ARM_FT_INTERWORKED
:
18679 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
18682 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
18685 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
18687 case ARM_FT_EXCEPTION
:
18688 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
18692 if (IS_NAKED (func_type
))
18693 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
18695 if (IS_VOLATILE (func_type
))
18696 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
18698 if (IS_NESTED (func_type
))
18699 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
18700 if (IS_STACKALIGN (func_type
))
18701 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
18703 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
18705 crtl
->args
.pretend_args_size
, frame_size
);
18707 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
18708 frame_pointer_needed
,
18709 cfun
->machine
->uses_anonymous_args
);
18711 if (cfun
->machine
->lr_save_eliminated
)
18712 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
18714 if (crtl
->calls_eh_return
)
18715 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
18720 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
18721 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
18723 arm_stack_offsets
*offsets
;
18729 /* Emit any call-via-reg trampolines that are needed for v4t support
18730 of call_reg and call_value_reg type insns. */
18731 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
18733 rtx label
= cfun
->machine
->call_via
[regno
];
18737 switch_to_section (function_section (current_function_decl
));
18738 targetm
.asm_out
.internal_label (asm_out_file
, "L",
18739 CODE_LABEL_NUMBER (label
));
18740 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
18744 /* ??? Probably not safe to set this here, since it assumes that a
18745 function will be emitted as assembly immediately after we generate
18746 RTL for it. This does not happen for inline functions. */
18747 cfun
->machine
->return_used_this_function
= 0;
18749 else /* TARGET_32BIT */
18751 /* We need to take into account any stack-frame rounding. */
18752 offsets
= arm_get_frame_offsets ();
18754 gcc_assert (!use_return_insn (FALSE
, NULL
)
18755 || (cfun
->machine
->return_used_this_function
!= 0)
18756 || offsets
->saved_regs
== offsets
->outgoing_args
18757 || frame_pointer_needed
);
18759 /* Reset the ARM-specific per-function variables. */
18760 after_arm_reorg
= 0;
18764 /* Generate and emit a sequence of insns equivalent to PUSH, but using
18765 STR and STRD. If an even number of registers are being pushed, one
18766 or more STRD patterns are created for each register pair. If an
18767 odd number of registers are pushed, emit an initial STR followed by
18768 as many STRD instructions as are needed. This works best when the
18769 stack is initially 64-bit aligned (the normal case), since it
18770 ensures that each STRD is also 64-bit aligned. */
18772 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
18777 rtx par
= NULL_RTX
;
18778 rtx dwarf
= NULL_RTX
;
18782 num_regs
= bit_count (saved_regs_mask
);
18784 /* Must be at least one register to save, and can't save SP or PC. */
18785 gcc_assert (num_regs
> 0 && num_regs
<= 14);
18786 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
18787 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
18789 /* Create sequence for DWARF info. All the frame-related data for
18790 debugging is held in this wrapper. */
18791 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
18793 /* Describe the stack adjustment. */
18794 tmp
= gen_rtx_SET (VOIDmode
,
18796 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
18797 RTX_FRAME_RELATED_P (tmp
) = 1;
18798 XVECEXP (dwarf
, 0, 0) = tmp
;
18800 /* Find the first register. */
18801 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
18806 /* If there's an odd number of registers to push. Start off by
18807 pushing a single register. This ensures that subsequent strd
18808 operations are dword aligned (assuming that SP was originally
18809 64-bit aligned). */
18810 if ((num_regs
& 1) != 0)
18812 rtx reg
, mem
, insn
;
18814 reg
= gen_rtx_REG (SImode
, regno
);
18816 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
18817 stack_pointer_rtx
));
18819 mem
= gen_frame_mem (Pmode
,
18821 (Pmode
, stack_pointer_rtx
,
18822 plus_constant (Pmode
, stack_pointer_rtx
,
18825 tmp
= gen_rtx_SET (VOIDmode
, mem
, reg
);
18826 RTX_FRAME_RELATED_P (tmp
) = 1;
18827 insn
= emit_insn (tmp
);
18828 RTX_FRAME_RELATED_P (insn
) = 1;
18829 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
18830 tmp
= gen_rtx_SET (VOIDmode
, gen_frame_mem (Pmode
, stack_pointer_rtx
),
18832 RTX_FRAME_RELATED_P (tmp
) = 1;
18835 XVECEXP (dwarf
, 0, i
) = tmp
;
18839 while (i
< num_regs
)
18840 if (saved_regs_mask
& (1 << regno
))
18842 rtx reg1
, reg2
, mem1
, mem2
;
18843 rtx tmp0
, tmp1
, tmp2
;
18846 /* Find the register to pair with this one. */
18847 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
18851 reg1
= gen_rtx_REG (SImode
, regno
);
18852 reg2
= gen_rtx_REG (SImode
, regno2
);
18859 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18862 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18864 -4 * (num_regs
- 1)));
18865 tmp0
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18866 plus_constant (Pmode
, stack_pointer_rtx
,
18868 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
18869 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
18870 RTX_FRAME_RELATED_P (tmp0
) = 1;
18871 RTX_FRAME_RELATED_P (tmp1
) = 1;
18872 RTX_FRAME_RELATED_P (tmp2
) = 1;
18873 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
18874 XVECEXP (par
, 0, 0) = tmp0
;
18875 XVECEXP (par
, 0, 1) = tmp1
;
18876 XVECEXP (par
, 0, 2) = tmp2
;
18877 insn
= emit_insn (par
);
18878 RTX_FRAME_RELATED_P (insn
) = 1;
18879 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
18883 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18886 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18889 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
18890 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
18891 RTX_FRAME_RELATED_P (tmp1
) = 1;
18892 RTX_FRAME_RELATED_P (tmp2
) = 1;
18893 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
18894 XVECEXP (par
, 0, 0) = tmp1
;
18895 XVECEXP (par
, 0, 1) = tmp2
;
18899 /* Create unwind information. This is an approximation. */
18900 tmp1
= gen_rtx_SET (VOIDmode
,
18901 gen_frame_mem (Pmode
,
18902 plus_constant (Pmode
,
18906 tmp2
= gen_rtx_SET (VOIDmode
,
18907 gen_frame_mem (Pmode
,
18908 plus_constant (Pmode
,
18913 RTX_FRAME_RELATED_P (tmp1
) = 1;
18914 RTX_FRAME_RELATED_P (tmp2
) = 1;
18915 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
18916 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
18918 regno
= regno2
+ 1;
18926 /* STRD in ARM mode requires consecutive registers. This function emits STRD
18927 whenever possible, otherwise it emits single-word stores. The first store
18928 also allocates stack space for all saved registers, using writeback with
18929 post-addressing mode. All other stores use offset addressing. If no STRD
18930 can be emitted, this function emits a sequence of single-word stores,
18931 and not an STM as before, because single-word stores provide more freedom
18932 scheduling and can be turned into an STM by peephole optimizations. */
18934 arm_emit_strd_push (unsigned long saved_regs_mask
)
18937 int i
, j
, dwarf_index
= 0;
18939 rtx dwarf
= NULL_RTX
;
18940 rtx insn
= NULL_RTX
;
18943 /* TODO: A more efficient code can be emitted by changing the
18944 layout, e.g., first push all pairs that can use STRD to keep the
18945 stack aligned, and then push all other registers. */
18946 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
18947 if (saved_regs_mask
& (1 << i
))
18950 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
18951 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
18952 gcc_assert (num_regs
> 0);
18954 /* Create sequence for DWARF info. */
18955 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
18957 /* For dwarf info, we generate explicit stack update. */
18958 tmp
= gen_rtx_SET (VOIDmode
,
18960 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
18961 RTX_FRAME_RELATED_P (tmp
) = 1;
18962 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
18964 /* Save registers. */
18965 offset
= - 4 * num_regs
;
18967 while (j
<= LAST_ARM_REGNUM
)
18968 if (saved_regs_mask
& (1 << j
))
18971 && (saved_regs_mask
& (1 << (j
+ 1))))
18973 /* Current register and previous register form register pair for
18974 which STRD can be generated. */
18977 /* Allocate stack space for all saved registers. */
18978 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
18979 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
18980 mem
= gen_frame_mem (DImode
, tmp
);
18983 else if (offset
> 0)
18984 mem
= gen_frame_mem (DImode
,
18985 plus_constant (Pmode
,
18989 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
18991 tmp
= gen_rtx_SET (DImode
, mem
, gen_rtx_REG (DImode
, j
));
18992 RTX_FRAME_RELATED_P (tmp
) = 1;
18993 tmp
= emit_insn (tmp
);
18995 /* Record the first store insn. */
18996 if (dwarf_index
== 1)
18999 /* Generate dwarf info. */
19000 mem
= gen_frame_mem (SImode
,
19001 plus_constant (Pmode
,
19004 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19005 RTX_FRAME_RELATED_P (tmp
) = 1;
19006 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19008 mem
= gen_frame_mem (SImode
,
19009 plus_constant (Pmode
,
19012 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
+ 1));
19013 RTX_FRAME_RELATED_P (tmp
) = 1;
19014 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19021 /* Emit a single word store. */
19024 /* Allocate stack space for all saved registers. */
19025 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19026 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19027 mem
= gen_frame_mem (SImode
, tmp
);
19030 else if (offset
> 0)
19031 mem
= gen_frame_mem (SImode
,
19032 plus_constant (Pmode
,
19036 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19038 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19039 RTX_FRAME_RELATED_P (tmp
) = 1;
19040 tmp
= emit_insn (tmp
);
19042 /* Record the first store insn. */
19043 if (dwarf_index
== 1)
19046 /* Generate dwarf info. */
19047 mem
= gen_frame_mem (SImode
,
19048 plus_constant(Pmode
,
19051 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19052 RTX_FRAME_RELATED_P (tmp
) = 1;
19053 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19062 /* Attach dwarf info to the first insn we generate. */
19063 gcc_assert (insn
!= NULL_RTX
);
19064 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19065 RTX_FRAME_RELATED_P (insn
) = 1;
19068 /* Generate and emit an insn that we will recognize as a push_multi.
19069 Unfortunately, since this insn does not reflect very well the actual
19070 semantics of the operation, we need to annotate the insn for the benefit
19071 of DWARF2 frame unwind information. */
19073 emit_multi_reg_push (unsigned long mask
)
19076 int num_dwarf_regs
;
19080 int dwarf_par_index
;
19083 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19084 if (mask
& (1 << i
))
19087 gcc_assert (num_regs
&& num_regs
<= 16);
19089 /* We don't record the PC in the dwarf frame information. */
19090 num_dwarf_regs
= num_regs
;
19091 if (mask
& (1 << PC_REGNUM
))
19094 /* For the body of the insn we are going to generate an UNSPEC in
19095 parallel with several USEs. This allows the insn to be recognized
19096 by the push_multi pattern in the arm.md file.
19098 The body of the insn looks something like this:
19101 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19102 (const_int:SI <num>)))
19103 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19109 For the frame note however, we try to be more explicit and actually
19110 show each register being stored into the stack frame, plus a (single)
19111 decrement of the stack pointer. We do it this way in order to be
19112 friendly to the stack unwinding code, which only wants to see a single
19113 stack decrement per instruction. The RTL we generate for the note looks
19114 something like this:
19117 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19118 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19119 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19120 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19124 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19125 instead we'd have a parallel expression detailing all
19126 the stores to the various memory addresses so that debug
19127 information is more up-to-date. Remember however while writing
19128 this to take care of the constraints with the push instruction.
19130 Note also that this has to be taken care of for the VFP registers.
19132 For more see PR43399. */
19134 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
19135 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
19136 dwarf_par_index
= 1;
19138 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19140 if (mask
& (1 << i
))
19142 reg
= gen_rtx_REG (SImode
, i
);
19144 XVECEXP (par
, 0, 0)
19145 = gen_rtx_SET (VOIDmode
,
19148 gen_rtx_PRE_MODIFY (Pmode
,
19151 (Pmode
, stack_pointer_rtx
,
19154 gen_rtx_UNSPEC (BLKmode
,
19155 gen_rtvec (1, reg
),
19156 UNSPEC_PUSH_MULT
));
19158 if (i
!= PC_REGNUM
)
19160 tmp
= gen_rtx_SET (VOIDmode
,
19161 gen_frame_mem (SImode
, stack_pointer_rtx
),
19163 RTX_FRAME_RELATED_P (tmp
) = 1;
19164 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
19172 for (j
= 1, i
++; j
< num_regs
; i
++)
19174 if (mask
& (1 << i
))
19176 reg
= gen_rtx_REG (SImode
, i
);
19178 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
19180 if (i
!= PC_REGNUM
)
19183 = gen_rtx_SET (VOIDmode
,
19186 plus_constant (Pmode
, stack_pointer_rtx
,
19189 RTX_FRAME_RELATED_P (tmp
) = 1;
19190 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19197 par
= emit_insn (par
);
19199 tmp
= gen_rtx_SET (VOIDmode
,
19201 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19202 RTX_FRAME_RELATED_P (tmp
) = 1;
19203 XVECEXP (dwarf
, 0, 0) = tmp
;
19205 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19210 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19211 SIZE is the offset to be adjusted.
19212 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19214 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
19218 RTX_FRAME_RELATED_P (insn
) = 1;
19219 dwarf
= gen_rtx_SET (VOIDmode
, dest
, plus_constant (Pmode
, src
, size
));
19220 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
19223 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19224 SAVED_REGS_MASK shows which registers need to be restored.
19226 Unfortunately, since this insn does not reflect very well the actual
19227 semantics of the operation, we need to annotate the insn for the benefit
19228 of DWARF2 frame unwind information. */
19230 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
19235 rtx dwarf
= NULL_RTX
;
19241 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
19242 offset_adj
= return_in_pc
? 1 : 0;
19243 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19244 if (saved_regs_mask
& (1 << i
))
19247 gcc_assert (num_regs
&& num_regs
<= 16);
19249 /* If SP is in reglist, then we don't emit SP update insn. */
19250 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
19252 /* The parallel needs to hold num_regs SETs
19253 and one SET for the stack update. */
19254 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
19259 XVECEXP (par
, 0, 0) = tmp
;
19264 /* Increment the stack pointer, based on there being
19265 num_regs 4-byte registers to restore. */
19266 tmp
= gen_rtx_SET (VOIDmode
,
19268 plus_constant (Pmode
,
19271 RTX_FRAME_RELATED_P (tmp
) = 1;
19272 XVECEXP (par
, 0, offset_adj
) = tmp
;
19275 /* Now restore every reg, which may include PC. */
19276 for (j
= 0, i
= 0; j
< num_regs
; i
++)
19277 if (saved_regs_mask
& (1 << i
))
19279 reg
= gen_rtx_REG (SImode
, i
);
19280 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
19282 /* Emit single load with writeback. */
19283 tmp
= gen_frame_mem (SImode
,
19284 gen_rtx_POST_INC (Pmode
,
19285 stack_pointer_rtx
));
19286 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, tmp
));
19287 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19291 tmp
= gen_rtx_SET (VOIDmode
,
19295 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
19296 RTX_FRAME_RELATED_P (tmp
) = 1;
19297 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
19299 /* We need to maintain a sequence for DWARF info too. As dwarf info
19300 should not have PC, skip PC. */
19301 if (i
!= PC_REGNUM
)
19302 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19308 par
= emit_jump_insn (par
);
19310 par
= emit_insn (par
);
19312 REG_NOTES (par
) = dwarf
;
19314 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
19315 stack_pointer_rtx
, stack_pointer_rtx
);
19318 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19319 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19321 Unfortunately, since this insn does not reflect very well the actual
19322 semantics of the operation, we need to annotate the insn for the benefit
19323 of DWARF2 frame unwind information. */
19325 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
19329 rtx dwarf
= NULL_RTX
;
19332 gcc_assert (num_regs
&& num_regs
<= 32);
19334 /* Workaround ARM10 VFPr1 bug. */
19335 if (num_regs
== 2 && !arm_arch6
)
19337 if (first_reg
== 15)
19343 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19344 there could be up to 32 D-registers to restore.
19345 If there are more than 16 D-registers, make two recursive calls,
19346 each of which emits one pop_multi instruction. */
19349 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
19350 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
19354 /* The parallel needs to hold num_regs SETs
19355 and one SET for the stack update. */
19356 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19358 /* Increment the stack pointer, based on there being
19359 num_regs 8-byte registers to restore. */
19360 tmp
= gen_rtx_SET (VOIDmode
,
19362 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
19363 RTX_FRAME_RELATED_P (tmp
) = 1;
19364 XVECEXP (par
, 0, 0) = tmp
;
19366 /* Now show every reg that will be restored, using a SET for each. */
19367 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
19369 reg
= gen_rtx_REG (DFmode
, i
);
19371 tmp
= gen_rtx_SET (VOIDmode
,
19375 plus_constant (Pmode
, base_reg
, 8 * j
)));
19376 RTX_FRAME_RELATED_P (tmp
) = 1;
19377 XVECEXP (par
, 0, j
+ 1) = tmp
;
19379 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19384 par
= emit_insn (par
);
19385 REG_NOTES (par
) = dwarf
;
19387 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
19388 base_reg
, base_reg
);
19391 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
19392 number of registers are being popped, multiple LDRD patterns are created for
19393 all register pairs. If odd number of registers are popped, last register is
19394 loaded by using LDR pattern. */
19396 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
19400 rtx par
= NULL_RTX
;
19401 rtx dwarf
= NULL_RTX
;
19402 rtx tmp
, reg
, tmp1
;
19405 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
19406 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19407 if (saved_regs_mask
& (1 << i
))
19410 gcc_assert (num_regs
&& num_regs
<= 16);
19412 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19413 to be popped. So, if num_regs is even, now it will become odd,
19414 and we can generate pop with PC. If num_regs is odd, it will be
19415 even now, and ldr with return can be generated for PC. */
19419 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19421 /* Var j iterates over all the registers to gather all the registers in
19422 saved_regs_mask. Var i gives index of saved registers in stack frame.
19423 A PARALLEL RTX of register-pair is created here, so that pattern for
19424 LDRD can be matched. As PC is always last register to be popped, and
19425 we have already decremented num_regs if PC, we don't have to worry
19426 about PC in this loop. */
19427 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
19428 if (saved_regs_mask
& (1 << j
))
19430 /* Create RTX for memory load. */
19431 reg
= gen_rtx_REG (SImode
, j
);
19432 tmp
= gen_rtx_SET (SImode
,
19434 gen_frame_mem (SImode
,
19435 plus_constant (Pmode
,
19436 stack_pointer_rtx
, 4 * i
)));
19437 RTX_FRAME_RELATED_P (tmp
) = 1;
19441 /* When saved-register index (i) is even, the RTX to be emitted is
19442 yet to be created. Hence create it first. The LDRD pattern we
19443 are generating is :
19444 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19445 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19446 where target registers need not be consecutive. */
19447 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19451 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19452 added as 0th element and if i is odd, reg_i is added as 1st element
19453 of LDRD pattern shown above. */
19454 XVECEXP (par
, 0, (i
% 2)) = tmp
;
19455 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19459 /* When saved-register index (i) is odd, RTXs for both the registers
19460 to be loaded are generated in above given LDRD pattern, and the
19461 pattern can be emitted now. */
19462 par
= emit_insn (par
);
19463 REG_NOTES (par
) = dwarf
;
19464 RTX_FRAME_RELATED_P (par
) = 1;
19470 /* If the number of registers pushed is odd AND return_in_pc is false OR
19471 number of registers are even AND return_in_pc is true, last register is
19472 popped using LDR. It can be PC as well. Hence, adjust the stack first and
19473 then LDR with post increment. */
19475 /* Increment the stack pointer, based on there being
19476 num_regs 4-byte registers to restore. */
19477 tmp
= gen_rtx_SET (VOIDmode
,
19479 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
19480 RTX_FRAME_RELATED_P (tmp
) = 1;
19481 tmp
= emit_insn (tmp
);
19484 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
19485 stack_pointer_rtx
, stack_pointer_rtx
);
19490 if (((num_regs
% 2) == 1 && !return_in_pc
)
19491 || ((num_regs
% 2) == 0 && return_in_pc
))
19493 /* Scan for the single register to be popped. Skip until the saved
19494 register is found. */
19495 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
19497 /* Gen LDR with post increment here. */
19498 tmp1
= gen_rtx_MEM (SImode
,
19499 gen_rtx_POST_INC (SImode
,
19500 stack_pointer_rtx
));
19501 set_mem_alias_set (tmp1
, get_frame_alias_set ());
19503 reg
= gen_rtx_REG (SImode
, j
);
19504 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
19505 RTX_FRAME_RELATED_P (tmp
) = 1;
19506 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19510 /* If return_in_pc, j must be PC_REGNUM. */
19511 gcc_assert (j
== PC_REGNUM
);
19512 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19513 XVECEXP (par
, 0, 0) = ret_rtx
;
19514 XVECEXP (par
, 0, 1) = tmp
;
19515 par
= emit_jump_insn (par
);
19519 par
= emit_insn (tmp
);
19520 REG_NOTES (par
) = dwarf
;
19521 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
19522 stack_pointer_rtx
, stack_pointer_rtx
);
19526 else if ((num_regs
% 2) == 1 && return_in_pc
)
19528 /* There are 2 registers to be popped. So, generate the pattern
19529 pop_multiple_with_stack_update_and_return to pop in PC. */
19530 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
19536 /* LDRD in ARM mode needs consecutive registers as operands. This function
19537 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
19538 offset addressing and then generates one separate stack udpate. This provides
19539 more scheduling freedom, compared to writeback on every load. However,
19540 if the function returns using load into PC directly
19541 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
19542 before the last load. TODO: Add a peephole optimization to recognize
19543 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
19544 peephole optimization to merge the load at stack-offset zero
19545 with the stack update instruction using load with writeback
19546 in post-index addressing mode. */
19548 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
19552 rtx par
= NULL_RTX
;
19553 rtx dwarf
= NULL_RTX
;
19556 /* Restore saved registers. */
19557 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
19559 while (j
<= LAST_ARM_REGNUM
)
19560 if (saved_regs_mask
& (1 << j
))
19563 && (saved_regs_mask
& (1 << (j
+ 1)))
19564 && (j
+ 1) != PC_REGNUM
)
19566 /* Current register and next register form register pair for which
19567 LDRD can be generated. PC is always the last register popped, and
19568 we handle it separately. */
19570 mem
= gen_frame_mem (DImode
,
19571 plus_constant (Pmode
,
19575 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19577 tmp
= gen_rtx_SET (DImode
, gen_rtx_REG (DImode
, j
), mem
);
19578 tmp
= emit_insn (tmp
);
19579 RTX_FRAME_RELATED_P (tmp
) = 1;
19581 /* Generate dwarf info. */
19583 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
19584 gen_rtx_REG (SImode
, j
),
19586 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
19587 gen_rtx_REG (SImode
, j
+ 1),
19590 REG_NOTES (tmp
) = dwarf
;
19595 else if (j
!= PC_REGNUM
)
19597 /* Emit a single word load. */
19599 mem
= gen_frame_mem (SImode
,
19600 plus_constant (Pmode
,
19604 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19606 tmp
= gen_rtx_SET (SImode
, gen_rtx_REG (SImode
, j
), mem
);
19607 tmp
= emit_insn (tmp
);
19608 RTX_FRAME_RELATED_P (tmp
) = 1;
19610 /* Generate dwarf info. */
19611 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
19612 gen_rtx_REG (SImode
, j
),
19618 else /* j == PC_REGNUM */
19624 /* Update the stack. */
19627 tmp
= gen_rtx_SET (Pmode
,
19629 plus_constant (Pmode
,
19632 tmp
= emit_insn (tmp
);
19633 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
19634 stack_pointer_rtx
, stack_pointer_rtx
);
19638 if (saved_regs_mask
& (1 << PC_REGNUM
))
19640 /* Only PC is to be popped. */
19641 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19642 XVECEXP (par
, 0, 0) = ret_rtx
;
19643 tmp
= gen_rtx_SET (SImode
,
19644 gen_rtx_REG (SImode
, PC_REGNUM
),
19645 gen_frame_mem (SImode
,
19646 gen_rtx_POST_INC (SImode
,
19647 stack_pointer_rtx
)));
19648 RTX_FRAME_RELATED_P (tmp
) = 1;
19649 XVECEXP (par
, 0, 1) = tmp
;
19650 par
= emit_jump_insn (par
);
19652 /* Generate dwarf info. */
19653 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
19654 gen_rtx_REG (SImode
, PC_REGNUM
),
19656 REG_NOTES (par
) = dwarf
;
19657 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
19658 stack_pointer_rtx
, stack_pointer_rtx
);
19662 /* Calculate the size of the return value that is passed in registers. */
19664 arm_size_return_regs (void)
19666 enum machine_mode mode
;
19668 if (crtl
->return_rtx
!= 0)
19669 mode
= GET_MODE (crtl
->return_rtx
);
19671 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
19673 return GET_MODE_SIZE (mode
);
19676 /* Return true if the current function needs to save/restore LR. */
19678 thumb_force_lr_save (void)
19680 return !cfun
->machine
->lr_save_eliminated
19681 && (!leaf_function_p ()
19682 || thumb_far_jump_used_p ()
19683 || df_regs_ever_live_p (LR_REGNUM
));
19686 /* We do not know if r3 will be available because
19687 we do have an indirect tailcall happening in this
19688 particular case. */
19690 is_indirect_tailcall_p (rtx call
)
19692 rtx pat
= PATTERN (call
);
19694 /* Indirect tail call. */
19695 pat
= XVECEXP (pat
, 0, 0);
19696 if (GET_CODE (pat
) == SET
)
19697 pat
= SET_SRC (pat
);
19699 pat
= XEXP (XEXP (pat
, 0), 0);
19700 return REG_P (pat
);
19703 /* Return true if r3 is used by any of the tail call insns in the
19704 current function. */
19706 any_sibcall_could_use_r3 (void)
19711 if (!crtl
->tail_call_emit
)
19713 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19714 if (e
->flags
& EDGE_SIBCALL
)
19716 rtx call
= BB_END (e
->src
);
19717 if (!CALL_P (call
))
19718 call
= prev_nonnote_nondebug_insn (call
);
19719 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
19720 if (find_regno_fusage (call
, USE
, 3)
19721 || is_indirect_tailcall_p (call
))
19728 /* Compute the distance from register FROM to register TO.
19729 These can be the arg pointer (26), the soft frame pointer (25),
19730 the stack pointer (13) or the hard frame pointer (11).
19731 In thumb mode r7 is used as the soft frame pointer, if needed.
19732 Typical stack layout looks like this:
19734 old stack pointer -> | |
19737 | | saved arguments for
19738 | | vararg functions
19741 hard FP & arg pointer -> | | \
19749 soft frame pointer -> | | /
19754 locals base pointer -> | | /
19759 current stack pointer -> | | /
19762 For a given function some or all of these stack components
19763 may not be needed, giving rise to the possibility of
19764 eliminating some of the registers.
19766 The values returned by this function must reflect the behavior
19767 of arm_expand_prologue() and arm_compute_save_reg_mask().
19769 The sign of the number returned reflects the direction of stack
19770 growth, so the values are positive for all eliminations except
19771 from the soft frame pointer to the hard frame pointer.
19773 SFP may point just inside the local variables block to ensure correct
19777 /* Calculate stack offsets. These are used to calculate register elimination
19778 offsets and in prologue/epilogue code. Also calculates which registers
19779 should be saved. */
19781 static arm_stack_offsets
*
19782 arm_get_frame_offsets (void)
19784 struct arm_stack_offsets
*offsets
;
19785 unsigned long func_type
;
19789 HOST_WIDE_INT frame_size
;
19792 offsets
= &cfun
->machine
->stack_offsets
;
19794 /* We need to know if we are a leaf function. Unfortunately, it
19795 is possible to be called after start_sequence has been called,
19796 which causes get_insns to return the insns for the sequence,
19797 not the function, which will cause leaf_function_p to return
19798 the incorrect result.
19800 to know about leaf functions once reload has completed, and the
19801 frame size cannot be changed after that time, so we can safely
19802 use the cached value. */
19804 if (reload_completed
)
19807 /* Initially this is the size of the local variables. It will translated
19808 into an offset once we have determined the size of preceding data. */
19809 frame_size
= ROUND_UP_WORD (get_frame_size ());
19811 leaf
= leaf_function_p ();
19813 /* Space for variadic functions. */
19814 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
19816 /* In Thumb mode this is incorrect, but never used. */
19817 offsets
->frame
= offsets
->saved_args
+ (frame_pointer_needed
? 4 : 0) +
19818 arm_compute_static_chain_stack_bytes();
19822 unsigned int regno
;
19824 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
19825 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
19826 saved
= core_saved
;
19828 /* We know that SP will be doubleword aligned on entry, and we must
19829 preserve that condition at any subroutine call. We also require the
19830 soft frame pointer to be doubleword aligned. */
19832 if (TARGET_REALLY_IWMMXT
)
19834 /* Check for the call-saved iWMMXt registers. */
19835 for (regno
= FIRST_IWMMXT_REGNUM
;
19836 regno
<= LAST_IWMMXT_REGNUM
;
19838 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
19842 func_type
= arm_current_func_type ();
19843 /* Space for saved VFP registers. */
19844 if (! IS_VOLATILE (func_type
)
19845 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
19846 saved
+= arm_get_vfp_saved_size ();
19848 else /* TARGET_THUMB1 */
19850 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
19851 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
19852 saved
= core_saved
;
19853 if (TARGET_BACKTRACE
)
19857 /* Saved registers include the stack frame. */
19858 offsets
->saved_regs
= offsets
->saved_args
+ saved
+
19859 arm_compute_static_chain_stack_bytes();
19860 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
19861 /* A leaf function does not need any stack alignment if it has nothing
19863 if (leaf
&& frame_size
== 0
19864 /* However if it calls alloca(), we have a dynamically allocated
19865 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
19866 && ! cfun
->calls_alloca
)
19868 offsets
->outgoing_args
= offsets
->soft_frame
;
19869 offsets
->locals_base
= offsets
->soft_frame
;
19873 /* Ensure SFP has the correct alignment. */
19874 if (ARM_DOUBLEWORD_ALIGN
19875 && (offsets
->soft_frame
& 7))
19877 offsets
->soft_frame
+= 4;
19878 /* Try to align stack by pushing an extra reg. Don't bother doing this
19879 when there is a stack frame as the alignment will be rolled into
19880 the normal stack adjustment. */
19881 if (frame_size
+ crtl
->outgoing_args_size
== 0)
19885 /* If it is safe to use r3, then do so. This sometimes
19886 generates better code on Thumb-2 by avoiding the need to
19887 use 32-bit push/pop instructions. */
19888 if (! any_sibcall_could_use_r3 ()
19889 && arm_size_return_regs () <= 12
19890 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
19892 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
19897 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
19899 /* Avoid fixed registers; they may be changed at
19900 arbitrary times so it's unsafe to restore them
19901 during the epilogue. */
19903 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
19912 offsets
->saved_regs
+= 4;
19913 offsets
->saved_regs_mask
|= (1 << reg
);
19918 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
19919 offsets
->outgoing_args
= (offsets
->locals_base
19920 + crtl
->outgoing_args_size
);
19922 if (ARM_DOUBLEWORD_ALIGN
)
19924 /* Ensure SP remains doubleword aligned. */
19925 if (offsets
->outgoing_args
& 7)
19926 offsets
->outgoing_args
+= 4;
19927 gcc_assert (!(offsets
->outgoing_args
& 7));
19934 /* Calculate the relative offsets for the different stack pointers. Positive
19935 offsets are in the direction of stack growth. */
19938 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
19940 arm_stack_offsets
*offsets
;
19942 offsets
= arm_get_frame_offsets ();
19944 /* OK, now we have enough information to compute the distances.
19945 There must be an entry in these switch tables for each pair
19946 of registers in ELIMINABLE_REGS, even if some of the entries
19947 seem to be redundant or useless. */
19950 case ARG_POINTER_REGNUM
:
19953 case THUMB_HARD_FRAME_POINTER_REGNUM
:
19956 case FRAME_POINTER_REGNUM
:
19957 /* This is the reverse of the soft frame pointer
19958 to hard frame pointer elimination below. */
19959 return offsets
->soft_frame
- offsets
->saved_args
;
19961 case ARM_HARD_FRAME_POINTER_REGNUM
:
19962 /* This is only non-zero in the case where the static chain register
19963 is stored above the frame. */
19964 return offsets
->frame
- offsets
->saved_args
- 4;
19966 case STACK_POINTER_REGNUM
:
19967 /* If nothing has been pushed on the stack at all
19968 then this will return -4. This *is* correct! */
19969 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
19972 gcc_unreachable ();
19974 gcc_unreachable ();
19976 case FRAME_POINTER_REGNUM
:
19979 case THUMB_HARD_FRAME_POINTER_REGNUM
:
19982 case ARM_HARD_FRAME_POINTER_REGNUM
:
19983 /* The hard frame pointer points to the top entry in the
19984 stack frame. The soft frame pointer to the bottom entry
19985 in the stack frame. If there is no stack frame at all,
19986 then they are identical. */
19988 return offsets
->frame
- offsets
->soft_frame
;
19990 case STACK_POINTER_REGNUM
:
19991 return offsets
->outgoing_args
- offsets
->soft_frame
;
19994 gcc_unreachable ();
19996 gcc_unreachable ();
19999 /* You cannot eliminate from the stack pointer.
20000 In theory you could eliminate from the hard frame
20001 pointer to the stack pointer, but this will never
20002 happen, since if a stack frame is not needed the
20003 hard frame pointer will never be used. */
20004 gcc_unreachable ();
20008 /* Given FROM and TO register numbers, say whether this elimination is
20009 allowed. Frame pointer elimination is automatically handled.
20011 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20012 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20013 pointer, we must eliminate FRAME_POINTER_REGNUM into
20014 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20015 ARG_POINTER_REGNUM. */
20018 arm_can_eliminate (const int from
, const int to
)
20020 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20021 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20022 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20023 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20027 /* Emit RTL to save coprocessor registers on function entry. Returns the
20028 number of bytes pushed. */
20031 arm_save_coproc_regs(void)
20033 int saved_size
= 0;
20035 unsigned start_reg
;
20038 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20039 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20041 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20042 insn
= gen_rtx_MEM (V2SImode
, insn
);
20043 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20044 RTX_FRAME_RELATED_P (insn
) = 1;
20048 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
20050 start_reg
= FIRST_VFP_REGNUM
;
20052 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20054 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20055 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20057 if (start_reg
!= reg
)
20058 saved_size
+= vfp_emit_fstmd (start_reg
,
20059 (reg
- start_reg
) / 2);
20060 start_reg
= reg
+ 2;
20063 if (start_reg
!= reg
)
20064 saved_size
+= vfp_emit_fstmd (start_reg
,
20065 (reg
- start_reg
) / 2);
20071 /* Set the Thumb frame pointer from the stack pointer. */
20074 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
20076 HOST_WIDE_INT amount
;
20079 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
20081 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20082 stack_pointer_rtx
, GEN_INT (amount
)));
20085 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
20086 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20087 expects the first two operands to be the same. */
20090 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20092 hard_frame_pointer_rtx
));
20096 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20097 hard_frame_pointer_rtx
,
20098 stack_pointer_rtx
));
20100 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
20101 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
20102 RTX_FRAME_RELATED_P (dwarf
) = 1;
20103 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20106 RTX_FRAME_RELATED_P (insn
) = 1;
20109 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20112 arm_expand_prologue (void)
20117 unsigned long live_regs_mask
;
20118 unsigned long func_type
;
20120 int saved_pretend_args
= 0;
20121 int saved_regs
= 0;
20122 unsigned HOST_WIDE_INT args_to_push
;
20123 arm_stack_offsets
*offsets
;
20125 func_type
= arm_current_func_type ();
20127 /* Naked functions don't have prologues. */
20128 if (IS_NAKED (func_type
))
20131 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20132 args_to_push
= crtl
->args
.pretend_args_size
;
20134 /* Compute which register we will have to save onto the stack. */
20135 offsets
= arm_get_frame_offsets ();
20136 live_regs_mask
= offsets
->saved_regs_mask
;
20138 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
20140 if (IS_STACKALIGN (func_type
))
20144 /* Handle a word-aligned stack pointer. We generate the following:
20149 <save and restore r0 in normal prologue/epilogue>
20153 The unwinder doesn't need to know about the stack realignment.
20154 Just tell it we saved SP in r0. */
20155 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
20157 r0
= gen_rtx_REG (SImode
, 0);
20158 r1
= gen_rtx_REG (SImode
, 1);
20160 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
20161 RTX_FRAME_RELATED_P (insn
) = 1;
20162 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
20164 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
20166 /* ??? The CFA changes here, which may cause GDB to conclude that it
20167 has entered a different function. That said, the unwind info is
20168 correct, individually, before and after this instruction because
20169 we've described the save of SP, which will override the default
20170 handling of SP as restoring from the CFA. */
20171 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
20174 /* For APCS frames, if IP register is clobbered
20175 when creating frame, save that register in a special
20177 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20179 if (IS_INTERRUPT (func_type
))
20181 /* Interrupt functions must not corrupt any registers.
20182 Creating a frame pointer however, corrupts the IP
20183 register, so we must push it first. */
20184 emit_multi_reg_push (1 << IP_REGNUM
);
20186 /* Do not set RTX_FRAME_RELATED_P on this insn.
20187 The dwarf stack unwinding code only wants to see one
20188 stack decrement per function, and this is not it. If
20189 this instruction is labeled as being part of the frame
20190 creation sequence then dwarf2out_frame_debug_expr will
20191 die when it encounters the assignment of IP to FP
20192 later on, since the use of SP here establishes SP as
20193 the CFA register and not IP.
20195 Anyway this instruction is not really part of the stack
20196 frame creation although it is part of the prologue. */
20198 else if (IS_NESTED (func_type
))
20200 /* The static chain register is the same as the IP register
20201 used as a scratch register during stack frame creation.
20202 To get around this need to find somewhere to store IP
20203 whilst the frame is being created. We try the following
20206 1. The last argument register r3.
20207 2. A slot on the stack above the frame. (This only
20208 works if the function is not a varargs function).
20209 3. Register r3 again, after pushing the argument registers
20212 Note - we only need to tell the dwarf2 backend about the SP
20213 adjustment in the second variant; the static chain register
20214 doesn't need to be unwound, as it doesn't contain a value
20215 inherited from the caller. */
20217 if (!arm_r3_live_at_start_p ())
20218 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20219 else if (args_to_push
== 0)
20223 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20226 insn
= gen_rtx_PRE_DEC (SImode
, stack_pointer_rtx
);
20227 insn
= emit_set_insn (gen_frame_mem (SImode
, insn
), ip_rtx
);
20230 /* Just tell the dwarf backend that we adjusted SP. */
20231 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20232 plus_constant (Pmode
, stack_pointer_rtx
,
20234 RTX_FRAME_RELATED_P (insn
) = 1;
20235 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20239 /* Store the args on the stack. */
20240 if (cfun
->machine
->uses_anonymous_args
)
20241 insn
= emit_multi_reg_push
20242 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
20245 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20246 GEN_INT (- args_to_push
)));
20248 RTX_FRAME_RELATED_P (insn
) = 1;
20250 saved_pretend_args
= 1;
20251 fp_offset
= args_to_push
;
20254 /* Now reuse r3 to preserve IP. */
20255 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20259 insn
= emit_set_insn (ip_rtx
,
20260 plus_constant (Pmode
, stack_pointer_rtx
,
20262 RTX_FRAME_RELATED_P (insn
) = 1;
20267 /* Push the argument registers, or reserve space for them. */
20268 if (cfun
->machine
->uses_anonymous_args
)
20269 insn
= emit_multi_reg_push
20270 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
20273 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20274 GEN_INT (- args_to_push
)));
20275 RTX_FRAME_RELATED_P (insn
) = 1;
20278 /* If this is an interrupt service routine, and the link register
20279 is going to be pushed, and we're not generating extra
20280 push of IP (needed when frame is needed and frame layout if apcs),
20281 subtracting four from LR now will mean that the function return
20282 can be done with a single instruction. */
20283 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
20284 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
20285 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
20288 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
20290 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
20293 if (live_regs_mask
)
20295 saved_regs
+= bit_count (live_regs_mask
) * 4;
20296 if (optimize_size
&& !frame_pointer_needed
20297 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
20299 /* If no coprocessor registers are being pushed and we don't have
20300 to worry about a frame pointer then push extra registers to
20301 create the stack frame. This is done is a way that does not
20302 alter the frame layout, so is independent of the epilogue. */
20306 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
20308 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
20309 if (frame
&& n
* 4 >= frame
)
20312 live_regs_mask
|= (1 << n
) - 1;
20313 saved_regs
+= frame
;
20318 && current_tune
->prefer_ldrd_strd
20319 && !optimize_function_for_size_p (cfun
))
20323 thumb2_emit_strd_push (live_regs_mask
);
20325 else if (TARGET_ARM
20326 && !TARGET_APCS_FRAME
20327 && !IS_INTERRUPT (func_type
))
20329 arm_emit_strd_push (live_regs_mask
);
20333 insn
= emit_multi_reg_push (live_regs_mask
);
20334 RTX_FRAME_RELATED_P (insn
) = 1;
20339 insn
= emit_multi_reg_push (live_regs_mask
);
20340 RTX_FRAME_RELATED_P (insn
) = 1;
20344 if (! IS_VOLATILE (func_type
))
20345 saved_regs
+= arm_save_coproc_regs ();
20347 if (frame_pointer_needed
&& TARGET_ARM
)
20349 /* Create the new frame pointer. */
20350 if (TARGET_APCS_FRAME
)
20352 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
20353 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
20354 RTX_FRAME_RELATED_P (insn
) = 1;
20356 if (IS_NESTED (func_type
))
20358 /* Recover the static chain register. */
20359 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
20360 insn
= gen_rtx_REG (SImode
, 3);
20361 else /* if (crtl->args.pretend_args_size == 0) */
20363 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
20364 insn
= gen_frame_mem (SImode
, insn
);
20366 emit_set_insn (ip_rtx
, insn
);
20367 /* Add a USE to stop propagate_one_insn() from barfing. */
20368 emit_insn (gen_force_register_use (ip_rtx
));
20373 insn
= GEN_INT (saved_regs
- 4);
20374 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20375 stack_pointer_rtx
, insn
));
20376 RTX_FRAME_RELATED_P (insn
) = 1;
20380 if (flag_stack_usage_info
)
20381 current_function_static_stack_size
20382 = offsets
->outgoing_args
- offsets
->saved_args
;
20384 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
20386 /* This add can produce multiple insns for a large constant, so we
20387 need to get tricky. */
20388 rtx last
= get_last_insn ();
20390 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
20391 - offsets
->outgoing_args
);
20393 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20397 last
= last
? NEXT_INSN (last
) : get_insns ();
20398 RTX_FRAME_RELATED_P (last
) = 1;
20400 while (last
!= insn
);
20402 /* If the frame pointer is needed, emit a special barrier that
20403 will prevent the scheduler from moving stores to the frame
20404 before the stack adjustment. */
20405 if (frame_pointer_needed
)
20406 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
20407 hard_frame_pointer_rtx
));
20411 if (frame_pointer_needed
&& TARGET_THUMB2
)
20412 thumb_set_frame_pointer (offsets
);
20414 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20416 unsigned long mask
;
20418 mask
= live_regs_mask
;
20419 mask
&= THUMB2_WORK_REGS
;
20420 if (!IS_NESTED (func_type
))
20421 mask
|= (1 << IP_REGNUM
);
20422 arm_load_pic_register (mask
);
20425 /* If we are profiling, make sure no instructions are scheduled before
20426 the call to mcount. Similarly if the user has requested no
20427 scheduling in the prolog. Similarly if we want non-call exceptions
20428 using the EABI unwinder, to prevent faulting instructions from being
20429 swapped with a stack adjustment. */
20430 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
20431 || (arm_except_unwind_info (&global_options
) == UI_TARGET
20432 && cfun
->can_throw_non_call_exceptions
))
20433 emit_insn (gen_blockage ());
20435 /* If the link register is being kept alive, with the return address in it,
20436 then make sure that it does not get reused by the ce2 pass. */
20437 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
20438 cfun
->machine
->lr_save_eliminated
= 1;
20441 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20443 arm_print_condition (FILE *stream
)
20445 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
20447 /* Branch conversion is not implemented for Thumb-2. */
20450 output_operand_lossage ("predicated Thumb instruction");
20453 if (current_insn_predicate
!= NULL
)
20455 output_operand_lossage
20456 ("predicated instruction in conditional sequence");
20460 fputs (arm_condition_codes
[arm_current_cc
], stream
);
20462 else if (current_insn_predicate
)
20464 enum arm_cond_code code
;
20468 output_operand_lossage ("predicated Thumb instruction");
20472 code
= get_arm_condition_code (current_insn_predicate
);
20473 fputs (arm_condition_codes
[code
], stream
);
20478 /* If CODE is 'd', then the X is a condition operand and the instruction
20479 should only be executed if the condition is true.
20480 if CODE is 'D', then the X is a condition operand and the instruction
20481 should only be executed if the condition is false: however, if the mode
20482 of the comparison is CCFPEmode, then always execute the instruction -- we
20483 do this because in these circumstances !GE does not necessarily imply LT;
20484 in these cases the instruction pattern will take care to make sure that
20485 an instruction containing %d will follow, thereby undoing the effects of
20486 doing this instruction unconditionally.
20487 If CODE is 'N' then X is a floating point operand that must be negated
20489 If CODE is 'B' then output a bitwise inverted value of X (a const int).
20490 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
20492 arm_print_operand (FILE *stream
, rtx x
, int code
)
20497 fputs (ASM_COMMENT_START
, stream
);
20501 fputs (user_label_prefix
, stream
);
20505 fputs (REGISTER_PREFIX
, stream
);
20509 arm_print_condition (stream
);
20513 /* Nothing in unified syntax, otherwise the current condition code. */
20514 if (!TARGET_UNIFIED_ASM
)
20515 arm_print_condition (stream
);
20519 /* The current condition code in unified syntax, otherwise nothing. */
20520 if (TARGET_UNIFIED_ASM
)
20521 arm_print_condition (stream
);
20525 /* The current condition code for a condition code setting instruction.
20526 Preceded by 's' in unified syntax, otherwise followed by 's'. */
20527 if (TARGET_UNIFIED_ASM
)
20529 fputc('s', stream
);
20530 arm_print_condition (stream
);
20534 arm_print_condition (stream
);
20535 fputc('s', stream
);
20540 /* If the instruction is conditionally executed then print
20541 the current condition code, otherwise print 's'. */
20542 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
20543 if (current_insn_predicate
)
20544 arm_print_condition (stream
);
20546 fputc('s', stream
);
20549 /* %# is a "break" sequence. It doesn't output anything, but is used to
20550 separate e.g. operand numbers from following text, if that text consists
20551 of further digits which we don't want to be part of the operand
20559 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
20560 r
= real_value_negate (&r
);
20561 fprintf (stream
, "%s", fp_const_from_val (&r
));
20565 /* An integer or symbol address without a preceding # sign. */
20567 switch (GET_CODE (x
))
20570 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
20574 output_addr_const (stream
, x
);
20578 if (GET_CODE (XEXP (x
, 0)) == PLUS
20579 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
20581 output_addr_const (stream
, x
);
20584 /* Fall through. */
20587 output_operand_lossage ("Unsupported operand for code '%c'", code
);
20591 /* An integer that we want to print in HEX. */
20593 switch (GET_CODE (x
))
20596 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
20600 output_operand_lossage ("Unsupported operand for code '%c'", code
);
20605 if (CONST_INT_P (x
))
20608 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
20609 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
20613 putc ('~', stream
);
20614 output_addr_const (stream
, x
);
20619 /* The low 16 bits of an immediate constant. */
20620 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
20624 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
20628 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
20636 shift
= shift_op (x
, &val
);
20640 fprintf (stream
, ", %s ", shift
);
20642 arm_print_operand (stream
, XEXP (x
, 1), 0);
20644 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
20649 /* An explanation of the 'Q', 'R' and 'H' register operands:
20651 In a pair of registers containing a DI or DF value the 'Q'
20652 operand returns the register number of the register containing
20653 the least significant part of the value. The 'R' operand returns
20654 the register number of the register containing the most
20655 significant part of the value.
20657 The 'H' operand returns the higher of the two register numbers.
20658 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
20659 same as the 'Q' operand, since the most significant part of the
20660 value is held in the lower number register. The reverse is true
20661 on systems where WORDS_BIG_ENDIAN is false.
20663 The purpose of these operands is to distinguish between cases
20664 where the endian-ness of the values is important (for example
20665 when they are added together), and cases where the endian-ness
20666 is irrelevant, but the order of register operations is important.
20667 For example when loading a value from memory into a register
20668 pair, the endian-ness does not matter. Provided that the value
20669 from the lower memory address is put into the lower numbered
20670 register, and the value from the higher address is put into the
20671 higher numbered register, the load will work regardless of whether
20672 the value being loaded is big-wordian or little-wordian. The
20673 order of the two register loads can matter however, if the address
20674 of the memory location is actually held in one of the registers
20675 being overwritten by the load.
20677 The 'Q' and 'R' constraints are also available for 64-bit
20680 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
20682 rtx part
= gen_lowpart (SImode
, x
);
20683 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
20687 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20689 output_operand_lossage ("invalid operand for code '%c'", code
);
20693 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
20697 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
20699 enum machine_mode mode
= GET_MODE (x
);
20702 if (mode
== VOIDmode
)
20704 part
= gen_highpart_mode (SImode
, mode
, x
);
20705 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
20709 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20711 output_operand_lossage ("invalid operand for code '%c'", code
);
20715 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
20719 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20721 output_operand_lossage ("invalid operand for code '%c'", code
);
20725 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
20729 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20731 output_operand_lossage ("invalid operand for code '%c'", code
);
20735 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
20739 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20741 output_operand_lossage ("invalid operand for code '%c'", code
);
20745 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
20749 asm_fprintf (stream
, "%r",
20750 REG_P (XEXP (x
, 0))
20751 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
20755 asm_fprintf (stream
, "{%r-%r}",
20757 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
20760 /* Like 'M', but writing doubleword vector registers, for use by Neon
20764 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
20765 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
20767 asm_fprintf (stream
, "{d%d}", regno
);
20769 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
20774 /* CONST_TRUE_RTX means always -- that's the default. */
20775 if (x
== const_true_rtx
)
20778 if (!COMPARISON_P (x
))
20780 output_operand_lossage ("invalid operand for code '%c'", code
);
20784 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
20789 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
20790 want to do that. */
20791 if (x
== const_true_rtx
)
20793 output_operand_lossage ("instruction never executed");
20796 if (!COMPARISON_P (x
))
20798 output_operand_lossage ("invalid operand for code '%c'", code
);
20802 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
20803 (get_arm_condition_code (x
))],
20813 /* Former Maverick support, removed after GCC-4.7. */
20814 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
20819 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
20820 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
20821 /* Bad value for wCG register number. */
20823 output_operand_lossage ("invalid operand for code '%c'", code
);
20828 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
20831 /* Print an iWMMXt control register name. */
20833 if (!CONST_INT_P (x
)
20835 || INTVAL (x
) >= 16)
20836 /* Bad value for wC register number. */
20838 output_operand_lossage ("invalid operand for code '%c'", code
);
20844 static const char * wc_reg_names
[16] =
20846 "wCID", "wCon", "wCSSF", "wCASF",
20847 "wC4", "wC5", "wC6", "wC7",
20848 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
20849 "wC12", "wC13", "wC14", "wC15"
20852 fputs (wc_reg_names
[INTVAL (x
)], stream
);
20856 /* Print the high single-precision register of a VFP double-precision
20860 int mode
= GET_MODE (x
);
20863 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
20865 output_operand_lossage ("invalid operand for code '%c'", code
);
20870 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
20872 output_operand_lossage ("invalid operand for code '%c'", code
);
20876 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
20880 /* Print a VFP/Neon double precision or quad precision register name. */
20884 int mode
= GET_MODE (x
);
20885 int is_quad
= (code
== 'q');
20888 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
20890 output_operand_lossage ("invalid operand for code '%c'", code
);
20895 || !IS_VFP_REGNUM (REGNO (x
)))
20897 output_operand_lossage ("invalid operand for code '%c'", code
);
20902 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
20903 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
20905 output_operand_lossage ("invalid operand for code '%c'", code
);
20909 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
20910 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
20914 /* These two codes print the low/high doubleword register of a Neon quad
20915 register, respectively. For pair-structure types, can also print
20916 low/high quadword registers. */
20920 int mode
= GET_MODE (x
);
20923 if ((GET_MODE_SIZE (mode
) != 16
20924 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
20926 output_operand_lossage ("invalid operand for code '%c'", code
);
20931 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
20933 output_operand_lossage ("invalid operand for code '%c'", code
);
20937 if (GET_MODE_SIZE (mode
) == 16)
20938 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
20939 + (code
== 'f' ? 1 : 0));
20941 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
20942 + (code
== 'f' ? 1 : 0));
20946 /* Print a VFPv3 floating-point constant, represented as an integer
20950 int index
= vfp3_const_double_index (x
);
20951 gcc_assert (index
!= -1);
20952 fprintf (stream
, "%d", index
);
20956 /* Print bits representing opcode features for Neon.
20958 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
20959 and polynomials as unsigned.
20961 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
20963 Bit 2 is 1 for rounding functions, 0 otherwise. */
20965 /* Identify the type as 's', 'u', 'p' or 'f'. */
20968 HOST_WIDE_INT bits
= INTVAL (x
);
20969 fputc ("uspf"[bits
& 3], stream
);
20973 /* Likewise, but signed and unsigned integers are both 'i'. */
20976 HOST_WIDE_INT bits
= INTVAL (x
);
20977 fputc ("iipf"[bits
& 3], stream
);
20981 /* As for 'T', but emit 'u' instead of 'p'. */
20984 HOST_WIDE_INT bits
= INTVAL (x
);
20985 fputc ("usuf"[bits
& 3], stream
);
20989 /* Bit 2: rounding (vs none). */
20992 HOST_WIDE_INT bits
= INTVAL (x
);
20993 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
20997 /* Memory operand for vld1/vst1 instruction. */
21001 bool postinc
= FALSE
;
21002 unsigned align
, memsize
, align_bits
;
21004 gcc_assert (MEM_P (x
));
21005 addr
= XEXP (x
, 0);
21006 if (GET_CODE (addr
) == POST_INC
)
21009 addr
= XEXP (addr
, 0);
21011 asm_fprintf (stream
, "[%r", REGNO (addr
));
21013 /* We know the alignment of this access, so we can emit a hint in the
21014 instruction (for some alignments) as an aid to the memory subsystem
21016 align
= MEM_ALIGN (x
) >> 3;
21017 memsize
= MEM_SIZE (x
);
21019 /* Only certain alignment specifiers are supported by the hardware. */
21020 if (memsize
== 32 && (align
% 32) == 0)
21022 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
21024 else if (memsize
>= 8 && (align
% 8) == 0)
21029 if (align_bits
!= 0)
21030 asm_fprintf (stream
, ":%d", align_bits
);
21032 asm_fprintf (stream
, "]");
21035 fputs("!", stream
);
21043 gcc_assert (MEM_P (x
));
21044 addr
= XEXP (x
, 0);
21045 gcc_assert (REG_P (addr
));
21046 asm_fprintf (stream
, "[%r]", REGNO (addr
));
21050 /* Translate an S register number into a D register number and element index. */
21053 int mode
= GET_MODE (x
);
21056 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
21058 output_operand_lossage ("invalid operand for code '%c'", code
);
21063 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21065 output_operand_lossage ("invalid operand for code '%c'", code
);
21069 regno
= regno
- FIRST_VFP_REGNUM
;
21070 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
21075 gcc_assert (CONST_DOUBLE_P (x
));
21076 fprintf (stream
, "#%d", vfp3_const_double_for_fract_bits (x
));
21079 /* Register specifier for vld1.16/vst1.16. Translate the S register
21080 number into a D register number and element index. */
21083 int mode
= GET_MODE (x
);
21086 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
21088 output_operand_lossage ("invalid operand for code '%c'", code
);
21093 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21095 output_operand_lossage ("invalid operand for code '%c'", code
);
21099 regno
= regno
- FIRST_VFP_REGNUM
;
21100 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
21107 output_operand_lossage ("missing operand");
21111 switch (GET_CODE (x
))
21114 asm_fprintf (stream
, "%r", REGNO (x
));
21118 output_memory_reference_mode
= GET_MODE (x
);
21119 output_address (XEXP (x
, 0));
21126 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
21127 sizeof (fpstr
), 0, 1);
21128 fprintf (stream
, "#%s", fpstr
);
21131 fprintf (stream
, "#%s", fp_immediate_constant (x
));
21135 gcc_assert (GET_CODE (x
) != NEG
);
21136 fputc ('#', stream
);
21137 if (GET_CODE (x
) == HIGH
)
21139 fputs (":lower16:", stream
);
21143 output_addr_const (stream
, x
);
21149 /* Target hook for printing a memory address. */
21151 arm_print_operand_address (FILE *stream
, rtx x
)
21155 int is_minus
= GET_CODE (x
) == MINUS
;
21158 asm_fprintf (stream
, "[%r]", REGNO (x
));
21159 else if (GET_CODE (x
) == PLUS
|| is_minus
)
21161 rtx base
= XEXP (x
, 0);
21162 rtx index
= XEXP (x
, 1);
21163 HOST_WIDE_INT offset
= 0;
21165 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
21167 /* Ensure that BASE is a register. */
21168 /* (one of them must be). */
21169 /* Also ensure the SP is not used as in index register. */
21174 switch (GET_CODE (index
))
21177 offset
= INTVAL (index
);
21180 asm_fprintf (stream
, "[%r, #%wd]",
21181 REGNO (base
), offset
);
21185 asm_fprintf (stream
, "[%r, %s%r]",
21186 REGNO (base
), is_minus
? "-" : "",
21196 asm_fprintf (stream
, "[%r, %s%r",
21197 REGNO (base
), is_minus
? "-" : "",
21198 REGNO (XEXP (index
, 0)));
21199 arm_print_operand (stream
, index
, 'S');
21200 fputs ("]", stream
);
21205 gcc_unreachable ();
21208 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
21209 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
21211 extern enum machine_mode output_memory_reference_mode
;
21213 gcc_assert (REG_P (XEXP (x
, 0)));
21215 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
21216 asm_fprintf (stream
, "[%r, #%s%d]!",
21217 REGNO (XEXP (x
, 0)),
21218 GET_CODE (x
) == PRE_DEC
? "-" : "",
21219 GET_MODE_SIZE (output_memory_reference_mode
));
21221 asm_fprintf (stream
, "[%r], #%s%d",
21222 REGNO (XEXP (x
, 0)),
21223 GET_CODE (x
) == POST_DEC
? "-" : "",
21224 GET_MODE_SIZE (output_memory_reference_mode
));
21226 else if (GET_CODE (x
) == PRE_MODIFY
)
21228 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
21229 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21230 asm_fprintf (stream
, "#%wd]!",
21231 INTVAL (XEXP (XEXP (x
, 1), 1)));
21233 asm_fprintf (stream
, "%r]!",
21234 REGNO (XEXP (XEXP (x
, 1), 1)));
21236 else if (GET_CODE (x
) == POST_MODIFY
)
21238 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
21239 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21240 asm_fprintf (stream
, "#%wd",
21241 INTVAL (XEXP (XEXP (x
, 1), 1)));
21243 asm_fprintf (stream
, "%r",
21244 REGNO (XEXP (XEXP (x
, 1), 1)));
21246 else output_addr_const (stream
, x
);
21251 asm_fprintf (stream
, "[%r]", REGNO (x
));
21252 else if (GET_CODE (x
) == POST_INC
)
21253 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
21254 else if (GET_CODE (x
) == PLUS
)
21256 gcc_assert (REG_P (XEXP (x
, 0)));
21257 if (CONST_INT_P (XEXP (x
, 1)))
21258 asm_fprintf (stream
, "[%r, #%wd]",
21259 REGNO (XEXP (x
, 0)),
21260 INTVAL (XEXP (x
, 1)));
21262 asm_fprintf (stream
, "[%r, %r]",
21263 REGNO (XEXP (x
, 0)),
21264 REGNO (XEXP (x
, 1)));
21267 output_addr_const (stream
, x
);
21271 /* Target hook for indicating whether a punctuation character for
21272 TARGET_PRINT_OPERAND is valid. */
21274 arm_print_operand_punct_valid_p (unsigned char code
)
21276 return (code
== '@' || code
== '|' || code
== '.'
21277 || code
== '(' || code
== ')' || code
== '#'
21278 || (TARGET_32BIT
&& (code
== '?'))
21279 || (TARGET_THUMB2
&& (code
== '!'))
21280 || (TARGET_THUMB
&& (code
== '_')));
21283 /* Target hook for assembling integer objects. The ARM version needs to
21284 handle word-sized values specially. */
21286 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
21288 enum machine_mode mode
;
21290 if (size
== UNITS_PER_WORD
&& aligned_p
)
21292 fputs ("\t.word\t", asm_out_file
);
21293 output_addr_const (asm_out_file
, x
);
21295 /* Mark symbols as position independent. We only do this in the
21296 .text segment, not in the .data segment. */
21297 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
21298 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
21300 /* See legitimize_pic_address for an explanation of the
21301 TARGET_VXWORKS_RTP check. */
21302 if (TARGET_VXWORKS_RTP
21303 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
21304 fputs ("(GOT)", asm_out_file
);
21306 fputs ("(GOTOFF)", asm_out_file
);
21308 fputc ('\n', asm_out_file
);
21312 mode
= GET_MODE (x
);
21314 if (arm_vector_mode_supported_p (mode
))
21318 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
21320 units
= CONST_VECTOR_NUNITS (x
);
21321 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
21323 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
21324 for (i
= 0; i
< units
; i
++)
21326 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21328 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
21331 for (i
= 0; i
< units
; i
++)
21333 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21334 REAL_VALUE_TYPE rval
;
21336 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
21339 (rval
, GET_MODE_INNER (mode
),
21340 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
21346 return default_assemble_integer (x
, size
, aligned_p
);
21350 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
21354 if (!TARGET_AAPCS_BASED
)
21357 default_named_section_asm_out_constructor
21358 : default_named_section_asm_out_destructor
) (symbol
, priority
);
21362 /* Put these in the .init_array section, using a special relocation. */
21363 if (priority
!= DEFAULT_INIT_PRIORITY
)
21366 sprintf (buf
, "%s.%.5u",
21367 is_ctor
? ".init_array" : ".fini_array",
21369 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
21376 switch_to_section (s
);
21377 assemble_align (POINTER_SIZE
);
21378 fputs ("\t.word\t", asm_out_file
);
21379 output_addr_const (asm_out_file
, symbol
);
21380 fputs ("(target1)\n", asm_out_file
);
21383 /* Add a function to the list of static constructors. */
21386 arm_elf_asm_constructor (rtx symbol
, int priority
)
21388 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
21391 /* Add a function to the list of static destructors. */
21394 arm_elf_asm_destructor (rtx symbol
, int priority
)
21396 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
21399 /* A finite state machine takes care of noticing whether or not instructions
21400 can be conditionally executed, and thus decrease execution time and code
21401 size by deleting branch instructions. The fsm is controlled by
21402 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21404 /* The state of the fsm controlling condition codes are:
21405 0: normal, do nothing special
21406 1: make ASM_OUTPUT_OPCODE not output this instruction
21407 2: make ASM_OUTPUT_OPCODE not output this instruction
21408 3: make instructions conditional
21409 4: make instructions conditional
21411 State transitions (state->state by whom under condition):
21412 0 -> 1 final_prescan_insn if the `target' is a label
21413 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21414 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21415 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21416 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21417 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21418 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21419 (the target insn is arm_target_insn).
21421 If the jump clobbers the conditions then we use states 2 and 4.
21423 A similar thing can be done with conditional return insns.
21425 XXX In case the `target' is an unconditional branch, this conditionalising
21426 of the instructions always reduces code size, but not always execution
21427 time. But then, I want to reduce the code size to somewhere near what
21428 /bin/cc produces. */
21430 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21431 instructions. When a COND_EXEC instruction is seen the subsequent
21432 instructions are scanned so that multiple conditional instructions can be
21433 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21434 specify the length and true/false mask for the IT block. These will be
21435 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
21437 /* Returns the index of the ARM condition code string in
21438 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21439 COMPARISON should be an rtx like `(eq (...) (...))'. */
21442 maybe_get_arm_condition_code (rtx comparison
)
21444 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
21445 enum arm_cond_code code
;
21446 enum rtx_code comp_code
= GET_CODE (comparison
);
21448 if (GET_MODE_CLASS (mode
) != MODE_CC
)
21449 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
21450 XEXP (comparison
, 1));
21454 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
21455 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
21456 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
21457 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
21458 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
21459 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
21460 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
21461 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
21462 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
21463 case CC_DLTUmode
: code
= ARM_CC
;
21466 if (comp_code
== EQ
)
21467 return ARM_INVERSE_CONDITION_CODE (code
);
21468 if (comp_code
== NE
)
21475 case NE
: return ARM_NE
;
21476 case EQ
: return ARM_EQ
;
21477 case GE
: return ARM_PL
;
21478 case LT
: return ARM_MI
;
21479 default: return ARM_NV
;
21485 case NE
: return ARM_NE
;
21486 case EQ
: return ARM_EQ
;
21487 default: return ARM_NV
;
21493 case NE
: return ARM_MI
;
21494 case EQ
: return ARM_PL
;
21495 default: return ARM_NV
;
21500 /* We can handle all cases except UNEQ and LTGT. */
21503 case GE
: return ARM_GE
;
21504 case GT
: return ARM_GT
;
21505 case LE
: return ARM_LS
;
21506 case LT
: return ARM_MI
;
21507 case NE
: return ARM_NE
;
21508 case EQ
: return ARM_EQ
;
21509 case ORDERED
: return ARM_VC
;
21510 case UNORDERED
: return ARM_VS
;
21511 case UNLT
: return ARM_LT
;
21512 case UNLE
: return ARM_LE
;
21513 case UNGT
: return ARM_HI
;
21514 case UNGE
: return ARM_PL
;
21515 /* UNEQ and LTGT do not have a representation. */
21516 case UNEQ
: /* Fall through. */
21517 case LTGT
: /* Fall through. */
21518 default: return ARM_NV
;
21524 case NE
: return ARM_NE
;
21525 case EQ
: return ARM_EQ
;
21526 case GE
: return ARM_LE
;
21527 case GT
: return ARM_LT
;
21528 case LE
: return ARM_GE
;
21529 case LT
: return ARM_GT
;
21530 case GEU
: return ARM_LS
;
21531 case GTU
: return ARM_CC
;
21532 case LEU
: return ARM_CS
;
21533 case LTU
: return ARM_HI
;
21534 default: return ARM_NV
;
21540 case LTU
: return ARM_CS
;
21541 case GEU
: return ARM_CC
;
21542 default: return ARM_NV
;
21548 case NE
: return ARM_NE
;
21549 case EQ
: return ARM_EQ
;
21550 case GEU
: return ARM_CS
;
21551 case GTU
: return ARM_HI
;
21552 case LEU
: return ARM_LS
;
21553 case LTU
: return ARM_CC
;
21554 default: return ARM_NV
;
21560 case GE
: return ARM_GE
;
21561 case LT
: return ARM_LT
;
21562 case GEU
: return ARM_CS
;
21563 case LTU
: return ARM_CC
;
21564 default: return ARM_NV
;
21570 case NE
: return ARM_NE
;
21571 case EQ
: return ARM_EQ
;
21572 case GE
: return ARM_GE
;
21573 case GT
: return ARM_GT
;
21574 case LE
: return ARM_LE
;
21575 case LT
: return ARM_LT
;
21576 case GEU
: return ARM_CS
;
21577 case GTU
: return ARM_HI
;
21578 case LEU
: return ARM_LS
;
21579 case LTU
: return ARM_CC
;
21580 default: return ARM_NV
;
21583 default: gcc_unreachable ();
21587 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
21588 static enum arm_cond_code
21589 get_arm_condition_code (rtx comparison
)
21591 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
21592 gcc_assert (code
!= ARM_NV
);
21596 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
21599 thumb2_final_prescan_insn (rtx insn
)
21601 rtx first_insn
= insn
;
21602 rtx body
= PATTERN (insn
);
21604 enum arm_cond_code code
;
21609 /* Maximum number of conditionally executed instructions in a block
21610 is minimum of the two max values: maximum allowed in an IT block
21611 and maximum that is beneficial according to the cost model and tune. */
21612 max
= (max_insns_skipped
< MAX_INSN_PER_IT_BLOCK
) ?
21613 max_insns_skipped
: MAX_INSN_PER_IT_BLOCK
;
21615 /* Remove the previous insn from the count of insns to be output. */
21616 if (arm_condexec_count
)
21617 arm_condexec_count
--;
21619 /* Nothing to do if we are already inside a conditional block. */
21620 if (arm_condexec_count
)
21623 if (GET_CODE (body
) != COND_EXEC
)
21626 /* Conditional jumps are implemented directly. */
21630 predicate
= COND_EXEC_TEST (body
);
21631 arm_current_cc
= get_arm_condition_code (predicate
);
21633 n
= get_attr_ce_count (insn
);
21634 arm_condexec_count
= 1;
21635 arm_condexec_mask
= (1 << n
) - 1;
21636 arm_condexec_masklen
= n
;
21637 /* See if subsequent instructions can be combined into the same block. */
21640 insn
= next_nonnote_insn (insn
);
21642 /* Jumping into the middle of an IT block is illegal, so a label or
21643 barrier terminates the block. */
21644 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
21647 body
= PATTERN (insn
);
21648 /* USE and CLOBBER aren't really insns, so just skip them. */
21649 if (GET_CODE (body
) == USE
21650 || GET_CODE (body
) == CLOBBER
)
21653 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
21654 if (GET_CODE (body
) != COND_EXEC
)
21656 /* Maximum number of conditionally executed instructions in a block. */
21657 n
= get_attr_ce_count (insn
);
21658 if (arm_condexec_masklen
+ n
> max
)
21661 predicate
= COND_EXEC_TEST (body
);
21662 code
= get_arm_condition_code (predicate
);
21663 mask
= (1 << n
) - 1;
21664 if (arm_current_cc
== code
)
21665 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
21666 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
21669 arm_condexec_count
++;
21670 arm_condexec_masklen
+= n
;
21672 /* A jump must be the last instruction in a conditional block. */
21676 /* Restore recog_data (getting the attributes of other insns can
21677 destroy this array, but final.c assumes that it remains intact
21678 across this call). */
21679 extract_constrain_insn_cached (first_insn
);
21683 arm_final_prescan_insn (rtx insn
)
21685 /* BODY will hold the body of INSN. */
21686 rtx body
= PATTERN (insn
);
21688 /* This will be 1 if trying to repeat the trick, and things need to be
21689 reversed if it appears to fail. */
21692 /* If we start with a return insn, we only succeed if we find another one. */
21693 int seeking_return
= 0;
21694 enum rtx_code return_code
= UNKNOWN
;
21696 /* START_INSN will hold the insn from where we start looking. This is the
21697 first insn after the following code_label if REVERSE is true. */
21698 rtx start_insn
= insn
;
21700 /* If in state 4, check if the target branch is reached, in order to
21701 change back to state 0. */
21702 if (arm_ccfsm_state
== 4)
21704 if (insn
== arm_target_insn
)
21706 arm_target_insn
= NULL
;
21707 arm_ccfsm_state
= 0;
21712 /* If in state 3, it is possible to repeat the trick, if this insn is an
21713 unconditional branch to a label, and immediately following this branch
21714 is the previous target label which is only used once, and the label this
21715 branch jumps to is not too far off. */
21716 if (arm_ccfsm_state
== 3)
21718 if (simplejump_p (insn
))
21720 start_insn
= next_nonnote_insn (start_insn
);
21721 if (BARRIER_P (start_insn
))
21723 /* XXX Isn't this always a barrier? */
21724 start_insn
= next_nonnote_insn (start_insn
);
21726 if (LABEL_P (start_insn
)
21727 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
21728 && LABEL_NUSES (start_insn
) == 1)
21733 else if (ANY_RETURN_P (body
))
21735 start_insn
= next_nonnote_insn (start_insn
);
21736 if (BARRIER_P (start_insn
))
21737 start_insn
= next_nonnote_insn (start_insn
);
21738 if (LABEL_P (start_insn
)
21739 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
21740 && LABEL_NUSES (start_insn
) == 1)
21743 seeking_return
= 1;
21744 return_code
= GET_CODE (body
);
21753 gcc_assert (!arm_ccfsm_state
|| reverse
);
21754 if (!JUMP_P (insn
))
21757 /* This jump might be paralleled with a clobber of the condition codes
21758 the jump should always come first */
21759 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
21760 body
= XVECEXP (body
, 0, 0);
21763 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
21764 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
21767 int fail
= FALSE
, succeed
= FALSE
;
21768 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
21769 int then_not_else
= TRUE
;
21770 rtx this_insn
= start_insn
, label
= 0;
21772 /* Register the insn jumped to. */
21775 if (!seeking_return
)
21776 label
= XEXP (SET_SRC (body
), 0);
21778 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
21779 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
21780 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
21782 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
21783 then_not_else
= FALSE
;
21785 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
21787 seeking_return
= 1;
21788 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
21790 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
21792 seeking_return
= 1;
21793 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
21794 then_not_else
= FALSE
;
21797 gcc_unreachable ();
21799 /* See how many insns this branch skips, and what kind of insns. If all
21800 insns are okay, and the label or unconditional branch to the same
21801 label is not too far away, succeed. */
21802 for (insns_skipped
= 0;
21803 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
21807 this_insn
= next_nonnote_insn (this_insn
);
21811 switch (GET_CODE (this_insn
))
21814 /* Succeed if it is the target label, otherwise fail since
21815 control falls in from somewhere else. */
21816 if (this_insn
== label
)
21818 arm_ccfsm_state
= 1;
21826 /* Succeed if the following insn is the target label.
21828 If return insns are used then the last insn in a function
21829 will be a barrier. */
21830 this_insn
= next_nonnote_insn (this_insn
);
21831 if (this_insn
&& this_insn
== label
)
21833 arm_ccfsm_state
= 1;
21841 /* The AAPCS says that conditional calls should not be
21842 used since they make interworking inefficient (the
21843 linker can't transform BL<cond> into BLX). That's
21844 only a problem if the machine has BLX. */
21851 /* Succeed if the following insn is the target label, or
21852 if the following two insns are a barrier and the
21854 this_insn
= next_nonnote_insn (this_insn
);
21855 if (this_insn
&& BARRIER_P (this_insn
))
21856 this_insn
= next_nonnote_insn (this_insn
);
21858 if (this_insn
&& this_insn
== label
21859 && insns_skipped
< max_insns_skipped
)
21861 arm_ccfsm_state
= 1;
21869 /* If this is an unconditional branch to the same label, succeed.
21870 If it is to another label, do nothing. If it is conditional,
21872 /* XXX Probably, the tests for SET and the PC are
21875 scanbody
= PATTERN (this_insn
);
21876 if (GET_CODE (scanbody
) == SET
21877 && GET_CODE (SET_DEST (scanbody
)) == PC
)
21879 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
21880 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
21882 arm_ccfsm_state
= 2;
21885 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
21888 /* Fail if a conditional return is undesirable (e.g. on a
21889 StrongARM), but still allow this if optimizing for size. */
21890 else if (GET_CODE (scanbody
) == return_code
21891 && !use_return_insn (TRUE
, NULL
)
21894 else if (GET_CODE (scanbody
) == return_code
)
21896 arm_ccfsm_state
= 2;
21899 else if (GET_CODE (scanbody
) == PARALLEL
)
21901 switch (get_attr_conds (this_insn
))
21911 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
21916 /* Instructions using or affecting the condition codes make it
21918 scanbody
= PATTERN (this_insn
);
21919 if (!(GET_CODE (scanbody
) == SET
21920 || GET_CODE (scanbody
) == PARALLEL
)
21921 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
21931 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
21932 arm_target_label
= CODE_LABEL_NUMBER (label
);
21935 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
21937 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
21939 this_insn
= next_nonnote_insn (this_insn
);
21940 gcc_assert (!this_insn
21941 || (!BARRIER_P (this_insn
)
21942 && !LABEL_P (this_insn
)));
21946 /* Oh, dear! we ran off the end.. give up. */
21947 extract_constrain_insn_cached (insn
);
21948 arm_ccfsm_state
= 0;
21949 arm_target_insn
= NULL
;
21952 arm_target_insn
= this_insn
;
21955 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
21958 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
21960 if (reverse
|| then_not_else
)
21961 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
21964 /* Restore recog_data (getting the attributes of other insns can
21965 destroy this array, but final.c assumes that it remains intact
21966 across this call. */
21967 extract_constrain_insn_cached (insn
);
21971 /* Output IT instructions. */
21973 thumb2_asm_output_opcode (FILE * stream
)
21978 if (arm_condexec_mask
)
21980 for (n
= 0; n
< arm_condexec_masklen
; n
++)
21981 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
21983 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
21984 arm_condition_codes
[arm_current_cc
]);
21985 arm_condexec_mask
= 0;
21989 /* Returns true if REGNO is a valid register
21990 for holding a quantity of type MODE. */
21992 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
21994 if (GET_MODE_CLASS (mode
) == MODE_CC
)
21995 return (regno
== CC_REGNUM
21996 || (TARGET_HARD_FLOAT
&& TARGET_VFP
21997 && regno
== VFPCC_REGNUM
));
22000 /* For the Thumb we only allow values bigger than SImode in
22001 registers 0 - 6, so that there is always a second low
22002 register available to hold the upper part of the value.
22003 We probably we ought to ensure that the register is the
22004 start of an even numbered register pair. */
22005 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
22007 if (TARGET_HARD_FLOAT
&& TARGET_VFP
22008 && IS_VFP_REGNUM (regno
))
22010 if (mode
== SFmode
|| mode
== SImode
)
22011 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22013 if (mode
== DFmode
)
22014 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
22016 /* VFP registers can hold HFmode values, but there is no point in
22017 putting them there unless we have hardware conversion insns. */
22018 if (mode
== HFmode
)
22019 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
22022 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
22023 || (VALID_NEON_QREG_MODE (mode
)
22024 && NEON_REGNO_OK_FOR_QUAD (regno
))
22025 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
22026 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
22027 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
22028 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
22029 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
22034 if (TARGET_REALLY_IWMMXT
)
22036 if (IS_IWMMXT_GR_REGNUM (regno
))
22037 return mode
== SImode
;
22039 if (IS_IWMMXT_REGNUM (regno
))
22040 return VALID_IWMMXT_REG_MODE (mode
);
22043 /* We allow almost any value to be stored in the general registers.
22044 Restrict doubleword quantities to even register pairs so that we can
22045 use ldrd. Do not allow very large Neon structure opaque modes in
22046 general registers; they would use too many. */
22047 if (regno
<= LAST_ARM_REGNUM
)
22048 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
22049 && ARM_NUM_REGS (mode
) <= 4;
22051 if (regno
== FRAME_POINTER_REGNUM
22052 || regno
== ARG_POINTER_REGNUM
)
22053 /* We only allow integers in the fake hard registers. */
22054 return GET_MODE_CLASS (mode
) == MODE_INT
;
22059 /* Implement MODES_TIEABLE_P. */
22062 arm_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
22064 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
22067 /* We specifically want to allow elements of "structure" modes to
22068 be tieable to the structure. This more general condition allows
22069 other rarer situations too. */
22071 && (VALID_NEON_DREG_MODE (mode1
)
22072 || VALID_NEON_QREG_MODE (mode1
)
22073 || VALID_NEON_STRUCT_MODE (mode1
))
22074 && (VALID_NEON_DREG_MODE (mode2
)
22075 || VALID_NEON_QREG_MODE (mode2
)
22076 || VALID_NEON_STRUCT_MODE (mode2
)))
22082 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22083 not used in arm mode. */
22086 arm_regno_class (int regno
)
22090 if (regno
== STACK_POINTER_REGNUM
)
22092 if (regno
== CC_REGNUM
)
22099 if (TARGET_THUMB2
&& regno
< 8)
22102 if ( regno
<= LAST_ARM_REGNUM
22103 || regno
== FRAME_POINTER_REGNUM
22104 || regno
== ARG_POINTER_REGNUM
)
22105 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
22107 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
22108 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
22110 if (IS_VFP_REGNUM (regno
))
22112 if (regno
<= D7_VFP_REGNUM
)
22113 return VFP_D0_D7_REGS
;
22114 else if (regno
<= LAST_LO_VFP_REGNUM
)
22115 return VFP_LO_REGS
;
22117 return VFP_HI_REGS
;
22120 if (IS_IWMMXT_REGNUM (regno
))
22121 return IWMMXT_REGS
;
22123 if (IS_IWMMXT_GR_REGNUM (regno
))
22124 return IWMMXT_GR_REGS
;
22129 /* Handle a special case when computing the offset
22130 of an argument from the frame pointer. */
22132 arm_debugger_arg_offset (int value
, rtx addr
)
22136 /* We are only interested if dbxout_parms() failed to compute the offset. */
22140 /* We can only cope with the case where the address is held in a register. */
22144 /* If we are using the frame pointer to point at the argument, then
22145 an offset of 0 is correct. */
22146 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
22149 /* If we are using the stack pointer to point at the
22150 argument, then an offset of 0 is correct. */
22151 /* ??? Check this is consistent with thumb2 frame layout. */
22152 if ((TARGET_THUMB
|| !frame_pointer_needed
)
22153 && REGNO (addr
) == SP_REGNUM
)
22156 /* Oh dear. The argument is pointed to by a register rather
22157 than being held in a register, or being stored at a known
22158 offset from the frame pointer. Since GDB only understands
22159 those two kinds of argument we must translate the address
22160 held in the register into an offset from the frame pointer.
22161 We do this by searching through the insns for the function
22162 looking to see where this register gets its value. If the
22163 register is initialized from the frame pointer plus an offset
22164 then we are in luck and we can continue, otherwise we give up.
22166 This code is exercised by producing debugging information
22167 for a function with arguments like this:
22169 double func (double a, double b, int c, double d) {return d;}
22171 Without this code the stab for parameter 'd' will be set to
22172 an offset of 0 from the frame pointer, rather than 8. */
22174 /* The if() statement says:
22176 If the insn is a normal instruction
22177 and if the insn is setting the value in a register
22178 and if the register being set is the register holding the address of the argument
22179 and if the address is computing by an addition
22180 that involves adding to a register
22181 which is the frame pointer
22186 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
22188 if ( NONJUMP_INSN_P (insn
)
22189 && GET_CODE (PATTERN (insn
)) == SET
22190 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
22191 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
22192 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
22193 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22194 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
22197 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
22206 warning (0, "unable to compute real location of stacked parameter");
22207 value
= 8; /* XXX magic hack */
22228 T_MAX
/* Size of enum. Keep last. */
22229 } neon_builtin_type_mode
;
22231 #define TYPE_MODE_BIT(X) (1 << (X))
22233 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22234 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22235 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22236 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22237 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22238 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22240 #define v8qi_UP T_V8QI
22241 #define v4hi_UP T_V4HI
22242 #define v4hf_UP T_V4HF
22243 #define v2si_UP T_V2SI
22244 #define v2sf_UP T_V2SF
22246 #define v16qi_UP T_V16QI
22247 #define v8hi_UP T_V8HI
22248 #define v4si_UP T_V4SI
22249 #define v4sf_UP T_V4SF
22250 #define v2di_UP T_V2DI
22255 #define UP(X) X##_UP
22291 NEON_LOADSTRUCTLANE
,
22293 NEON_STORESTRUCTLANE
,
22302 const neon_itype itype
;
22303 const neon_builtin_type_mode mode
;
22304 const enum insn_code code
;
22305 unsigned int fcode
;
22306 } neon_builtin_datum
;
22308 #define CF(N,X) CODE_FOR_neon_##N##X
22310 #define VAR1(T, N, A) \
22311 {#N, NEON_##T, UP (A), CF (N, A), 0}
22312 #define VAR2(T, N, A, B) \
22314 {#N, NEON_##T, UP (B), CF (N, B), 0}
22315 #define VAR3(T, N, A, B, C) \
22316 VAR2 (T, N, A, B), \
22317 {#N, NEON_##T, UP (C), CF (N, C), 0}
22318 #define VAR4(T, N, A, B, C, D) \
22319 VAR3 (T, N, A, B, C), \
22320 {#N, NEON_##T, UP (D), CF (N, D), 0}
22321 #define VAR5(T, N, A, B, C, D, E) \
22322 VAR4 (T, N, A, B, C, D), \
22323 {#N, NEON_##T, UP (E), CF (N, E), 0}
22324 #define VAR6(T, N, A, B, C, D, E, F) \
22325 VAR5 (T, N, A, B, C, D, E), \
22326 {#N, NEON_##T, UP (F), CF (N, F), 0}
22327 #define VAR7(T, N, A, B, C, D, E, F, G) \
22328 VAR6 (T, N, A, B, C, D, E, F), \
22329 {#N, NEON_##T, UP (G), CF (N, G), 0}
22330 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22331 VAR7 (T, N, A, B, C, D, E, F, G), \
22332 {#N, NEON_##T, UP (H), CF (N, H), 0}
22333 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22334 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22335 {#N, NEON_##T, UP (I), CF (N, I), 0}
22336 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22337 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22338 {#N, NEON_##T, UP (J), CF (N, J), 0}
22340 /* The NEON builtin data can be found in arm_neon_builtins.def.
22341 The mode entries in the following table correspond to the "key" type of the
22342 instruction variant, i.e. equivalent to that which would be specified after
22343 the assembler mnemonic, which usually refers to the last vector operand.
22344 (Signed/unsigned/polynomial types are not differentiated between though, and
22345 are all mapped onto the same mode for a given element size.) The modes
22346 listed per instruction should be the same as those defined for that
22347 instruction's pattern in neon.md. */
22349 static neon_builtin_datum neon_builtin_data
[] =
22351 #include "arm_neon_builtins.def"
22366 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22367 #define VAR1(T, N, A) \
22369 #define VAR2(T, N, A, B) \
22372 #define VAR3(T, N, A, B, C) \
22373 VAR2 (T, N, A, B), \
22375 #define VAR4(T, N, A, B, C, D) \
22376 VAR3 (T, N, A, B, C), \
22378 #define VAR5(T, N, A, B, C, D, E) \
22379 VAR4 (T, N, A, B, C, D), \
22381 #define VAR6(T, N, A, B, C, D, E, F) \
22382 VAR5 (T, N, A, B, C, D, E), \
22384 #define VAR7(T, N, A, B, C, D, E, F, G) \
22385 VAR6 (T, N, A, B, C, D, E, F), \
22387 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22388 VAR7 (T, N, A, B, C, D, E, F, G), \
22390 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22391 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22393 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22394 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22398 ARM_BUILTIN_GETWCGR0
,
22399 ARM_BUILTIN_GETWCGR1
,
22400 ARM_BUILTIN_GETWCGR2
,
22401 ARM_BUILTIN_GETWCGR3
,
22403 ARM_BUILTIN_SETWCGR0
,
22404 ARM_BUILTIN_SETWCGR1
,
22405 ARM_BUILTIN_SETWCGR2
,
22406 ARM_BUILTIN_SETWCGR3
,
22410 ARM_BUILTIN_WAVG2BR
,
22411 ARM_BUILTIN_WAVG2HR
,
22412 ARM_BUILTIN_WAVG2B
,
22413 ARM_BUILTIN_WAVG2H
,
22420 ARM_BUILTIN_WMACSZ
,
22422 ARM_BUILTIN_WMACUZ
,
22425 ARM_BUILTIN_WSADBZ
,
22427 ARM_BUILTIN_WSADHZ
,
22429 ARM_BUILTIN_WALIGNI
,
22430 ARM_BUILTIN_WALIGNR0
,
22431 ARM_BUILTIN_WALIGNR1
,
22432 ARM_BUILTIN_WALIGNR2
,
22433 ARM_BUILTIN_WALIGNR3
,
22436 ARM_BUILTIN_TMIAPH
,
22437 ARM_BUILTIN_TMIABB
,
22438 ARM_BUILTIN_TMIABT
,
22439 ARM_BUILTIN_TMIATB
,
22440 ARM_BUILTIN_TMIATT
,
22442 ARM_BUILTIN_TMOVMSKB
,
22443 ARM_BUILTIN_TMOVMSKH
,
22444 ARM_BUILTIN_TMOVMSKW
,
22446 ARM_BUILTIN_TBCSTB
,
22447 ARM_BUILTIN_TBCSTH
,
22448 ARM_BUILTIN_TBCSTW
,
22450 ARM_BUILTIN_WMADDS
,
22451 ARM_BUILTIN_WMADDU
,
22453 ARM_BUILTIN_WPACKHSS
,
22454 ARM_BUILTIN_WPACKWSS
,
22455 ARM_BUILTIN_WPACKDSS
,
22456 ARM_BUILTIN_WPACKHUS
,
22457 ARM_BUILTIN_WPACKWUS
,
22458 ARM_BUILTIN_WPACKDUS
,
22463 ARM_BUILTIN_WADDSSB
,
22464 ARM_BUILTIN_WADDSSH
,
22465 ARM_BUILTIN_WADDSSW
,
22466 ARM_BUILTIN_WADDUSB
,
22467 ARM_BUILTIN_WADDUSH
,
22468 ARM_BUILTIN_WADDUSW
,
22472 ARM_BUILTIN_WSUBSSB
,
22473 ARM_BUILTIN_WSUBSSH
,
22474 ARM_BUILTIN_WSUBSSW
,
22475 ARM_BUILTIN_WSUBUSB
,
22476 ARM_BUILTIN_WSUBUSH
,
22477 ARM_BUILTIN_WSUBUSW
,
22484 ARM_BUILTIN_WCMPEQB
,
22485 ARM_BUILTIN_WCMPEQH
,
22486 ARM_BUILTIN_WCMPEQW
,
22487 ARM_BUILTIN_WCMPGTUB
,
22488 ARM_BUILTIN_WCMPGTUH
,
22489 ARM_BUILTIN_WCMPGTUW
,
22490 ARM_BUILTIN_WCMPGTSB
,
22491 ARM_BUILTIN_WCMPGTSH
,
22492 ARM_BUILTIN_WCMPGTSW
,
22494 ARM_BUILTIN_TEXTRMSB
,
22495 ARM_BUILTIN_TEXTRMSH
,
22496 ARM_BUILTIN_TEXTRMSW
,
22497 ARM_BUILTIN_TEXTRMUB
,
22498 ARM_BUILTIN_TEXTRMUH
,
22499 ARM_BUILTIN_TEXTRMUW
,
22500 ARM_BUILTIN_TINSRB
,
22501 ARM_BUILTIN_TINSRH
,
22502 ARM_BUILTIN_TINSRW
,
22504 ARM_BUILTIN_WMAXSW
,
22505 ARM_BUILTIN_WMAXSH
,
22506 ARM_BUILTIN_WMAXSB
,
22507 ARM_BUILTIN_WMAXUW
,
22508 ARM_BUILTIN_WMAXUH
,
22509 ARM_BUILTIN_WMAXUB
,
22510 ARM_BUILTIN_WMINSW
,
22511 ARM_BUILTIN_WMINSH
,
22512 ARM_BUILTIN_WMINSB
,
22513 ARM_BUILTIN_WMINUW
,
22514 ARM_BUILTIN_WMINUH
,
22515 ARM_BUILTIN_WMINUB
,
22517 ARM_BUILTIN_WMULUM
,
22518 ARM_BUILTIN_WMULSM
,
22519 ARM_BUILTIN_WMULUL
,
22521 ARM_BUILTIN_PSADBH
,
22522 ARM_BUILTIN_WSHUFH
,
22536 ARM_BUILTIN_WSLLHI
,
22537 ARM_BUILTIN_WSLLWI
,
22538 ARM_BUILTIN_WSLLDI
,
22539 ARM_BUILTIN_WSRAHI
,
22540 ARM_BUILTIN_WSRAWI
,
22541 ARM_BUILTIN_WSRADI
,
22542 ARM_BUILTIN_WSRLHI
,
22543 ARM_BUILTIN_WSRLWI
,
22544 ARM_BUILTIN_WSRLDI
,
22545 ARM_BUILTIN_WRORHI
,
22546 ARM_BUILTIN_WRORWI
,
22547 ARM_BUILTIN_WRORDI
,
22549 ARM_BUILTIN_WUNPCKIHB
,
22550 ARM_BUILTIN_WUNPCKIHH
,
22551 ARM_BUILTIN_WUNPCKIHW
,
22552 ARM_BUILTIN_WUNPCKILB
,
22553 ARM_BUILTIN_WUNPCKILH
,
22554 ARM_BUILTIN_WUNPCKILW
,
22556 ARM_BUILTIN_WUNPCKEHSB
,
22557 ARM_BUILTIN_WUNPCKEHSH
,
22558 ARM_BUILTIN_WUNPCKEHSW
,
22559 ARM_BUILTIN_WUNPCKEHUB
,
22560 ARM_BUILTIN_WUNPCKEHUH
,
22561 ARM_BUILTIN_WUNPCKEHUW
,
22562 ARM_BUILTIN_WUNPCKELSB
,
22563 ARM_BUILTIN_WUNPCKELSH
,
22564 ARM_BUILTIN_WUNPCKELSW
,
22565 ARM_BUILTIN_WUNPCKELUB
,
22566 ARM_BUILTIN_WUNPCKELUH
,
22567 ARM_BUILTIN_WUNPCKELUW
,
22573 ARM_BUILTIN_WADDSUBHX
,
22574 ARM_BUILTIN_WSUBADDHX
,
22576 ARM_BUILTIN_WABSDIFFB
,
22577 ARM_BUILTIN_WABSDIFFH
,
22578 ARM_BUILTIN_WABSDIFFW
,
22580 ARM_BUILTIN_WADDCH
,
22581 ARM_BUILTIN_WADDCW
,
22584 ARM_BUILTIN_WAVG4R
,
22586 ARM_BUILTIN_WMADDSX
,
22587 ARM_BUILTIN_WMADDUX
,
22589 ARM_BUILTIN_WMADDSN
,
22590 ARM_BUILTIN_WMADDUN
,
22592 ARM_BUILTIN_WMULWSM
,
22593 ARM_BUILTIN_WMULWUM
,
22595 ARM_BUILTIN_WMULWSMR
,
22596 ARM_BUILTIN_WMULWUMR
,
22598 ARM_BUILTIN_WMULWL
,
22600 ARM_BUILTIN_WMULSMR
,
22601 ARM_BUILTIN_WMULUMR
,
22603 ARM_BUILTIN_WQMULM
,
22604 ARM_BUILTIN_WQMULMR
,
22606 ARM_BUILTIN_WQMULWM
,
22607 ARM_BUILTIN_WQMULWMR
,
22609 ARM_BUILTIN_WADDBHUSM
,
22610 ARM_BUILTIN_WADDBHUSL
,
22612 ARM_BUILTIN_WQMIABB
,
22613 ARM_BUILTIN_WQMIABT
,
22614 ARM_BUILTIN_WQMIATB
,
22615 ARM_BUILTIN_WQMIATT
,
22617 ARM_BUILTIN_WQMIABBN
,
22618 ARM_BUILTIN_WQMIABTN
,
22619 ARM_BUILTIN_WQMIATBN
,
22620 ARM_BUILTIN_WQMIATTN
,
22622 ARM_BUILTIN_WMIABB
,
22623 ARM_BUILTIN_WMIABT
,
22624 ARM_BUILTIN_WMIATB
,
22625 ARM_BUILTIN_WMIATT
,
22627 ARM_BUILTIN_WMIABBN
,
22628 ARM_BUILTIN_WMIABTN
,
22629 ARM_BUILTIN_WMIATBN
,
22630 ARM_BUILTIN_WMIATTN
,
22632 ARM_BUILTIN_WMIAWBB
,
22633 ARM_BUILTIN_WMIAWBT
,
22634 ARM_BUILTIN_WMIAWTB
,
22635 ARM_BUILTIN_WMIAWTT
,
22637 ARM_BUILTIN_WMIAWBBN
,
22638 ARM_BUILTIN_WMIAWBTN
,
22639 ARM_BUILTIN_WMIAWTBN
,
22640 ARM_BUILTIN_WMIAWTTN
,
22642 ARM_BUILTIN_WMERGE
,
22644 #include "arm_neon_builtins.def"
/* Function code of the first NEON builtin: the NEON entries occupy the last
   ARRAY_SIZE (neon_builtin_data) slots of the builtin-code space, so a NEON
   builtin's index into neon_builtin_data is (fcode - ARM_BUILTIN_NEON_BASE).  */
#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
22663 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
22666 arm_init_neon_builtins (void)
22668 unsigned int i
, fcode
;
22671 tree neon_intQI_type_node
;
22672 tree neon_intHI_type_node
;
22673 tree neon_floatHF_type_node
;
22674 tree neon_polyQI_type_node
;
22675 tree neon_polyHI_type_node
;
22676 tree neon_intSI_type_node
;
22677 tree neon_intDI_type_node
;
22678 tree neon_float_type_node
;
22680 tree intQI_pointer_node
;
22681 tree intHI_pointer_node
;
22682 tree intSI_pointer_node
;
22683 tree intDI_pointer_node
;
22684 tree float_pointer_node
;
22686 tree const_intQI_node
;
22687 tree const_intHI_node
;
22688 tree const_intSI_node
;
22689 tree const_intDI_node
;
22690 tree const_float_node
;
22692 tree const_intQI_pointer_node
;
22693 tree const_intHI_pointer_node
;
22694 tree const_intSI_pointer_node
;
22695 tree const_intDI_pointer_node
;
22696 tree const_float_pointer_node
;
22698 tree V8QI_type_node
;
22699 tree V4HI_type_node
;
22700 tree V4HF_type_node
;
22701 tree V2SI_type_node
;
22702 tree V2SF_type_node
;
22703 tree V16QI_type_node
;
22704 tree V8HI_type_node
;
22705 tree V4SI_type_node
;
22706 tree V4SF_type_node
;
22707 tree V2DI_type_node
;
22709 tree intUQI_type_node
;
22710 tree intUHI_type_node
;
22711 tree intUSI_type_node
;
22712 tree intUDI_type_node
;
22714 tree intEI_type_node
;
22715 tree intOI_type_node
;
22716 tree intCI_type_node
;
22717 tree intXI_type_node
;
22719 tree V8QI_pointer_node
;
22720 tree V4HI_pointer_node
;
22721 tree V2SI_pointer_node
;
22722 tree V2SF_pointer_node
;
22723 tree V16QI_pointer_node
;
22724 tree V8HI_pointer_node
;
22725 tree V4SI_pointer_node
;
22726 tree V4SF_pointer_node
;
22727 tree V2DI_pointer_node
;
22729 tree void_ftype_pv8qi_v8qi_v8qi
;
22730 tree void_ftype_pv4hi_v4hi_v4hi
;
22731 tree void_ftype_pv2si_v2si_v2si
;
22732 tree void_ftype_pv2sf_v2sf_v2sf
;
22733 tree void_ftype_pdi_di_di
;
22734 tree void_ftype_pv16qi_v16qi_v16qi
;
22735 tree void_ftype_pv8hi_v8hi_v8hi
;
22736 tree void_ftype_pv4si_v4si_v4si
;
22737 tree void_ftype_pv4sf_v4sf_v4sf
;
22738 tree void_ftype_pv2di_v2di_v2di
;
22740 tree reinterp_ftype_dreg
[5][5];
22741 tree reinterp_ftype_qreg
[5][5];
22742 tree dreg_types
[5], qreg_types
[5];
22744 /* Create distinguished type nodes for NEON vector element types,
22745 and pointers to values of such types, so we can detect them later. */
22746 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
22747 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
22748 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
22749 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
22750 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
22751 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
22752 neon_float_type_node
= make_node (REAL_TYPE
);
22753 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
22754 layout_type (neon_float_type_node
);
22755 neon_floatHF_type_node
= make_node (REAL_TYPE
);
22756 TYPE_PRECISION (neon_floatHF_type_node
) = GET_MODE_PRECISION (HFmode
);
22757 layout_type (neon_floatHF_type_node
);
22759 /* Define typedefs which exactly correspond to the modes we are basing vector
22760 types on. If you change these names you'll need to change
22761 the table used by arm_mangle_type too. */
22762 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
22763 "__builtin_neon_qi");
22764 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
22765 "__builtin_neon_hi");
22766 (*lang_hooks
.types
.register_builtin_type
) (neon_floatHF_type_node
,
22767 "__builtin_neon_hf");
22768 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
22769 "__builtin_neon_si");
22770 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
22771 "__builtin_neon_sf");
22772 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
22773 "__builtin_neon_di");
22774 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
22775 "__builtin_neon_poly8");
22776 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
22777 "__builtin_neon_poly16");
22779 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
22780 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
22781 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
22782 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
22783 float_pointer_node
= build_pointer_type (neon_float_type_node
);
22785 /* Next create constant-qualified versions of the above types. */
22786 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
22788 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
22790 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
22792 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
22794 const_float_node
= build_qualified_type (neon_float_type_node
,
22797 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
22798 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
22799 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
22800 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
22801 const_float_pointer_node
= build_pointer_type (const_float_node
);
22803 /* Now create vector types based on our NEON element types. */
22804 /* 64-bit vectors. */
22806 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
22808 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
22810 build_vector_type_for_mode (neon_floatHF_type_node
, V4HFmode
);
22812 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
22814 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
22815 /* 128-bit vectors. */
22817 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
22819 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
22821 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
22823 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
22825 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
22827 /* Unsigned integer types for various mode sizes. */
22828 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
22829 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
22830 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
22831 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
22833 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
22834 "__builtin_neon_uqi");
22835 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
22836 "__builtin_neon_uhi");
22837 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
22838 "__builtin_neon_usi");
22839 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
22840 "__builtin_neon_udi");
22842 /* Opaque integer types for structures of vectors. */
22843 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
22844 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
22845 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
22846 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
22848 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
22849 "__builtin_neon_ti");
22850 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
22851 "__builtin_neon_ei");
22852 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
22853 "__builtin_neon_oi");
22854 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
22855 "__builtin_neon_ci");
22856 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
22857 "__builtin_neon_xi");
22859 /* Pointers to vector types. */
22860 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
22861 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
22862 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
22863 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
22864 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
22865 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
22866 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
22867 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
22868 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
22870 /* Operations which return results as pairs. */
22871 void_ftype_pv8qi_v8qi_v8qi
=
22872 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
22873 V8QI_type_node
, NULL
);
22874 void_ftype_pv4hi_v4hi_v4hi
=
22875 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
22876 V4HI_type_node
, NULL
);
22877 void_ftype_pv2si_v2si_v2si
=
22878 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
22879 V2SI_type_node
, NULL
);
22880 void_ftype_pv2sf_v2sf_v2sf
=
22881 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
22882 V2SF_type_node
, NULL
);
22883 void_ftype_pdi_di_di
=
22884 build_function_type_list (void_type_node
, intDI_pointer_node
,
22885 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
22886 void_ftype_pv16qi_v16qi_v16qi
=
22887 build_function_type_list (void_type_node
, V16QI_pointer_node
,
22888 V16QI_type_node
, V16QI_type_node
, NULL
);
22889 void_ftype_pv8hi_v8hi_v8hi
=
22890 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
22891 V8HI_type_node
, NULL
);
22892 void_ftype_pv4si_v4si_v4si
=
22893 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
22894 V4SI_type_node
, NULL
);
22895 void_ftype_pv4sf_v4sf_v4sf
=
22896 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
22897 V4SF_type_node
, NULL
);
22898 void_ftype_pv2di_v2di_v2di
=
22899 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
22900 V2DI_type_node
, NULL
);
22902 dreg_types
[0] = V8QI_type_node
;
22903 dreg_types
[1] = V4HI_type_node
;
22904 dreg_types
[2] = V2SI_type_node
;
22905 dreg_types
[3] = V2SF_type_node
;
22906 dreg_types
[4] = neon_intDI_type_node
;
22908 qreg_types
[0] = V16QI_type_node
;
22909 qreg_types
[1] = V8HI_type_node
;
22910 qreg_types
[2] = V4SI_type_node
;
22911 qreg_types
[3] = V4SF_type_node
;
22912 qreg_types
[4] = V2DI_type_node
;
22914 for (i
= 0; i
< 5; i
++)
22917 for (j
= 0; j
< 5; j
++)
22919 reinterp_ftype_dreg
[i
][j
]
22920 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
22921 reinterp_ftype_qreg
[i
][j
]
22922 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
22926 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
22927 i
< ARRAY_SIZE (neon_builtin_data
);
22930 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
22932 const char* const modenames
[] = {
22933 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
22934 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
22939 int is_load
= 0, is_store
= 0;
22941 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
22948 case NEON_LOAD1LANE
:
22949 case NEON_LOADSTRUCT
:
22950 case NEON_LOADSTRUCTLANE
:
22952 /* Fall through. */
22954 case NEON_STORE1LANE
:
22955 case NEON_STORESTRUCT
:
22956 case NEON_STORESTRUCTLANE
:
22959 /* Fall through. */
22963 case NEON_LOGICBINOP
:
22964 case NEON_SHIFTINSERT
:
22971 case NEON_SHIFTIMM
:
22972 case NEON_SHIFTACC
:
22978 case NEON_LANEMULL
:
22979 case NEON_LANEMULH
:
22981 case NEON_SCALARMUL
:
22982 case NEON_SCALARMULL
:
22983 case NEON_SCALARMULH
:
22984 case NEON_SCALARMAC
:
22990 tree return_type
= void_type_node
, args
= void_list_node
;
22992 /* Build a function type directly from the insn_data for
22993 this builtin. The build_function_type() function takes
22994 care of removing duplicates for us. */
22995 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
22999 if (is_load
&& k
== 1)
23001 /* Neon load patterns always have the memory
23002 operand in the operand 1 position. */
23003 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23004 == neon_struct_operand
);
23010 eltype
= const_intQI_pointer_node
;
23015 eltype
= const_intHI_pointer_node
;
23020 eltype
= const_intSI_pointer_node
;
23025 eltype
= const_float_pointer_node
;
23030 eltype
= const_intDI_pointer_node
;
23033 default: gcc_unreachable ();
23036 else if (is_store
&& k
== 0)
23038 /* Similarly, Neon store patterns use operand 0 as
23039 the memory location to store to. */
23040 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23041 == neon_struct_operand
);
23047 eltype
= intQI_pointer_node
;
23052 eltype
= intHI_pointer_node
;
23057 eltype
= intSI_pointer_node
;
23062 eltype
= float_pointer_node
;
23067 eltype
= intDI_pointer_node
;
23070 default: gcc_unreachable ();
23075 switch (insn_data
[d
->code
].operand
[k
].mode
)
23077 case VOIDmode
: eltype
= void_type_node
; break;
23079 case QImode
: eltype
= neon_intQI_type_node
; break;
23080 case HImode
: eltype
= neon_intHI_type_node
; break;
23081 case SImode
: eltype
= neon_intSI_type_node
; break;
23082 case SFmode
: eltype
= neon_float_type_node
; break;
23083 case DImode
: eltype
= neon_intDI_type_node
; break;
23084 case TImode
: eltype
= intTI_type_node
; break;
23085 case EImode
: eltype
= intEI_type_node
; break;
23086 case OImode
: eltype
= intOI_type_node
; break;
23087 case CImode
: eltype
= intCI_type_node
; break;
23088 case XImode
: eltype
= intXI_type_node
; break;
23089 /* 64-bit vectors. */
23090 case V8QImode
: eltype
= V8QI_type_node
; break;
23091 case V4HImode
: eltype
= V4HI_type_node
; break;
23092 case V2SImode
: eltype
= V2SI_type_node
; break;
23093 case V2SFmode
: eltype
= V2SF_type_node
; break;
23094 /* 128-bit vectors. */
23095 case V16QImode
: eltype
= V16QI_type_node
; break;
23096 case V8HImode
: eltype
= V8HI_type_node
; break;
23097 case V4SImode
: eltype
= V4SI_type_node
; break;
23098 case V4SFmode
: eltype
= V4SF_type_node
; break;
23099 case V2DImode
: eltype
= V2DI_type_node
; break;
23100 default: gcc_unreachable ();
23104 if (k
== 0 && !is_store
)
23105 return_type
= eltype
;
23107 args
= tree_cons (NULL_TREE
, eltype
, args
);
23110 ftype
= build_function_type (return_type
, args
);
23114 case NEON_RESULTPAIR
:
23116 switch (insn_data
[d
->code
].operand
[1].mode
)
23118 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
23119 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
23120 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
23121 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
23122 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
23123 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
23124 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
23125 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
23126 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
23127 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
23128 default: gcc_unreachable ();
23133 case NEON_REINTERP
:
23135 /* We iterate over 5 doubleword types, then 5 quadword
23136 types. V4HF is not a type used in reinterpret, so we translate
23137 d->mode to the correct index in reinterp_ftype_dreg. */
23138 int rhs
= (d
->mode
- ((d
->mode
> T_V4HF
) ? 1 : 0)) % 5;
23139 switch (insn_data
[d
->code
].operand
[0].mode
)
23141 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
23142 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
23143 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
23144 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
23145 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
23146 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
23147 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
23148 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
23149 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
23150 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
23151 default: gcc_unreachable ();
23155 case NEON_FLOAT_WIDEN
:
23157 tree eltype
= NULL_TREE
;
23158 tree return_type
= NULL_TREE
;
23160 switch (insn_data
[d
->code
].operand
[1].mode
)
23163 eltype
= V4HF_type_node
;
23164 return_type
= V4SF_type_node
;
23166 default: gcc_unreachable ();
23168 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
23171 case NEON_FLOAT_NARROW
:
23173 tree eltype
= NULL_TREE
;
23174 tree return_type
= NULL_TREE
;
23176 switch (insn_data
[d
->code
].operand
[1].mode
)
23179 eltype
= V4SF_type_node
;
23180 return_type
= V4HF_type_node
;
23182 default: gcc_unreachable ();
23184 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
23188 gcc_unreachable ();
23191 gcc_assert (ftype
!= NULL
);
23193 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
23195 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
23197 arm_builtin_decls
[fcode
] = decl
;
/* Register builtin NAME with function type TYPE and builtin code CODE,
   but only when the feature bits in MASK are present in the current
   target's insn_flags; also records the decl in arm_builtin_decls.
   Wrapped in do { } while (0) so it behaves as a single statement.  */
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	{								\
	  tree bdecl;							\
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),		\
				       BUILT_IN_MD, NULL, NULL_TREE);	\
	  arm_builtin_decls[CODE] = bdecl;				\
	}								\
    }									\
  while (0)
23214 struct builtin_description
23216 const unsigned int mask
;
23217 const enum insn_code icode
;
23218 const char * const name
;
23219 const enum arm_builtins code
;
23220 const enum rtx_code comparison
;
23221 const unsigned int flag
;
23224 static const struct builtin_description bdesc_2arg
[] =
23226 #define IWMMXT_BUILTIN(code, string, builtin) \
23227 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23228 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23230 #define IWMMXT2_BUILTIN(code, string, builtin) \
23231 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23232 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23234 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
23235 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
23236 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
23237 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
23238 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
23239 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
23240 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
23241 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
23242 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
23243 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
23244 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
23245 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
23246 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
23247 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
23248 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
23249 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
23250 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
23251 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
23252 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
23253 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
23254 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
23255 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
23256 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
23257 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
23258 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
23259 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
23260 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
23261 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
23262 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
23263 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
23264 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
23265 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
23266 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
23267 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
23268 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
23269 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
23270 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
23271 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
23272 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
23273 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
23274 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
23275 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
23276 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
23277 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
23278 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
23279 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
23280 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
23281 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
23282 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
23283 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
23284 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
23285 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
23286 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
23287 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
23288 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
23289 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
23290 IWMMXT2_BUILTIN (iwmmxt_waddsubhx
, "waddsubhx", WADDSUBHX
)
23291 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx
, "wsubaddhx", WSUBADDHX
)
23292 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb
, "wabsdiffb", WABSDIFFB
)
23293 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh
, "wabsdiffh", WABSDIFFH
)
23294 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw
, "wabsdiffw", WABSDIFFW
)
23295 IWMMXT2_BUILTIN (iwmmxt_avg4
, "wavg4", WAVG4
)
23296 IWMMXT2_BUILTIN (iwmmxt_avg4r
, "wavg4r", WAVG4R
)
23297 IWMMXT2_BUILTIN (iwmmxt_wmulwsm
, "wmulwsm", WMULWSM
)
23298 IWMMXT2_BUILTIN (iwmmxt_wmulwum
, "wmulwum", WMULWUM
)
23299 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr
, "wmulwsmr", WMULWSMR
)
23300 IWMMXT2_BUILTIN (iwmmxt_wmulwumr
, "wmulwumr", WMULWUMR
)
23301 IWMMXT2_BUILTIN (iwmmxt_wmulwl
, "wmulwl", WMULWL
)
23302 IWMMXT2_BUILTIN (iwmmxt_wmulsmr
, "wmulsmr", WMULSMR
)
23303 IWMMXT2_BUILTIN (iwmmxt_wmulumr
, "wmulumr", WMULUMR
)
23304 IWMMXT2_BUILTIN (iwmmxt_wqmulm
, "wqmulm", WQMULM
)
23305 IWMMXT2_BUILTIN (iwmmxt_wqmulmr
, "wqmulmr", WQMULMR
)
23306 IWMMXT2_BUILTIN (iwmmxt_wqmulwm
, "wqmulwm", WQMULWM
)
23307 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr
, "wqmulwmr", WQMULWMR
)
23308 IWMMXT_BUILTIN (iwmmxt_walignr0
, "walignr0", WALIGNR0
)
23309 IWMMXT_BUILTIN (iwmmxt_walignr1
, "walignr1", WALIGNR1
)
23310 IWMMXT_BUILTIN (iwmmxt_walignr2
, "walignr2", WALIGNR2
)
23311 IWMMXT_BUILTIN (iwmmxt_walignr3
, "walignr3", WALIGNR3
)
23313 #define IWMMXT_BUILTIN2(code, builtin) \
23314 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23316 #define IWMMXT2_BUILTIN2(code, builtin) \
23317 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23319 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm
, WADDBHUSM
)
23320 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl
, WADDBHUSL
)
23321 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
23322 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
23323 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
23324 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
23325 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
23326 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
23327 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
23328 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
23331 static const struct builtin_description bdesc_1arg
[] =
23333 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
23334 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
23335 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
23336 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
23337 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
23338 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
23339 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
23340 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
23341 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
23342 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
23343 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
23344 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
23345 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
23346 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
23347 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
23348 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
23349 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
23350 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
23351 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3
, "wabsb", WABSB
)
23352 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3
, "wabsh", WABSH
)
23353 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3
, "wabsw", WABSW
)
23354 IWMMXT_BUILTIN (tbcstv8qi
, "tbcstb", TBCSTB
)
23355 IWMMXT_BUILTIN (tbcstv4hi
, "tbcsth", TBCSTH
)
23356 IWMMXT_BUILTIN (tbcstv2si
, "tbcstw", TBCSTW
)
23359 /* Set up all the iWMMXt builtins. This is not called if
23360 TARGET_IWMMXT is zero. */
23363 arm_init_iwmmxt_builtins (void)
23365 const struct builtin_description
* d
;
23368 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
23369 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
23370 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
23372 tree v8qi_ftype_v8qi_v8qi_int
23373 = build_function_type_list (V8QI_type_node
,
23374 V8QI_type_node
, V8QI_type_node
,
23375 integer_type_node
, NULL_TREE
);
23376 tree v4hi_ftype_v4hi_int
23377 = build_function_type_list (V4HI_type_node
,
23378 V4HI_type_node
, integer_type_node
, NULL_TREE
);
23379 tree v2si_ftype_v2si_int
23380 = build_function_type_list (V2SI_type_node
,
23381 V2SI_type_node
, integer_type_node
, NULL_TREE
);
23382 tree v2si_ftype_di_di
23383 = build_function_type_list (V2SI_type_node
,
23384 long_long_integer_type_node
,
23385 long_long_integer_type_node
,
23387 tree di_ftype_di_int
23388 = build_function_type_list (long_long_integer_type_node
,
23389 long_long_integer_type_node
,
23390 integer_type_node
, NULL_TREE
);
23391 tree di_ftype_di_int_int
23392 = build_function_type_list (long_long_integer_type_node
,
23393 long_long_integer_type_node
,
23395 integer_type_node
, NULL_TREE
);
23396 tree int_ftype_v8qi
23397 = build_function_type_list (integer_type_node
,
23398 V8QI_type_node
, NULL_TREE
);
23399 tree int_ftype_v4hi
23400 = build_function_type_list (integer_type_node
,
23401 V4HI_type_node
, NULL_TREE
);
23402 tree int_ftype_v2si
23403 = build_function_type_list (integer_type_node
,
23404 V2SI_type_node
, NULL_TREE
);
23405 tree int_ftype_v8qi_int
23406 = build_function_type_list (integer_type_node
,
23407 V8QI_type_node
, integer_type_node
, NULL_TREE
);
23408 tree int_ftype_v4hi_int
23409 = build_function_type_list (integer_type_node
,
23410 V4HI_type_node
, integer_type_node
, NULL_TREE
);
23411 tree int_ftype_v2si_int
23412 = build_function_type_list (integer_type_node
,
23413 V2SI_type_node
, integer_type_node
, NULL_TREE
);
23414 tree v8qi_ftype_v8qi_int_int
23415 = build_function_type_list (V8QI_type_node
,
23416 V8QI_type_node
, integer_type_node
,
23417 integer_type_node
, NULL_TREE
);
23418 tree v4hi_ftype_v4hi_int_int
23419 = build_function_type_list (V4HI_type_node
,
23420 V4HI_type_node
, integer_type_node
,
23421 integer_type_node
, NULL_TREE
);
23422 tree v2si_ftype_v2si_int_int
23423 = build_function_type_list (V2SI_type_node
,
23424 V2SI_type_node
, integer_type_node
,
23425 integer_type_node
, NULL_TREE
);
23426 /* Miscellaneous. */
23427 tree v8qi_ftype_v4hi_v4hi
23428 = build_function_type_list (V8QI_type_node
,
23429 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
23430 tree v4hi_ftype_v2si_v2si
23431 = build_function_type_list (V4HI_type_node
,
23432 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
23433 tree v8qi_ftype_v4hi_v8qi
23434 = build_function_type_list (V8QI_type_node
,
23435 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
23436 tree v2si_ftype_v4hi_v4hi
23437 = build_function_type_list (V2SI_type_node
,
23438 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
23439 tree v2si_ftype_v8qi_v8qi
23440 = build_function_type_list (V2SI_type_node
,
23441 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
23442 tree v4hi_ftype_v4hi_di
23443 = build_function_type_list (V4HI_type_node
,
23444 V4HI_type_node
, long_long_integer_type_node
,
23446 tree v2si_ftype_v2si_di
23447 = build_function_type_list (V2SI_type_node
,
23448 V2SI_type_node
, long_long_integer_type_node
,
23451 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
23452 tree int_ftype_void
23453 = build_function_type_list (integer_type_node
, NULL_TREE
);
23455 = build_function_type_list (long_long_integer_type_node
,
23456 V8QI_type_node
, NULL_TREE
);
23458 = build_function_type_list (long_long_integer_type_node
,
23459 V4HI_type_node
, NULL_TREE
);
23461 = build_function_type_list (long_long_integer_type_node
,
23462 V2SI_type_node
, NULL_TREE
);
23463 tree v2si_ftype_v4hi
23464 = build_function_type_list (V2SI_type_node
,
23465 V4HI_type_node
, NULL_TREE
);
23466 tree v4hi_ftype_v8qi
23467 = build_function_type_list (V4HI_type_node
,
23468 V8QI_type_node
, NULL_TREE
);
23469 tree v8qi_ftype_v8qi
23470 = build_function_type_list (V8QI_type_node
,
23471 V8QI_type_node
, NULL_TREE
);
23472 tree v4hi_ftype_v4hi
23473 = build_function_type_list (V4HI_type_node
,
23474 V4HI_type_node
, NULL_TREE
);
23475 tree v2si_ftype_v2si
23476 = build_function_type_list (V2SI_type_node
,
23477 V2SI_type_node
, NULL_TREE
);
23479 tree di_ftype_di_v4hi_v4hi
23480 = build_function_type_list (long_long_unsigned_type_node
,
23481 long_long_unsigned_type_node
,
23482 V4HI_type_node
, V4HI_type_node
,
23485 tree di_ftype_v4hi_v4hi
23486 = build_function_type_list (long_long_unsigned_type_node
,
23487 V4HI_type_node
,V4HI_type_node
,
23490 tree v2si_ftype_v2si_v4hi_v4hi
23491 = build_function_type_list (V2SI_type_node
,
23492 V2SI_type_node
, V4HI_type_node
,
23493 V4HI_type_node
, NULL_TREE
);
23495 tree v2si_ftype_v2si_v8qi_v8qi
23496 = build_function_type_list (V2SI_type_node
,
23497 V2SI_type_node
, V8QI_type_node
,
23498 V8QI_type_node
, NULL_TREE
);
23500 tree di_ftype_di_v2si_v2si
23501 = build_function_type_list (long_long_unsigned_type_node
,
23502 long_long_unsigned_type_node
,
23503 V2SI_type_node
, V2SI_type_node
,
23506 tree di_ftype_di_di_int
23507 = build_function_type_list (long_long_unsigned_type_node
,
23508 long_long_unsigned_type_node
,
23509 long_long_unsigned_type_node
,
23510 integer_type_node
, NULL_TREE
);
23512 tree void_ftype_int
23513 = build_function_type_list (void_type_node
,
23514 integer_type_node
, NULL_TREE
);
23516 tree v8qi_ftype_char
23517 = build_function_type_list (V8QI_type_node
,
23518 signed_char_type_node
, NULL_TREE
);
23520 tree v4hi_ftype_short
23521 = build_function_type_list (V4HI_type_node
,
23522 short_integer_type_node
, NULL_TREE
);
23524 tree v2si_ftype_int
23525 = build_function_type_list (V2SI_type_node
,
23526 integer_type_node
, NULL_TREE
);
23528 /* Normal vector binops. */
23529 tree v8qi_ftype_v8qi_v8qi
23530 = build_function_type_list (V8QI_type_node
,
23531 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
23532 tree v4hi_ftype_v4hi_v4hi
23533 = build_function_type_list (V4HI_type_node
,
23534 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
23535 tree v2si_ftype_v2si_v2si
23536 = build_function_type_list (V2SI_type_node
,
23537 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
23538 tree di_ftype_di_di
23539 = build_function_type_list (long_long_unsigned_type_node
,
23540 long_long_unsigned_type_node
,
23541 long_long_unsigned_type_node
,
23544 /* Add all builtins that are more or less simple operations on two
23546 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
23548 /* Use one of the operands; the target can have a different mode for
23549 mask-generating compares. */
23550 enum machine_mode mode
;
23556 mode
= insn_data
[d
->icode
].operand
[1].mode
;
23561 type
= v8qi_ftype_v8qi_v8qi
;
23564 type
= v4hi_ftype_v4hi_v4hi
;
23567 type
= v2si_ftype_v2si_v2si
;
23570 type
= di_ftype_di_di
;
23574 gcc_unreachable ();
23577 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
23580 /* Add the remaining MMX insns with somewhat more complicated types. */
23581 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
23582 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
23583 ARM_BUILTIN_ ## CODE)
23585 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
23586 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
23587 ARM_BUILTIN_ ## CODE)
23589 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
23590 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
23591 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
23592 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
23593 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
23594 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
23595 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
23596 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
23597 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
23599 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
23600 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
23601 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
23602 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
23603 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
23604 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
23606 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
23607 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
23608 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
23609 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
23610 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
23611 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
23613 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
23614 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
23615 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
23616 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
23617 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
23618 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
23620 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
23621 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
23622 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
23623 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
23624 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
23625 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
23627 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
23629 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
23630 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
23631 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
23632 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
23633 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
23634 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
23635 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
23636 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
23637 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
23638 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
23640 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
23641 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
23642 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
23643 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
23644 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
23645 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
23646 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
23647 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
23648 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
23650 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
23651 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
23652 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
23654 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
23655 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
23656 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
23658 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
23659 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
23661 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
23662 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
23663 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
23664 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
23665 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
23666 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
23668 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
23669 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
23670 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
23671 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
23672 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
23673 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
23674 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
23675 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
23676 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
23677 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
23678 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
23679 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
23681 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
23682 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
23683 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
23684 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
23686 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
23687 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
23688 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
23689 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
23690 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
23691 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
23692 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
23694 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
23695 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
23696 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
23698 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
23699 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
23700 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
23701 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
23703 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
23704 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
23705 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
23706 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
23708 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
23709 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
23710 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
23711 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
23713 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
23714 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
23715 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
23716 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
23718 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
23719 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
23720 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
23721 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
23723 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
23724 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
23725 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
23726 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
23728 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
23730 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
23731 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
23732 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
23734 #undef iwmmx_mbuiltin
23735 #undef iwmmx2_mbuiltin
23739 arm_init_fp16_builtins (void)
23741 tree fp16_type
= make_node (REAL_TYPE
);
23742 TYPE_PRECISION (fp16_type
) = 16;
23743 layout_type (fp16_type
);
23744 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
23748 arm_init_builtins (void)
23750 if (TARGET_REALLY_IWMMXT
)
23751 arm_init_iwmmxt_builtins ();
23754 arm_init_neon_builtins ();
23756 if (arm_fp16_format
)
23757 arm_init_fp16_builtins ();
23760 /* Return the ARM builtin for CODE. */
23763 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
23765 if (code
>= ARM_BUILTIN_MAX
)
23766 return error_mark_node
;
23768 return arm_builtin_decls
[code
];
23771 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23773 static const char *
23774 arm_invalid_parameter_type (const_tree t
)
23776 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23777 return N_("function parameters cannot have __fp16 type");
23781 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23783 static const char *
23784 arm_invalid_return_type (const_tree t
)
23786 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23787 return N_("functions cannot return __fp16 type");
23791 /* Implement TARGET_PROMOTED_TYPE. */
23794 arm_promoted_type (const_tree t
)
23796 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23797 return float_type_node
;
23801 /* Implement TARGET_CONVERT_TO_TYPE.
23802 Specifically, this hook implements the peculiarity of the ARM
23803 half-precision floating-point C semantics that requires conversions between
23804 __fp16 to or from double to do an intermediate conversion to float. */
23807 arm_convert_to_type (tree type
, tree expr
)
23809 tree fromtype
= TREE_TYPE (expr
);
23810 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
23812 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
23813 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
23814 return convert (type
, convert (float_type_node
, expr
));
23818 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23819 This simply adds HFmode as a supported mode; even though we don't
23820 implement arithmetic on this type directly, it's supported by
23821 optabs conversions, much the way the double-word arithmetic is
23822 special-cased in the default hook. */
23825 arm_scalar_mode_supported_p (enum machine_mode mode
)
23827 if (mode
== HFmode
)
23828 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23829 else if (ALL_FIXED_POINT_MODE_P (mode
))
23832 return default_scalar_mode_supported_p (mode
);
23835 /* Errors in the source file can cause expand_expr to return const0_rtx
23836 where we expect a vector. To avoid crashing, use one of the vector
23837 clear instructions. */
23840 safe_vector_operand (rtx x
, enum machine_mode mode
)
23842 if (x
!= const0_rtx
)
23844 x
= gen_reg_rtx (mode
);
23846 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
23847 : gen_rtx_SUBREG (DImode
, x
, 0)));
23851 /* Subroutine of arm_expand_builtin to take care of binop insns. */
23854 arm_expand_binop_builtin (enum insn_code icode
,
23855 tree exp
, rtx target
)
23858 tree arg0
= CALL_EXPR_ARG (exp
, 0);
23859 tree arg1
= CALL_EXPR_ARG (exp
, 1);
23860 rtx op0
= expand_normal (arg0
);
23861 rtx op1
= expand_normal (arg1
);
23862 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
23863 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
23864 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
23866 if (VECTOR_MODE_P (mode0
))
23867 op0
= safe_vector_operand (op0
, mode0
);
23868 if (VECTOR_MODE_P (mode1
))
23869 op1
= safe_vector_operand (op1
, mode1
);
23872 || GET_MODE (target
) != tmode
23873 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
23874 target
= gen_reg_rtx (tmode
);
23876 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
23877 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
23879 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
23880 op0
= copy_to_mode_reg (mode0
, op0
);
23881 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
23882 op1
= copy_to_mode_reg (mode1
, op1
);
23884 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
23891 /* Subroutine of arm_expand_builtin to take care of unop insns. */
23894 arm_expand_unop_builtin (enum insn_code icode
,
23895 tree exp
, rtx target
, int do_load
)
23898 tree arg0
= CALL_EXPR_ARG (exp
, 0);
23899 rtx op0
= expand_normal (arg0
);
23900 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
23901 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
23904 || GET_MODE (target
) != tmode
23905 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
23906 target
= gen_reg_rtx (tmode
);
23908 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
23911 if (VECTOR_MODE_P (mode0
))
23912 op0
= safe_vector_operand (op0
, mode0
);
23914 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
23915 op0
= copy_to_mode_reg (mode0
, op0
);
23918 pat
= GEN_FCN (icode
) (target
, op0
);
23926 NEON_ARG_COPY_TO_REG
,
23932 #define NEON_MAX_BUILTIN_ARGS 5
23934 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
23935 and return an expression for the accessed memory.
23937 The intrinsic function operates on a block of registers that has
23938 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
23939 function references the memory at EXP of type TYPE and in mode
23940 MEM_MODE; this mode may be BLKmode if no more suitable mode is
23944 neon_dereference_pointer (tree exp
, tree type
, enum machine_mode mem_mode
,
23945 enum machine_mode reg_mode
,
23946 neon_builtin_type_mode type_mode
)
23948 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
23949 tree elem_type
, upper_bound
, array_type
;
23951 /* Work out the size of the register block in bytes. */
23952 reg_size
= GET_MODE_SIZE (reg_mode
);
23954 /* Work out the size of each vector in bytes. */
23955 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
23956 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
23958 /* Work out how many vectors there are. */
23959 gcc_assert (reg_size
% vector_size
== 0);
23960 nvectors
= reg_size
/ vector_size
;
23962 /* Work out the type of each element. */
23963 gcc_assert (POINTER_TYPE_P (type
));
23964 elem_type
= TREE_TYPE (type
);
23966 /* Work out how many elements are being loaded or stored.
23967 MEM_MODE == REG_MODE implies a one-to-one mapping between register
23968 and memory elements; anything else implies a lane load or store. */
23969 if (mem_mode
== reg_mode
)
23970 nelems
= vector_size
* nvectors
/ int_size_in_bytes (elem_type
);
23974 /* Create a type that describes the full access. */
23975 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
23976 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
23978 /* Dereference EXP using that type. */
23979 return fold_build2 (MEM_REF
, array_type
, exp
,
23980 build_int_cst (build_pointer_type (array_type
), 0));
23983 /* Expand a Neon builtin. */
23985 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
23986 neon_builtin_type_mode type_mode
,
23987 tree exp
, int fcode
, ...)
23991 tree arg
[NEON_MAX_BUILTIN_ARGS
];
23992 rtx op
[NEON_MAX_BUILTIN_ARGS
];
23995 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
23996 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
23997 enum machine_mode other_mode
;
24003 || GET_MODE (target
) != tmode
24004 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
24005 target
= gen_reg_rtx (tmode
);
24007 va_start (ap
, fcode
);
24009 formals
= TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls
[fcode
]));
24013 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
24015 if (thisarg
== NEON_ARG_STOP
)
24019 opno
= argc
+ have_retval
;
24020 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
24021 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
24022 arg_type
= TREE_VALUE (formals
);
24023 if (thisarg
== NEON_ARG_MEMORY
)
24025 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
24026 arg
[argc
] = neon_dereference_pointer (arg
[argc
], arg_type
,
24027 mode
[argc
], other_mode
,
24031 op
[argc
] = expand_normal (arg
[argc
]);
24035 case NEON_ARG_COPY_TO_REG
:
24036 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24037 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24038 (op
[argc
], mode
[argc
]))
24039 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
24042 case NEON_ARG_CONSTANT
:
24043 /* FIXME: This error message is somewhat unhelpful. */
24044 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24045 (op
[argc
], mode
[argc
]))
24046 error ("argument must be a constant");
24049 case NEON_ARG_MEMORY
:
24050 gcc_assert (MEM_P (op
[argc
]));
24051 PUT_MODE (op
[argc
], mode
[argc
]);
24052 /* ??? arm_neon.h uses the same built-in functions for signed
24053 and unsigned accesses, casting where necessary. This isn't
24055 set_mem_alias_set (op
[argc
], 0);
24056 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24057 (op
[argc
], mode
[argc
]))
24058 op
[argc
] = (replace_equiv_address
24059 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
24062 case NEON_ARG_STOP
:
24063 gcc_unreachable ();
24067 formals
= TREE_CHAIN (formals
);
24077 pat
= GEN_FCN (icode
) (target
, op
[0]);
24081 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
24085 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
24089 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
24093 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
24097 gcc_unreachable ();
24103 pat
= GEN_FCN (icode
) (op
[0]);
24107 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
24111 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
24115 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
24119 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
24123 gcc_unreachable ();
24134 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24135 constants defined per-instruction or per instruction-variant. Instead, the
24136 required info is looked up in the table neon_builtin_data. */
24138 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
24140 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
24141 neon_itype itype
= d
->itype
;
24142 enum insn_code icode
= d
->code
;
24143 neon_builtin_type_mode type_mode
= d
->mode
;
24150 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24151 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24155 case NEON_SCALARMUL
:
24156 case NEON_SCALARMULL
:
24157 case NEON_SCALARMULH
:
24158 case NEON_SHIFTINSERT
:
24159 case NEON_LOGICBINOP
:
24160 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24161 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
24165 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24166 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
24167 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24171 case NEON_SHIFTIMM
:
24172 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24173 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
24177 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24178 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
24183 case NEON_FLOAT_WIDEN
:
24184 case NEON_FLOAT_NARROW
:
24185 case NEON_REINTERP
:
24186 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24187 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
24191 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24192 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
24194 case NEON_RESULTPAIR
:
24195 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
24196 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
24200 case NEON_LANEMULL
:
24201 case NEON_LANEMULH
:
24202 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24203 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
24204 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24207 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24208 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
24209 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24211 case NEON_SHIFTACC
:
24212 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24213 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
24214 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24216 case NEON_SCALARMAC
:
24217 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24218 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
24219 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24223 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24224 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
24228 case NEON_LOADSTRUCT
:
24229 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24230 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
24232 case NEON_LOAD1LANE
:
24233 case NEON_LOADSTRUCTLANE
:
24234 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24235 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
24239 case NEON_STORESTRUCT
:
24240 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
24241 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
24243 case NEON_STORE1LANE
:
24244 case NEON_STORESTRUCTLANE
:
24245 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
24246 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
24250 gcc_unreachable ();
24253 /* Emit code to reinterpret one Neon type as another, without altering bits. */
24255 neon_reinterpret (rtx dest
, rtx src
)
24257 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
24260 /* Emit code to place a Neon pair result in memory locations (with equal
24263 neon_emit_pair_result_insn (enum machine_mode mode
,
24264 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
24267 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
24268 rtx tmp1
= gen_reg_rtx (mode
);
24269 rtx tmp2
= gen_reg_rtx (mode
);
24271 emit_insn (intfn (tmp1
, op1
, op2
, tmp2
));
24273 emit_move_insn (mem
, tmp1
);
24274 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
24275 emit_move_insn (mem
, tmp2
);
24278 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24279 not to early-clobber SRC registers in the process.
24281 We assume that the operands described by SRC and DEST represent a
24282 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24283 number of components into which the copy has been decomposed. */
24285 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
24289 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
24290 || REGNO (operands
[0]) < REGNO (operands
[1]))
24292 for (i
= 0; i
< count
; i
++)
24294 operands
[2 * i
] = dest
[i
];
24295 operands
[2 * i
+ 1] = src
[i
];
24300 for (i
= 0; i
< count
; i
++)
24302 operands
[2 * i
] = dest
[count
- i
- 1];
24303 operands
[2 * i
+ 1] = src
[count
- i
- 1];
24308 /* Split operands into moves from op[1] + op[2] into op[0]. */
24311 neon_split_vcombine (rtx operands
[3])
24313 unsigned int dest
= REGNO (operands
[0]);
24314 unsigned int src1
= REGNO (operands
[1]);
24315 unsigned int src2
= REGNO (operands
[2]);
24316 enum machine_mode halfmode
= GET_MODE (operands
[1]);
24317 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
24318 rtx destlo
, desthi
;
24320 if (src1
== dest
&& src2
== dest
+ halfregs
)
24322 /* No-op move. Can't split to nothing; emit something. */
24323 emit_note (NOTE_INSN_DELETED
);
24327 /* Preserve register attributes for variable tracking. */
24328 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
24329 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
24330 GET_MODE_SIZE (halfmode
));
24332 /* Special case of reversed high/low parts. Use VSWP. */
24333 if (src2
== dest
&& src1
== dest
+ halfregs
)
24335 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
24336 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
24337 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
24341 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
24343 /* Try to avoid unnecessary moves if part of the result
24344 is in the right place already. */
24346 emit_move_insn (destlo
, operands
[1]);
24347 if (src2
!= dest
+ halfregs
)
24348 emit_move_insn (desthi
, operands
[2]);
24352 if (src2
!= dest
+ halfregs
)
24353 emit_move_insn (desthi
, operands
[2]);
24355 emit_move_insn (destlo
, operands
[1]);
24359 /* Expand an expression EXP that calls a built-in function,
24360 with result going to TARGET if that's convenient
24361 (and in mode MODE if that's convenient).
24362 SUBTARGET may be used as the target for computing one of EXP's operands.
24363 IGNORE is nonzero if the value is to be ignored. */
24366 arm_expand_builtin (tree exp
,
24368 rtx subtarget ATTRIBUTE_UNUSED
,
24369 enum machine_mode mode ATTRIBUTE_UNUSED
,
24370 int ignore ATTRIBUTE_UNUSED
)
24372 const struct builtin_description
* d
;
24373 enum insn_code icode
;
24374 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
24382 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
24384 enum machine_mode tmode
;
24385 enum machine_mode mode0
;
24386 enum machine_mode mode1
;
24387 enum machine_mode mode2
;
24393 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
24394 return arm_expand_neon_builtin (fcode
, exp
, target
);
24398 case ARM_BUILTIN_TEXTRMSB
:
24399 case ARM_BUILTIN_TEXTRMUB
:
24400 case ARM_BUILTIN_TEXTRMSH
:
24401 case ARM_BUILTIN_TEXTRMUH
:
24402 case ARM_BUILTIN_TEXTRMSW
:
24403 case ARM_BUILTIN_TEXTRMUW
:
24404 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
24405 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
24406 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
24407 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
24408 : CODE_FOR_iwmmxt_textrmw
);
24410 arg0
= CALL_EXPR_ARG (exp
, 0);
24411 arg1
= CALL_EXPR_ARG (exp
, 1);
24412 op0
= expand_normal (arg0
);
24413 op1
= expand_normal (arg1
);
24414 tmode
= insn_data
[icode
].operand
[0].mode
;
24415 mode0
= insn_data
[icode
].operand
[1].mode
;
24416 mode1
= insn_data
[icode
].operand
[2].mode
;
24418 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24419 op0
= copy_to_mode_reg (mode0
, op0
);
24420 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24422 /* @@@ better error message */
24423 error ("selector must be an immediate");
24424 return gen_reg_rtx (tmode
);
24427 opint
= INTVAL (op1
);
24428 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
24430 if (opint
> 7 || opint
< 0)
24431 error ("the range of selector should be in 0 to 7");
24433 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
24435 if (opint
> 3 || opint
< 0)
24436 error ("the range of selector should be in 0 to 3");
24438 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
24440 if (opint
> 1 || opint
< 0)
24441 error ("the range of selector should be in 0 to 1");
24445 || GET_MODE (target
) != tmode
24446 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24447 target
= gen_reg_rtx (tmode
);
24448 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24454 case ARM_BUILTIN_WALIGNI
:
24455 /* If op2 is immediate, call walighi, else call walighr. */
24456 arg0
= CALL_EXPR_ARG (exp
, 0);
24457 arg1
= CALL_EXPR_ARG (exp
, 1);
24458 arg2
= CALL_EXPR_ARG (exp
, 2);
24459 op0
= expand_normal (arg0
);
24460 op1
= expand_normal (arg1
);
24461 op2
= expand_normal (arg2
);
24462 if (CONST_INT_P (op2
))
24464 icode
= CODE_FOR_iwmmxt_waligni
;
24465 tmode
= insn_data
[icode
].operand
[0].mode
;
24466 mode0
= insn_data
[icode
].operand
[1].mode
;
24467 mode1
= insn_data
[icode
].operand
[2].mode
;
24468 mode2
= insn_data
[icode
].operand
[3].mode
;
24469 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24470 op0
= copy_to_mode_reg (mode0
, op0
);
24471 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24472 op1
= copy_to_mode_reg (mode1
, op1
);
24473 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
24474 selector
= INTVAL (op2
);
24475 if (selector
> 7 || selector
< 0)
24476 error ("the range of selector should be in 0 to 7");
24480 icode
= CODE_FOR_iwmmxt_walignr
;
24481 tmode
= insn_data
[icode
].operand
[0].mode
;
24482 mode0
= insn_data
[icode
].operand
[1].mode
;
24483 mode1
= insn_data
[icode
].operand
[2].mode
;
24484 mode2
= insn_data
[icode
].operand
[3].mode
;
24485 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24486 op0
= copy_to_mode_reg (mode0
, op0
);
24487 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24488 op1
= copy_to_mode_reg (mode1
, op1
);
24489 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
24490 op2
= copy_to_mode_reg (mode2
, op2
);
24493 || GET_MODE (target
) != tmode
24494 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24495 target
= gen_reg_rtx (tmode
);
24496 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
24502 case ARM_BUILTIN_TINSRB
:
24503 case ARM_BUILTIN_TINSRH
:
24504 case ARM_BUILTIN_TINSRW
:
24505 case ARM_BUILTIN_WMERGE
:
24506 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
24507 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
24508 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
24509 : CODE_FOR_iwmmxt_tinsrw
);
24510 arg0
= CALL_EXPR_ARG (exp
, 0);
24511 arg1
= CALL_EXPR_ARG (exp
, 1);
24512 arg2
= CALL_EXPR_ARG (exp
, 2);
24513 op0
= expand_normal (arg0
);
24514 op1
= expand_normal (arg1
);
24515 op2
= expand_normal (arg2
);
24516 tmode
= insn_data
[icode
].operand
[0].mode
;
24517 mode0
= insn_data
[icode
].operand
[1].mode
;
24518 mode1
= insn_data
[icode
].operand
[2].mode
;
24519 mode2
= insn_data
[icode
].operand
[3].mode
;
24521 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24522 op0
= copy_to_mode_reg (mode0
, op0
);
24523 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24524 op1
= copy_to_mode_reg (mode1
, op1
);
24525 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
24527 error ("selector must be an immediate");
24530 if (icode
== CODE_FOR_iwmmxt_wmerge
)
24532 selector
= INTVAL (op2
);
24533 if (selector
> 7 || selector
< 0)
24534 error ("the range of selector should be in 0 to 7");
24536 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
24537 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
24538 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
24541 selector
= INTVAL (op2
);
24542 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
24543 error ("the range of selector should be in 0 to 7");
24544 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
24545 error ("the range of selector should be in 0 to 3");
24546 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
24547 error ("the range of selector should be in 0 to 1");
24549 op2
= GEN_INT (mask
);
24552 || GET_MODE (target
) != tmode
24553 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24554 target
= gen_reg_rtx (tmode
);
24555 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
24561 case ARM_BUILTIN_SETWCGR0
:
24562 case ARM_BUILTIN_SETWCGR1
:
24563 case ARM_BUILTIN_SETWCGR2
:
24564 case ARM_BUILTIN_SETWCGR3
:
24565 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
24566 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
24567 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
24568 : CODE_FOR_iwmmxt_setwcgr3
);
24569 arg0
= CALL_EXPR_ARG (exp
, 0);
24570 op0
= expand_normal (arg0
);
24571 mode0
= insn_data
[icode
].operand
[0].mode
;
24572 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
24573 op0
= copy_to_mode_reg (mode0
, op0
);
24574 pat
= GEN_FCN (icode
) (op0
);
24580 case ARM_BUILTIN_GETWCGR0
:
24581 case ARM_BUILTIN_GETWCGR1
:
24582 case ARM_BUILTIN_GETWCGR2
:
24583 case ARM_BUILTIN_GETWCGR3
:
24584 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
24585 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
24586 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
24587 : CODE_FOR_iwmmxt_getwcgr3
);
24588 tmode
= insn_data
[icode
].operand
[0].mode
;
24590 || GET_MODE (target
) != tmode
24591 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24592 target
= gen_reg_rtx (tmode
);
24593 pat
= GEN_FCN (icode
) (target
);
24599 case ARM_BUILTIN_WSHUFH
:
24600 icode
= CODE_FOR_iwmmxt_wshufh
;
24601 arg0
= CALL_EXPR_ARG (exp
, 0);
24602 arg1
= CALL_EXPR_ARG (exp
, 1);
24603 op0
= expand_normal (arg0
);
24604 op1
= expand_normal (arg1
);
24605 tmode
= insn_data
[icode
].operand
[0].mode
;
24606 mode1
= insn_data
[icode
].operand
[1].mode
;
24607 mode2
= insn_data
[icode
].operand
[2].mode
;
24609 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
24610 op0
= copy_to_mode_reg (mode1
, op0
);
24611 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
24613 error ("mask must be an immediate");
24616 selector
= INTVAL (op1
);
24617 if (selector
< 0 || selector
> 255)
24618 error ("the range of mask should be in 0 to 255");
24620 || GET_MODE (target
) != tmode
24621 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24622 target
= gen_reg_rtx (tmode
);
24623 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24629 case ARM_BUILTIN_WMADDS
:
24630 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
24631 case ARM_BUILTIN_WMADDSX
:
24632 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
24633 case ARM_BUILTIN_WMADDSN
:
24634 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
24635 case ARM_BUILTIN_WMADDU
:
24636 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
24637 case ARM_BUILTIN_WMADDUX
:
24638 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
24639 case ARM_BUILTIN_WMADDUN
:
24640 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
24641 case ARM_BUILTIN_WSADBZ
:
24642 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
24643 case ARM_BUILTIN_WSADHZ
:
24644 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
24646 /* Several three-argument builtins. */
24647 case ARM_BUILTIN_WMACS
:
24648 case ARM_BUILTIN_WMACU
:
24649 case ARM_BUILTIN_TMIA
:
24650 case ARM_BUILTIN_TMIAPH
:
24651 case ARM_BUILTIN_TMIATT
:
24652 case ARM_BUILTIN_TMIATB
:
24653 case ARM_BUILTIN_TMIABT
:
24654 case ARM_BUILTIN_TMIABB
:
24655 case ARM_BUILTIN_WQMIABB
:
24656 case ARM_BUILTIN_WQMIABT
:
24657 case ARM_BUILTIN_WQMIATB
:
24658 case ARM_BUILTIN_WQMIATT
:
24659 case ARM_BUILTIN_WQMIABBN
:
24660 case ARM_BUILTIN_WQMIABTN
:
24661 case ARM_BUILTIN_WQMIATBN
:
24662 case ARM_BUILTIN_WQMIATTN
:
24663 case ARM_BUILTIN_WMIABB
:
24664 case ARM_BUILTIN_WMIABT
:
24665 case ARM_BUILTIN_WMIATB
:
24666 case ARM_BUILTIN_WMIATT
:
24667 case ARM_BUILTIN_WMIABBN
:
24668 case ARM_BUILTIN_WMIABTN
:
24669 case ARM_BUILTIN_WMIATBN
:
24670 case ARM_BUILTIN_WMIATTN
:
24671 case ARM_BUILTIN_WMIAWBB
:
24672 case ARM_BUILTIN_WMIAWBT
:
24673 case ARM_BUILTIN_WMIAWTB
:
24674 case ARM_BUILTIN_WMIAWTT
:
24675 case ARM_BUILTIN_WMIAWBBN
:
24676 case ARM_BUILTIN_WMIAWBTN
:
24677 case ARM_BUILTIN_WMIAWTBN
:
24678 case ARM_BUILTIN_WMIAWTTN
:
24679 case ARM_BUILTIN_WSADB
:
24680 case ARM_BUILTIN_WSADH
:
24681 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
24682 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
24683 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
24684 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
24685 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
24686 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
24687 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
24688 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
24689 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
24690 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
24691 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
24692 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
24693 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
24694 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
24695 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
24696 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
24697 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
24698 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
24699 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
24700 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
24701 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
24702 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
24703 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
24704 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
24705 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
24706 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
24707 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
24708 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
24709 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
24710 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
24711 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
24712 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
24713 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
24714 : CODE_FOR_iwmmxt_wsadh
);
24715 arg0
= CALL_EXPR_ARG (exp
, 0);
24716 arg1
= CALL_EXPR_ARG (exp
, 1);
24717 arg2
= CALL_EXPR_ARG (exp
, 2);
24718 op0
= expand_normal (arg0
);
24719 op1
= expand_normal (arg1
);
24720 op2
= expand_normal (arg2
);
24721 tmode
= insn_data
[icode
].operand
[0].mode
;
24722 mode0
= insn_data
[icode
].operand
[1].mode
;
24723 mode1
= insn_data
[icode
].operand
[2].mode
;
24724 mode2
= insn_data
[icode
].operand
[3].mode
;
24726 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24727 op0
= copy_to_mode_reg (mode0
, op0
);
24728 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24729 op1
= copy_to_mode_reg (mode1
, op1
);
24730 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
24731 op2
= copy_to_mode_reg (mode2
, op2
);
24733 || GET_MODE (target
) != tmode
24734 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24735 target
= gen_reg_rtx (tmode
);
24736 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
24742 case ARM_BUILTIN_WZERO
:
24743 target
= gen_reg_rtx (DImode
);
24744 emit_insn (gen_iwmmxt_clrdi (target
));
24747 case ARM_BUILTIN_WSRLHI
:
24748 case ARM_BUILTIN_WSRLWI
:
24749 case ARM_BUILTIN_WSRLDI
:
24750 case ARM_BUILTIN_WSLLHI
:
24751 case ARM_BUILTIN_WSLLWI
:
24752 case ARM_BUILTIN_WSLLDI
:
24753 case ARM_BUILTIN_WSRAHI
:
24754 case ARM_BUILTIN_WSRAWI
:
24755 case ARM_BUILTIN_WSRADI
:
24756 case ARM_BUILTIN_WRORHI
:
24757 case ARM_BUILTIN_WRORWI
:
24758 case ARM_BUILTIN_WRORDI
:
24759 case ARM_BUILTIN_WSRLH
:
24760 case ARM_BUILTIN_WSRLW
:
24761 case ARM_BUILTIN_WSRLD
:
24762 case ARM_BUILTIN_WSLLH
:
24763 case ARM_BUILTIN_WSLLW
:
24764 case ARM_BUILTIN_WSLLD
:
24765 case ARM_BUILTIN_WSRAH
:
24766 case ARM_BUILTIN_WSRAW
:
24767 case ARM_BUILTIN_WSRAD
:
24768 case ARM_BUILTIN_WRORH
:
24769 case ARM_BUILTIN_WRORW
:
24770 case ARM_BUILTIN_WRORD
:
24771 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
24772 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
24773 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
24774 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
24775 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
24776 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
24777 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
24778 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
24779 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
24780 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
24781 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
24782 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
24783 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
24784 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
24785 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
24786 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
24787 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
24788 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
24789 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
24790 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
24791 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
24792 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
24793 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
24794 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
24795 : CODE_FOR_nothing
);
24796 arg1
= CALL_EXPR_ARG (exp
, 1);
24797 op1
= expand_normal (arg1
);
24798 if (GET_MODE (op1
) == VOIDmode
)
24800 imm
= INTVAL (op1
);
24801 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
24802 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
24803 && (imm
< 0 || imm
> 32))
24805 if (fcode
== ARM_BUILTIN_WRORHI
)
24806 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
24807 else if (fcode
== ARM_BUILTIN_WRORWI
)
24808 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
24809 else if (fcode
== ARM_BUILTIN_WRORH
)
24810 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
24812 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
24814 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
24815 && (imm
< 0 || imm
> 64))
24817 if (fcode
== ARM_BUILTIN_WRORDI
)
24818 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
24820 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
24824 if (fcode
== ARM_BUILTIN_WSRLHI
)
24825 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
24826 else if (fcode
== ARM_BUILTIN_WSRLWI
)
24827 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
24828 else if (fcode
== ARM_BUILTIN_WSRLDI
)
24829 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
24830 else if (fcode
== ARM_BUILTIN_WSLLHI
)
24831 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
24832 else if (fcode
== ARM_BUILTIN_WSLLWI
)
24833 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
24834 else if (fcode
== ARM_BUILTIN_WSLLDI
)
24835 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
24836 else if (fcode
== ARM_BUILTIN_WSRAHI
)
24837 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
24838 else if (fcode
== ARM_BUILTIN_WSRAWI
)
24839 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
24840 else if (fcode
== ARM_BUILTIN_WSRADI
)
24841 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
24842 else if (fcode
== ARM_BUILTIN_WSRLH
)
24843 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
24844 else if (fcode
== ARM_BUILTIN_WSRLW
)
24845 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
24846 else if (fcode
== ARM_BUILTIN_WSRLD
)
24847 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
24848 else if (fcode
== ARM_BUILTIN_WSLLH
)
24849 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
24850 else if (fcode
== ARM_BUILTIN_WSLLW
)
24851 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
24852 else if (fcode
== ARM_BUILTIN_WSLLD
)
24853 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
24854 else if (fcode
== ARM_BUILTIN_WSRAH
)
24855 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
24856 else if (fcode
== ARM_BUILTIN_WSRAW
)
24857 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
24859 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
24862 return arm_expand_binop_builtin (icode
, exp
, target
);
24868 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
24869 if (d
->code
== (const enum arm_builtins
) fcode
)
24870 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
24872 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
24873 if (d
->code
== (const enum arm_builtins
) fcode
)
24874 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
24876 /* @@@ Should really do something sensible here. */
24880 /* Return the number (counting from 0) of
24881 the least significant set bit in MASK. */
24884 number_of_first_bit_set (unsigned mask
)
24886 return ctz_hwi (mask
);
24889 /* Like emit_multi_reg_push, but allowing for a different set of
24890 registers to be described as saved. MASK is the set of registers
24891 to be saved; REAL_REGS is the set of registers to be described as
24892 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24895 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
24897 unsigned long regno
;
24898 rtx par
[10], tmp
, reg
, insn
;
24901 /* Build the parallel of the registers actually being stored. */
24902 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
24904 regno
= ctz_hwi (mask
);
24905 reg
= gen_rtx_REG (SImode
, regno
);
24908 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
24910 tmp
= gen_rtx_USE (VOIDmode
, reg
);
24915 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
24916 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
24917 tmp
= gen_frame_mem (BLKmode
, tmp
);
24918 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
24921 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
24922 insn
= emit_insn (tmp
);
24924 /* Always build the stack adjustment note for unwind info. */
24925 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
24926 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
24929 /* Build the parallel of the registers recorded as saved for unwind. */
24930 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
24932 regno
= ctz_hwi (real_regs
);
24933 reg
= gen_rtx_REG (SImode
, regno
);
24935 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
24936 tmp
= gen_frame_mem (SImode
, tmp
);
24937 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
24938 RTX_FRAME_RELATED_P (tmp
) = 1;
24946 RTX_FRAME_RELATED_P (par
[0]) = 1;
24947 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
24950 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
24955 /* Emit code to push or pop registers to or from the stack. F is the
24956 assembly file. MASK is the registers to pop. */
24958 thumb_pop (FILE *f
, unsigned long mask
)
24961 int lo_mask
= mask
& 0xFF;
24962 int pushed_words
= 0;
24966 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
24968 /* Special case. Do not generate a POP PC statement here, do it in
24970 thumb_exit (f
, -1);
24974 fprintf (f
, "\tpop\t{");
24976 /* Look at the low registers first. */
24977 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
24981 asm_fprintf (f
, "%r", regno
);
24983 if ((lo_mask
& ~1) != 0)
24990 if (mask
& (1 << PC_REGNUM
))
24992 /* Catch popping the PC. */
24993 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
24994 || crtl
->calls_eh_return
)
24996 /* The PC is never poped directly, instead
24997 it is popped into r3 and then BX is used. */
24998 fprintf (f
, "}\n");
25000 thumb_exit (f
, -1);
25009 asm_fprintf (f
, "%r", PC_REGNUM
);
25013 fprintf (f
, "}\n");
25016 /* Generate code to return from a thumb function.
25017 If 'reg_containing_return_addr' is -1, then the return address is
25018 actually on the stack, at the stack pointer. */
25020 thumb_exit (FILE *f
, int reg_containing_return_addr
)
25022 unsigned regs_available_for_popping
;
25023 unsigned regs_to_pop
;
25025 unsigned available
;
25029 int restore_a4
= FALSE
;
25031 /* Compute the registers we need to pop. */
25035 if (reg_containing_return_addr
== -1)
25037 regs_to_pop
|= 1 << LR_REGNUM
;
25041 if (TARGET_BACKTRACE
)
25043 /* Restore the (ARM) frame pointer and stack pointer. */
25044 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
25048 /* If there is nothing to pop then just emit the BX instruction and
25050 if (pops_needed
== 0)
25052 if (crtl
->calls_eh_return
)
25053 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
25055 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
25058 /* Otherwise if we are not supporting interworking and we have not created
25059 a backtrace structure and the function was not entered in ARM mode then
25060 just pop the return address straight into the PC. */
25061 else if (!TARGET_INTERWORK
25062 && !TARGET_BACKTRACE
25063 && !is_called_in_ARM_mode (current_function_decl
)
25064 && !crtl
->calls_eh_return
)
25066 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
25070 /* Find out how many of the (return) argument registers we can corrupt. */
25071 regs_available_for_popping
= 0;
25073 /* If returning via __builtin_eh_return, the bottom three registers
25074 all contain information needed for the return. */
25075 if (crtl
->calls_eh_return
)
25079 /* If we can deduce the registers used from the function's
25080 return value. This is more reliable that examining
25081 df_regs_ever_live_p () because that will be set if the register is
25082 ever used in the function, not just if the register is used
25083 to hold a return value. */
25085 if (crtl
->return_rtx
!= 0)
25086 mode
= GET_MODE (crtl
->return_rtx
);
25088 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
25090 size
= GET_MODE_SIZE (mode
);
25094 /* In a void function we can use any argument register.
25095 In a function that returns a structure on the stack
25096 we can use the second and third argument registers. */
25097 if (mode
== VOIDmode
)
25098 regs_available_for_popping
=
25099 (1 << ARG_REGISTER (1))
25100 | (1 << ARG_REGISTER (2))
25101 | (1 << ARG_REGISTER (3));
25103 regs_available_for_popping
=
25104 (1 << ARG_REGISTER (2))
25105 | (1 << ARG_REGISTER (3));
25107 else if (size
<= 4)
25108 regs_available_for_popping
=
25109 (1 << ARG_REGISTER (2))
25110 | (1 << ARG_REGISTER (3));
25111 else if (size
<= 8)
25112 regs_available_for_popping
=
25113 (1 << ARG_REGISTER (3));
25116 /* Match registers to be popped with registers into which we pop them. */
25117 for (available
= regs_available_for_popping
,
25118 required
= regs_to_pop
;
25119 required
!= 0 && available
!= 0;
25120 available
&= ~(available
& - available
),
25121 required
&= ~(required
& - required
))
25124 /* If we have any popping registers left over, remove them. */
25126 regs_available_for_popping
&= ~available
;
25128 /* Otherwise if we need another popping register we can use
25129 the fourth argument register. */
25130 else if (pops_needed
)
25132 /* If we have not found any free argument registers and
25133 reg a4 contains the return address, we must move it. */
25134 if (regs_available_for_popping
== 0
25135 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
25137 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
25138 reg_containing_return_addr
= LR_REGNUM
;
25140 else if (size
> 12)
25142 /* Register a4 is being used to hold part of the return value,
25143 but we have dire need of a free, low register. */
25146 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
25149 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
25151 /* The fourth argument register is available. */
25152 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
25158 /* Pop as many registers as we can. */
25159 thumb_pop (f
, regs_available_for_popping
);
25161 /* Process the registers we popped. */
25162 if (reg_containing_return_addr
== -1)
25164 /* The return address was popped into the lowest numbered register. */
25165 regs_to_pop
&= ~(1 << LR_REGNUM
);
25167 reg_containing_return_addr
=
25168 number_of_first_bit_set (regs_available_for_popping
);
25170 /* Remove this register for the mask of available registers, so that
25171 the return address will not be corrupted by further pops. */
25172 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
25175 /* If we popped other registers then handle them here. */
25176 if (regs_available_for_popping
)
25180 /* Work out which register currently contains the frame pointer. */
25181 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
25183 /* Move it into the correct place. */
25184 asm_fprintf (f
, "\tmov\t%r, %r\n",
25185 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
25187 /* (Temporarily) remove it from the mask of popped registers. */
25188 regs_available_for_popping
&= ~(1 << frame_pointer
);
25189 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
25191 if (regs_available_for_popping
)
25195 /* We popped the stack pointer as well,
25196 find the register that contains it. */
25197 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
25199 /* Move it into the stack register. */
25200 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
25202 /* At this point we have popped all necessary registers, so
25203 do not worry about restoring regs_available_for_popping
25204 to its correct value:
25206 assert (pops_needed == 0)
25207 assert (regs_available_for_popping == (1 << frame_pointer))
25208 assert (regs_to_pop == (1 << STACK_POINTER)) */
25212 /* Since we have just move the popped value into the frame
25213 pointer, the popping register is available for reuse, and
25214 we know that we still have the stack pointer left to pop. */
25215 regs_available_for_popping
|= (1 << frame_pointer
);
25219 /* If we still have registers left on the stack, but we no longer have
25220 any registers into which we can pop them, then we must move the return
25221 address into the link register and make available the register that
25223 if (regs_available_for_popping
== 0 && pops_needed
> 0)
25225 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
25227 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
25228 reg_containing_return_addr
);
25230 reg_containing_return_addr
= LR_REGNUM
;
25233 /* If we have registers left on the stack then pop some more.
25234 We know that at most we will want to pop FP and SP. */
25235 if (pops_needed
> 0)
25240 thumb_pop (f
, regs_available_for_popping
);
25242 /* We have popped either FP or SP.
25243 Move whichever one it is into the correct register. */
25244 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
25245 move_to
= number_of_first_bit_set (regs_to_pop
);
25247 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
25249 regs_to_pop
&= ~(1 << move_to
);
25254 /* If we still have not popped everything then we must have only
25255 had one register available to us and we are now popping the SP. */
25256 if (pops_needed
> 0)
25260 thumb_pop (f
, regs_available_for_popping
);
25262 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
25264 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
25266 assert (regs_to_pop == (1 << STACK_POINTER))
25267 assert (pops_needed == 1)
25271 /* If necessary restore the a4 register. */
25274 if (reg_containing_return_addr
!= LR_REGNUM
)
25276 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
25277 reg_containing_return_addr
= LR_REGNUM
;
25280 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
25283 if (crtl
->calls_eh_return
)
25284 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
25286 /* Return to caller. */
25287 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
25290 /* Scan INSN just before assembler is output for it.
25291 For Thumb-1, we track the status of the condition codes; this
25292 information is used in the cbranchsi4_insn pattern. */
25294 thumb1_final_prescan_insn (rtx insn
)
25296 if (flag_print_asm_name
)
25297 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
25298 INSN_ADDRESSES (INSN_UID (insn
)));
25299 /* Don't overwrite the previous setter when we get to a cbranch. */
25300 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
25302 enum attr_conds conds
;
25304 if (cfun
->machine
->thumb1_cc_insn
)
25306 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
25307 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
25310 conds
= get_attr_conds (insn
);
25311 if (conds
== CONDS_SET
)
25313 rtx set
= single_set (insn
);
25314 cfun
->machine
->thumb1_cc_insn
= insn
;
25315 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
25316 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
25317 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
25318 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
25320 rtx src1
= XEXP (SET_SRC (set
), 1);
25321 if (src1
== const0_rtx
)
25322 cfun
->machine
->thumb1_cc_mode
= CCmode
;
25324 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
25326 /* Record the src register operand instead of dest because
25327 cprop_hardreg pass propagates src. */
25328 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
25331 else if (conds
!= CONDS_NOCOND
)
25332 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
25335 /* Check if unexpected far jump is used. */
25336 if (cfun
->machine
->lr_save_eliminated
25337 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
25338 internal_error("Unexpected thumb1 far jump");
25342 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
25344 unsigned HOST_WIDE_INT mask
= 0xff;
25347 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
25348 if (val
== 0) /* XXX */
25351 for (i
= 0; i
< 25; i
++)
25352 if ((val
& (mask
<< i
)) == val
)
25358 /* Returns nonzero if the current function contains,
25359 or might contain a far jump. */
25361 thumb_far_jump_used_p (void)
25364 bool far_jump
= false;
25365 unsigned int func_size
= 0;
25367 /* This test is only important for leaf functions. */
25368 /* assert (!leaf_function_p ()); */
25370 /* If we have already decided that far jumps may be used,
25371 do not bother checking again, and always return true even if
25372 it turns out that they are not being used. Once we have made
25373 the decision that far jumps are present (and that hence the link
25374 register will be pushed onto the stack) we cannot go back on it. */
25375 if (cfun
->machine
->far_jump_used
)
25378 /* If this function is not being called from the prologue/epilogue
25379 generation code then it must be being called from the
25380 INITIAL_ELIMINATION_OFFSET macro. */
25381 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
25383 /* In this case we know that we are being asked about the elimination
25384 of the arg pointer register. If that register is not being used,
25385 then there are no arguments on the stack, and we do not have to
25386 worry that a far jump might force the prologue to push the link
25387 register, changing the stack offsets. In this case we can just
25388 return false, since the presence of far jumps in the function will
25389 not affect stack offsets.
25391 If the arg pointer is live (or if it was live, but has now been
25392 eliminated and so set to dead) then we do have to test to see if
25393 the function might contain a far jump. This test can lead to some
25394 false negatives, since before reload is completed, then length of
25395 branch instructions is not known, so gcc defaults to returning their
25396 longest length, which in turn sets the far jump attribute to true.
25398 A false negative will not result in bad code being generated, but it
25399 will result in a needless push and pop of the link register. We
25400 hope that this does not occur too often.
25402 If we need doubleword stack alignment this could affect the other
25403 elimination offsets so we can't risk getting it wrong. */
25404 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
25405 cfun
->machine
->arg_pointer_live
= 1;
25406 else if (!cfun
->machine
->arg_pointer_live
)
25410 /* Check to see if the function contains a branch
25411 insn with the far jump attribute set. */
25412 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
25414 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
25418 func_size
+= get_attr_length (insn
);
25421 /* Attribute far_jump will always be true for thumb1 before
25422 shorten_branch pass. So checking far_jump attribute before
25423 shorten_branch isn't much useful.
25425 Following heuristic tries to estimate more accurately if a far jump
25426 may finally be used. The heuristic is very conservative as there is
25427 no chance to roll-back the decision of not to use far jump.
25429 Thumb1 long branch offset is -2048 to 2046. The worst case is each
25430 2-byte insn is associated with a 4 byte constant pool. Using
25431 function size 2048/3 as the threshold is conservative enough. */
25434 if ((func_size
* 3) >= 2048)
25436 /* Record the fact that we have decided that
25437 the function does use far jumps. */
25438 cfun
->machine
->far_jump_used
= 1;
/* NOTE(review): this region is a mangled extraction of gcc/config/arm/arm.c;
   statements are split across lines, stale line numbers are fused into the
   text, and several original lines (return type, braces, the early-return
   body of the TARGET_CALLEE_INTERWORKING branch) are missing.  Code is left
   byte-identical -- restore from the pristine file before editing.
   Purpose (from the surviving text): decide whether FUNC must be entered in
   ARM (not Thumb) state, by checking callee interworking on public
   functions and the "interfacearm" attribute.  */
25446 /* Return nonzero if FUNC must be entered in ARM mode. */
25448 is_called_in_ARM_mode (tree func
)
/* Must only be asked about real function declarations.  */
25450 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
25452 /* Ignore the problem about functions whose address is taken. */
25453 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
/* Otherwise: ARM entry is required iff the "interfacearm" attribute
   is present on the declaration.  */
25457 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
/* NOTE(review): mangled extraction -- original lines are missing (braces,
   the `else`, the trailing `return 0;`/`return n_free;` paths, the
   initialisation of n_free).  Code left byte-identical; see pristine
   gcc/config/arm/arm.c before editing.
   Purpose: given frame OFFSETS, compute how many extra low registers a
   Thumb-1 prologue push (or epilogue pop, per FOR_PROLOGUE) should include
   instead of adjusting SP by a constant -- a size optimisation around the
   512-byte immediate limit.  */
25463 /* Given the stack offsets and register mask in OFFSETS, decide how
25464 many additional registers to push instead of subtracting a constant
25465 from SP. For epilogues the principle is the same except we use pop.
25466 FOR_PROLOGUE indicates which we're generating. */
25468 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
25470 HOST_WIDE_INT amount
;
25471 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
25472 /* Extract a mask of the ones we can give to the Thumb's push/pop
25474 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
25475 /* Then count how many other high registers will need to be pushed. */
25476 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
25477 int n_free
, reg_base
, size
;
/* Epilogue with a frame pointer restores to locals_base, not
   outgoing_args -- presumably matching thumb1_expand_epilogue; confirm
   against the pristine source.  */
25479 if (!for_prologue
&& frame_pointer_needed
)
25480 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25482 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25484 /* If the stack frame size is 512 exactly, we can save one load
25485 instruction, which should make this a win even when optimizing
25487 if (!optimize_size
&& amount
!= 512)
25490 /* Can't do this if there are high registers to push. */
25491 if (high_regs_pushed
!= 0)
25494 /* Shouldn't do it in the prologue if no registers would normally
25495 be pushed at all. In the epilogue, also allow it if we'll have
25496 a pop insn for the PC. */
25499 || TARGET_BACKTRACE
25500 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
25501 || TARGET_INTERWORK
25502 || crtl
->args
.pretend_args_size
!= 0))
25505 /* Don't do this if thumb_expand_prologue wants to emit instructions
25506 between the push and the stack frame allocation. */
25508 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
25509 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
25516 size
= arm_size_return_regs ();
25517 reg_base
= ARM_NUM_INTS (size
);
25518 live_regs_mask
>>= reg_base
;
/* Count free low registers above the return-value registers that are
   not live (and, for epilogues, are call-used) -- candidates for the
   extra push/pop.  */
25521 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
25522 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
25524 live_regs_mask
>>= 1;
25530 gcc_assert (amount
/ 4 * 4 == amount
);
25532 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
25533 return (amount
- 508) / 4;
25534 if (amount
<= n_free
* 4)
/* NOTE(review): mangled extraction -- the return type, braces, several
   early `return ""` paths, local declarations (size, extra_pop, regno,
   next_hi_reg) and some asm_fprintf argument lines are missing.  Code left
   byte-identical; restore from pristine gcc/config/arm/arm.c before
   editing.
   Purpose: emit, as assembly text, the parts of the Thumb-1 epilogue that
   are not expanded as RTL -- popping high registers back via low-register
   staging, popping the return address, and unwinding pretend args.  */
25539 /* The bits which aren't usefully expanded as rtl. */
25541 thumb1_unexpanded_epilogue (void)
25543 arm_stack_offsets
*offsets
;
25545 unsigned long live_regs_mask
= 0;
25546 int high_regs_pushed
= 0;
25548 int had_to_push_lr
;
25551 if (cfun
->machine
->return_used_this_function
!= 0)
25554 if (IS_NAKED (arm_current_func_type ()))
25557 offsets
= arm_get_frame_offsets ();
25558 live_regs_mask
= offsets
->saved_regs_mask
;
25559 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
25561 /* If we can deduce the registers used from the function's return value.
25562 This is more reliable that examining df_regs_ever_live_p () because that
25563 will be set if the register is ever used in the function, not just if
25564 the register is used to hold a return value. */
25565 size
= arm_size_return_regs ();
25567 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
25570 unsigned long extra_mask
= (1 << extra_pop
) - 1;
25571 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
25574 /* The prolog may have pushed some high registers to use as
25575 work registers. e.g. the testsuite file:
25576 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
25577 compiles to produce:
25578 push {r4, r5, r6, r7, lr}
25582 as part of the prolog. We have to undo that pushing here. */
25584 if (high_regs_pushed
)
25586 unsigned long mask
= live_regs_mask
& 0xff;
25589 /* The available low registers depend on the size of the value we are
25597 /* Oh dear! We have no low registers into which we can pop
25600 ("no low registers available for popping high registers");
/* Find the first saved high register (r8..r12) to restore.  */
25602 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
25603 if (live_regs_mask
& (1 << next_hi_reg
))
25606 while (high_regs_pushed
)
25608 /* Find lo register(s) into which the high register(s) can
25610 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
25612 if (mask
& (1 << regno
))
25613 high_regs_pushed
--;
25614 if (high_regs_pushed
== 0)
25618 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
25620 /* Pop the values into the low register(s). */
25621 thumb_pop (asm_out_file
, mask
);
25623 /* Move the value(s) into the high registers. */
25624 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
25626 if (mask
& (1 << regno
))
25628 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
25631 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
25632 if (live_regs_mask
& (1 << next_hi_reg
))
25637 live_regs_mask
&= ~0x0f00;
25640 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
25641 live_regs_mask
&= 0xff;
25643 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
25645 /* Pop the return address into the PC. */
25646 if (had_to_push_lr
)
25647 live_regs_mask
|= 1 << PC_REGNUM
;
25649 /* Either no argument registers were pushed or a backtrace
25650 structure was created which includes an adjusted stack
25651 pointer, so just pop everything. */
25652 if (live_regs_mask
)
25653 thumb_pop (asm_out_file
, live_regs_mask
);
25655 /* We have either just popped the return address into the
25656 PC or it is was kept in LR for the entire function.
25657 Note that thumb_pop has already called thumb_exit if the
25658 PC was in the list. */
25659 if (!had_to_push_lr
)
25660 thumb_exit (asm_out_file
, LR_REGNUM
);
25664 /* Pop everything but the return address. */
25665 if (live_regs_mask
)
25666 thumb_pop (asm_out_file
, live_regs_mask
);
25668 if (had_to_push_lr
)
25672 /* We have no free low regs, so save one. */
25673 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
25677 /* Get the return address into a temporary register. */
25678 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
25682 /* Move the return address to lr. */
25683 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
25685 /* Restore the low register. */
25686 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
25691 regno
= LAST_ARG_REGNUM
;
25696 /* Remove the argument registers that were pushed onto the stack. */
25697 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
25698 SP_REGNUM
, SP_REGNUM
,
25699 crtl
->args
.pretend_args_size
);
25701 thumb_exit (asm_out_file
, regno
);
/* NOTE(review): mangled extraction -- braces, the `#endif`, and the
   `return machine;` line are missing.  Code left byte-identical.
   Purpose: allocate (GC-cleared) the per-function machine_function record
   and seed func_type with ARM_FT_UNKNOWN when that constant is nonzero
   (cleared memory already equals it otherwise).  */
25707 /* Functions to save and restore machine-specific function data. */
25708 static struct machine_function
*
25709 arm_init_machine_status (void)
25711 struct machine_function
*machine
;
25712 machine
= ggc_alloc_cleared_machine_function ();
25714 #if ARM_FT_UNKNOWN != 0
25715 machine
->func_type
= ARM_FT_UNKNOWN
;
/* NOTE(review): mangled extraction -- the return type, braces, and the
   `if (count != 0) return NULL_RTX;` guard (original lines 25725-25727)
   are missing.  Code left byte-identical.
   Purpose: return the RTX holding this function's return address, i.e.
   the initial value of LR.  FRAME is unused on ARM.  */
25720 /* Return an RTX indicating where the return address to the
25721 calling function can be found. */
25723 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
25728 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
/* NOTE(review): mangled extraction -- return type, braces and the guard
   around mark_reg_pointer are missing.  Code left byte-identical.
   Purpose: per-function RTL-expansion setup -- install the machine_function
   allocator and mark the arg pointer as a pointer so combine keeps the
   va_arg alignment adjustment.  */
25731 /* Do anything needed before RTL is emitted for each function. */
25733 arm_init_expanders (void)
25735 /* Arrange to initialize and mark the machine per-function status. */
25736 init_machine_status
= arm_init_machine_status
;
25738 /* This is to stop the combine pass optimizing away the alignment
25739 adjustment of va_arg. */
25740 /* ??? It is claimed that this should not be necessary. */
25742 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
/* NOTE(review): mangled extraction -- the outer `switch (from)` /
   `switch (to)` lines, braces, and the nested-case structure are missing;
   what remains is two flattened case lists (offsets relative to saved_args,
   then relative to soft_frame).  Code left byte-identical; consult the
   pristine gcc/config/arm/arm.c before editing.
   Purpose: Thumb counterpart of arm_compute_initial_elimination_offset --
   distance between register FROM and register TO for register
   elimination, computed from arm_get_frame_offsets().  */
25746 /* Like arm_compute_initial_elimination offset. Simpler because there
25747 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25748 to point at the base of the local variables after static stack
25749 space for a function has been allocated. */
25752 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
25754 arm_stack_offsets
*offsets
;
25756 offsets
= arm_get_frame_offsets ();
/* First group: eliminations from ARG_POINTER_REGNUM (offsets measured
   from saved_args).  */
25760 case ARG_POINTER_REGNUM
:
25763 case STACK_POINTER_REGNUM
:
25764 return offsets
->outgoing_args
- offsets
->saved_args
;
25766 case FRAME_POINTER_REGNUM
:
25767 return offsets
->soft_frame
- offsets
->saved_args
;
25769 case ARM_HARD_FRAME_POINTER_REGNUM
:
25770 return offsets
->saved_regs
- offsets
->saved_args
;
25772 case THUMB_HARD_FRAME_POINTER_REGNUM
:
25773 return offsets
->locals_base
- offsets
->saved_args
;
25776 gcc_unreachable ();
/* Second group: eliminations from FRAME_POINTER_REGNUM (offsets measured
   from soft_frame).  */
25780 case FRAME_POINTER_REGNUM
:
25783 case STACK_POINTER_REGNUM
:
25784 return offsets
->outgoing_args
- offsets
->soft_frame
;
25786 case ARM_HARD_FRAME_POINTER_REGNUM
:
25787 return offsets
->saved_regs
- offsets
->soft_frame
;
25789 case THUMB_HARD_FRAME_POINTER_REGNUM
:
25790 return offsets
->locals_base
- offsets
->soft_frame
;
25793 gcc_unreachable ();
25798 gcc_unreachable ();
/* NOTE(review): mangled extraction -- return type, braces, several local
   declarations (insn, reg, dwarf, regno), `else` keywords, `amount != 0`
   guards and some argument lines are missing; stale line numbers are fused
   into the text.  Code left byte-identical; restore from the pristine
   gcc/config/arm/arm.c before editing.
   Purpose: emit the Thumb-1 prologue as RTL -- interwork entry stub,
   pretend-args push, optional APCS backtrace structure, low/high register
   pushes, PIC register load, stack decrement (with constant-pool fallback
   for large frames), frame-pointer setup, and scheduling barriers.  */
25802 /* Generate the function's prologue. */
25805 thumb1_expand_prologue (void)
25809 HOST_WIDE_INT amount
;
25810 arm_stack_offsets
*offsets
;
25811 unsigned long func_type
;
25813 unsigned long live_regs_mask
;
25814 unsigned long l_mask
;
25815 unsigned high_regs_pushed
= 0;
25817 func_type
= arm_current_func_type ();
25819 /* Naked functions don't have prologues. */
25820 if (IS_NAKED (func_type
))
25823 if (IS_INTERRUPT (func_type
))
25825 error ("interrupt Service Routines cannot be coded in Thumb mode");
25829 if (is_called_in_ARM_mode (current_function_decl
))
25830 emit_insn (gen_prologue_thumb1_interwork ());
25832 offsets
= arm_get_frame_offsets ();
25833 live_regs_mask
= offsets
->saved_regs_mask
;
25835 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25836 l_mask
= live_regs_mask
& 0x40ff;
25837 /* Then count how many other high registers will need to be pushed. */
25838 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
25840 if (crtl
->args
.pretend_args_size
)
25842 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
25844 if (cfun
->machine
->uses_anonymous_args
)
25846 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
25847 unsigned long mask
;
25849 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
25850 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
25852 insn
= thumb1_emit_multi_reg_push (mask
, 0);
25856 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25857 stack_pointer_rtx
, x
));
25859 RTX_FRAME_RELATED_P (insn
) = 1;
25862 if (TARGET_BACKTRACE
)
25864 HOST_WIDE_INT offset
= 0;
25865 unsigned work_register
;
25866 rtx work_reg
, x
, arm_hfp_rtx
;
25868 /* We have been asked to create a stack backtrace structure.
25869 The code looks like this:
25873 0 sub SP, #16 Reserve space for 4 registers.
25874 2 push {R7} Push low registers.
25875 4 add R7, SP, #20 Get the stack pointer before the push.
25876 6 str R7, [SP, #8] Store the stack pointer
25877 (before reserving the space).
25878 8 mov R7, PC Get hold of the start of this code + 12.
25879 10 str R7, [SP, #16] Store it.
25880 12 mov R7, FP Get hold of the current frame pointer.
25881 14 str R7, [SP, #4] Store it.
25882 16 mov R7, LR Get hold of the current return address.
25883 18 str R7, [SP, #12] Store it.
25884 20 add R7, SP, #16 Point at the start of the
25885 backtrace structure.
25886 22 mov FP, R7 Put this value into the frame pointer. */
25888 work_register
= thumb_find_work_register (live_regs_mask
);
25889 work_reg
= gen_rtx_REG (SImode
, work_register
);
25890 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
25892 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25893 stack_pointer_rtx
, GEN_INT (-16)));
25894 RTX_FRAME_RELATED_P (insn
) = 1;
25898 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
25899 RTX_FRAME_RELATED_P (insn
) = 1;
25901 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
25904 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
25905 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
25907 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
25908 x
= gen_frame_mem (SImode
, x
);
25909 emit_move_insn (x
, work_reg
);
25911 /* Make sure that the instruction fetching the PC is in the right place
25912 to calculate "start of backtrace creation code + 12". */
25913 /* ??? The stores using the common WORK_REG ought to be enough to
25914 prevent the scheduler from doing anything weird. Failing that
25915 we could always move all of the following into an UNSPEC_VOLATILE. */
25918 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
25919 emit_move_insn (work_reg
, x
);
25921 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
25922 x
= gen_frame_mem (SImode
, x
);
25923 emit_move_insn (x
, work_reg
);
25925 emit_move_insn (work_reg
, arm_hfp_rtx
);
25927 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
25928 x
= gen_frame_mem (SImode
, x
);
25929 emit_move_insn (x
, work_reg
);
/* NOTE(review): the next four statements duplicate the FP store and PC
   store above -- presumably the two arms of a Thumb-2 vs Thumb-1
   ordering conditional whose `if`/`else` lines were lost in extraction;
   confirm against the pristine source.  */
25933 emit_move_insn (work_reg
, arm_hfp_rtx
);
25935 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
25936 x
= gen_frame_mem (SImode
, x
);
25937 emit_move_insn (x
, work_reg
);
25939 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
25940 emit_move_insn (work_reg
, x
);
25942 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
25943 x
= gen_frame_mem (SImode
, x
);
25944 emit_move_insn (x
, work_reg
);
25947 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
25948 emit_move_insn (work_reg
, x
);
25950 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
25951 x
= gen_frame_mem (SImode
, x
);
25952 emit_move_insn (x
, work_reg
);
25954 x
= GEN_INT (offset
+ 12);
25955 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
25957 emit_move_insn (arm_hfp_rtx
, work_reg
);
25959 /* Optimization: If we are not pushing any low registers but we are going
25960 to push some high registers then delay our first push. This will just
25961 be a push of LR and we can combine it with the push of the first high
25963 else if ((l_mask
& 0xff) != 0
25964 || (high_regs_pushed
== 0 && l_mask
))
25966 unsigned long mask
= l_mask
;
25967 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
25968 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
25969 RTX_FRAME_RELATED_P (insn
) = 1;
25972 if (high_regs_pushed
)
25974 unsigned pushable_regs
;
25975 unsigned next_hi_reg
;
25976 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
25977 : crtl
->args
.info
.nregs
;
25978 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
25980 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
25981 if (live_regs_mask
& (1 << next_hi_reg
))
25984 /* Here we need to mask out registers used for passing arguments
25985 even if they can be pushed. This is to avoid using them to stash the high
25986 registers. Such kind of stash may clobber the use of arguments. */
25987 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
25989 if (pushable_regs
== 0)
25990 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
25992 while (high_regs_pushed
> 0)
25994 unsigned long real_regs_mask
= 0;
25996 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
25998 if (pushable_regs
& (1 << regno
))
26000 emit_move_insn (gen_rtx_REG (SImode
, regno
),
26001 gen_rtx_REG (SImode
, next_hi_reg
));
26003 high_regs_pushed
--;
26004 real_regs_mask
|= (1 << next_hi_reg
);
26006 if (high_regs_pushed
)
26008 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
26010 if (live_regs_mask
& (1 << next_hi_reg
))
26015 pushable_regs
&= ~((1 << regno
) - 1);
26021 /* If we had to find a work register and we have not yet
26022 saved the LR then add it to the list of regs to push. */
26023 if (l_mask
== (1 << LR_REGNUM
))
26025 pushable_regs
|= l_mask
;
26026 real_regs_mask
|= l_mask
;
26030 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
26031 RTX_FRAME_RELATED_P (insn
) = 1;
26035 /* Load the pic register before setting the frame pointer,
26036 so we can use r7 as a temporary work register. */
26037 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26038 arm_load_pic_register (live_regs_mask
);
26040 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
26041 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
26042 stack_pointer_rtx
);
26044 if (flag_stack_usage_info
)
26045 current_function_static_stack_size
26046 = offsets
->outgoing_args
- offsets
->saved_args
;
26048 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26049 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
26054 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
26055 GEN_INT (- amount
)));
26056 RTX_FRAME_RELATED_P (insn
) = 1;
26062 /* The stack decrement is too big for an immediate value in a single
26063 insn. In theory we could issue multiple subtracts, but after
26064 three of them it becomes more space efficient to place the full
26065 value in the constant pool and load into a register. (Also the
26066 ARM debugger really likes to see only one stack decrement per
26067 function). So instead we look for a scratch register into which
26068 we can load the decrement, and then we subtract this from the
26069 stack pointer. Unfortunately on the thumb the only available
26070 scratch registers are the argument registers, and we cannot use
26071 these as they may hold arguments to the function. Instead we
26072 attempt to locate a call preserved register which is used by this
26073 function. If we can find one, then we know that it will have
26074 been pushed at the start of the prologue and so we can corrupt
26076 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
26077 if (live_regs_mask
& (1 << regno
))
26080 gcc_assert(regno
<= LAST_LO_REGNUM
);
26082 reg
= gen_rtx_REG (SImode
, regno
);
26084 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
26086 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26087 stack_pointer_rtx
, reg
));
26089 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
26090 plus_constant (Pmode
, stack_pointer_rtx
,
26092 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
26093 RTX_FRAME_RELATED_P (insn
) = 1;
26097 if (frame_pointer_needed
)
26098 thumb_set_frame_pointer (offsets
);
26100 /* If we are profiling, make sure no instructions are scheduled before
26101 the call to mcount. Similarly if the user has requested no
26102 scheduling in the prolog. Similarly if we want non-call exceptions
26103 using the EABI unwinder, to prevent faulting instructions from being
26104 swapped with a stack adjustment. */
26105 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
26106 || (arm_except_unwind_info (&global_options
) == UI_TARGET
26107 && cfun
->can_throw_non_call_exceptions
))
26108 emit_insn (gen_blockage ());
26110 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
26111 if (live_regs_mask
& 0xff)
26112 cfun
->machine
->lr_save_eliminated
= 0;
/* NOTE(review): mangled extraction -- return type, braces, local
   declarations of i/num_regs, the `if (num_regs == 1)` test and some
   `else` structure are missing.  Code left byte-identical.
   Purpose: expand a Thumb-2 function return -- either a single-register
   POST_INC pop of PC parallel with a return, a multi-register pop with LR
   rewritten to PC, or a bare simple_return.  */
26115 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
26116 POP instruction can be generated. LR should be replaced by PC. All
26117 the checks required are already done by USE_RETURN_INSN (). Hence,
26118 all we really need to check here is if single register is to be
26119 returned, or multiple register return. */
26121 thumb2_expand_return (bool simple_return
)
26124 unsigned long saved_regs_mask
;
26125 arm_stack_offsets
*offsets
;
26127 offsets
= arm_get_frame_offsets ();
26128 saved_regs_mask
= offsets
->saved_regs_mask
;
26130 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
26131 if (saved_regs_mask
& (1 << i
))
26134 if (!simple_return
&& saved_regs_mask
)
/* Single saved register: pop it straight into PC alongside the
   return, as one PARALLEL jump insn.  */
26138 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
26139 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
26140 rtx addr
= gen_rtx_MEM (SImode
,
26141 gen_rtx_POST_INC (SImode
,
26142 stack_pointer_rtx
));
26143 set_mem_alias_set (addr
, get_frame_alias_set ());
26144 XVECEXP (par
, 0, 0) = ret_rtx
;
26145 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
26146 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
26147 emit_jump_insn (par
);
/* Multi-register case: return via popping saved LR into PC.  */
26151 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
26152 saved_regs_mask
|= (1 << PC_REGNUM
);
26153 arm_emit_multi_reg_pop (saved_regs_mask
);
26158 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): mangled extraction -- return type, braces, the early
   `return`, the `if (amount)` and small/large-amount split around the
   addsi3 vs movsi+addsi3 paths are missing.  Code left byte-identical.
   Purpose: expand the RTL portion of the Thumb-1 epilogue -- restore SP
   from the frame pointer if needed, pop the local frame (via r3 for
   large amounts), and emit uses/clobbers so dataflow sees correct
   register lifetimes.  */
26163 thumb1_expand_epilogue (void)
26165 HOST_WIDE_INT amount
;
26166 arm_stack_offsets
*offsets
;
26169 /* Naked functions don't have prologues. */
26170 if (IS_NAKED (arm_current_func_type ()))
26173 offsets
= arm_get_frame_offsets ();
26174 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26176 if (frame_pointer_needed
)
26178 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
26179 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26181 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
26183 gcc_assert (amount
>= 0);
26186 emit_insn (gen_blockage ());
26189 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
26190 GEN_INT (amount
)));
26193 /* r3 is always free in the epilogue. */
26194 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
26196 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
26197 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
26201 /* Emit a USE (stack_pointer_rtx), so that
26202 the stack adjustment will not be deleted. */
26203 emit_insn (gen_force_register_use (stack_pointer_rtx
));
26205 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
26206 emit_insn (gen_blockage ());
26208 /* Emit a clobber for each insn that will be restored in the epilogue,
26209 so that flow2 will get register lifetimes correct. */
26210 for (regno
= 0; regno
< 13; regno
++)
26211 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
26212 emit_clobber (gen_rtx_REG (SImode
, regno
));
26214 if (! df_regs_ever_live_p (LR_REGNUM
))
26215 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
/* NOTE(review): mangled extraction -- return type, braces, local
   declarations (i, num_regs, start_reg, insn), the TARGET_IWMMXT guard,
   `start_reg = i + 2;` style loop-reset lines, the `if (really_return ...)`
   head at 26319, and the early `return` before the final jump are all
   missing.  Code left byte-identical; restore from the pristine
   gcc/config/arm/arm.c before editing.
   Purpose: expand the epilogue for an APCS frame (ARM state, frame pointer
   required) -- pop VFP and iWMMXt saves relative to the frame pointer,
   restore core registers (loading saved LR directly into PC when
   possible), handle interrupt-handler IP restore, EH return stack
   adjustment, and stack realignment undo.  */
26218 /* Epilogue code for APCS frame. */
26220 arm_expand_epilogue_apcs_frame (bool really_return
)
26222 unsigned long func_type
;
26223 unsigned long saved_regs_mask
;
26226 int floats_from_frame
= 0;
26227 arm_stack_offsets
*offsets
;
26229 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
26230 func_type
= arm_current_func_type ();
26232 /* Get frame offsets for ARM. */
26233 offsets
= arm_get_frame_offsets ();
26234 saved_regs_mask
= offsets
->saved_regs_mask
;
26236 /* Find the offset of the floating-point save area in the frame. */
26237 floats_from_frame
= offsets
->saved_args
- offsets
->frame
;
26239 /* Compute how many core registers saved and how far away the floats are. */
26240 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
26241 if (saved_regs_mask
& (1 << i
))
26244 floats_from_frame
+= 4;
26247 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
26251 /* The offset is from IP_REGNUM. */
26252 int saved_size
= arm_get_vfp_saved_size ();
26253 if (saved_size
> 0)
26255 floats_from_frame
+= saved_size
;
26256 emit_insn (gen_addsi3 (gen_rtx_REG (SImode
, IP_REGNUM
),
26257 hard_frame_pointer_rtx
,
26258 GEN_INT (-floats_from_frame
)));
26261 /* Generate VFP register multi-pop. */
26262 start_reg
= FIRST_VFP_REGNUM
;
26264 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
26265 /* Look for a case where a reg does not need restoring. */
26266 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
26267 && (!df_regs_ever_live_p (i
+ 1)
26268 || call_used_regs
[i
+ 1]))
26270 if (start_reg
!= i
)
26271 arm_emit_vfp_multi_reg_pop (start_reg
,
26272 (i
- start_reg
) / 2,
26273 gen_rtx_REG (SImode
,
26278 /* Restore the remaining regs that we have discovered (or possibly
26279 even all of them, if the conditional in the for loop never
26281 if (start_reg
!= i
)
26282 arm_emit_vfp_multi_reg_pop (start_reg
,
26283 (i
- start_reg
) / 2,
26284 gen_rtx_REG (SImode
, IP_REGNUM
));
26289 /* The frame pointer is guaranteed to be non-double-word aligned, as
26290 it is set to double-word-aligned old_stack_pointer - 4. */
26292 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
26294 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
26295 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
26297 rtx addr
= gen_frame_mem (V2SImode
,
26298 plus_constant (Pmode
, hard_frame_pointer_rtx
,
26300 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
26301 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
26302 gen_rtx_REG (V2SImode
, i
),
26308 /* saved_regs_mask should contain IP which contains old stack pointer
26309 at the time of activation creation. Since SP and IP are adjacent registers,
26310 we can restore the value directly into SP. */
26311 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
26312 saved_regs_mask
&= ~(1 << IP_REGNUM
);
26313 saved_regs_mask
|= (1 << SP_REGNUM
);
26315 /* There are two registers left in saved_regs_mask - LR and PC. We
26316 only need to restore LR (the return address), but to
26317 save time we can load it directly into PC, unless we need a
26318 special function exit sequence, or we are not really returning. */
26320 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
26321 && !crtl
->calls_eh_return
)
26322 /* Delete LR from the register mask, so that LR on
26323 the stack is loaded into the PC in the register mask. */
26324 saved_regs_mask
&= ~(1 << LR_REGNUM
);
26326 saved_regs_mask
&= ~(1 << PC_REGNUM
);
26328 num_regs
= bit_count (saved_regs_mask
);
26329 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
26331 /* Unwind the stack to just below the saved registers. */
26332 emit_insn (gen_addsi3 (stack_pointer_rtx
,
26333 hard_frame_pointer_rtx
,
26334 GEN_INT (- 4 * num_regs
)));
26337 arm_emit_multi_reg_pop (saved_regs_mask
);
26339 if (IS_INTERRUPT (func_type
))
26341 /* Interrupt handlers will have pushed the
26342 IP onto the stack, so restore it now. */
26344 rtx addr
= gen_rtx_MEM (SImode
,
26345 gen_rtx_POST_INC (SImode
,
26346 stack_pointer_rtx
));
26347 set_mem_alias_set (addr
, get_frame_alias_set ());
26348 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
26349 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
26350 gen_rtx_REG (SImode
, IP_REGNUM
),
26354 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
26357 if (crtl
->calls_eh_return
)
26358 emit_insn (gen_addsi3 (stack_pointer_rtx
,
26360 GEN_INT (ARM_EH_STACKADJ_REGNUM
)));
26362 if (IS_STACKALIGN (func_type
))
26363 /* Restore the original stack pointer. Before prologue, the stack was
26364 realigned and the original stack pointer saved in r0. For details,
26365 see comment in arm_expand_prologue. */
26366 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
26368 emit_jump_insn (simple_return_rtx
);
26371 /* Generate RTL to represent ARM epilogue. Really_return is true if the
26372 function is not a sibcall. */
26374 arm_expand_epilogue (bool really_return
)
26376 unsigned long func_type
;
26377 unsigned long saved_regs_mask
;
26381 arm_stack_offsets
*offsets
;
26383 func_type
= arm_current_func_type ();
26385 /* Naked functions don't have epilogue. Hence, generate return pattern, and
26386 let output_return_instruction take care of instruction emission if any. */
26387 if (IS_NAKED (func_type
)
26388 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
26391 emit_jump_insn (simple_return_rtx
);
26395 /* If we are throwing an exception, then we really must be doing a
26396 return, so we can't tail-call. */
26397 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
26399 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
26401 arm_expand_epilogue_apcs_frame (really_return
);
26405 /* Get frame offsets for ARM. */
26406 offsets
= arm_get_frame_offsets ();
26407 saved_regs_mask
= offsets
->saved_regs_mask
;
26408 num_regs
= bit_count (saved_regs_mask
);
26410 if (frame_pointer_needed
)
26413 /* Restore stack pointer if necessary. */
26416 /* In ARM mode, frame pointer points to first saved register.
26417 Restore stack pointer to last saved register. */
26418 amount
= offsets
->frame
- offsets
->saved_regs
;
26420 /* Force out any pending memory operations that reference stacked data
26421 before stack de-allocation occurs. */
26422 emit_insn (gen_blockage ());
26423 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26424 hard_frame_pointer_rtx
,
26425 GEN_INT (amount
)));
26426 arm_add_cfa_adjust_cfa_note (insn
, amount
,
26428 hard_frame_pointer_rtx
);
26430 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26432 emit_insn (gen_force_register_use (stack_pointer_rtx
));
26436 /* In Thumb-2 mode, the frame pointer points to the last saved
26438 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26441 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
26442 hard_frame_pointer_rtx
,
26443 GEN_INT (amount
)));
26444 arm_add_cfa_adjust_cfa_note (insn
, amount
,
26445 hard_frame_pointer_rtx
,
26446 hard_frame_pointer_rtx
);
26449 /* Force out any pending memory operations that reference stacked data
26450 before stack de-allocation occurs. */
26451 emit_insn (gen_blockage ());
26452 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
26453 hard_frame_pointer_rtx
));
26454 arm_add_cfa_adjust_cfa_note (insn
, 0,
26456 hard_frame_pointer_rtx
);
26457 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26459 emit_insn (gen_force_register_use (stack_pointer_rtx
));
26464 /* Pop off outgoing args and local frame to adjust stack pointer to
26465 last saved register. */
26466 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26470 /* Force out any pending memory operations that reference stacked data
26471 before stack de-allocation occurs. */
26472 emit_insn (gen_blockage ());
26473 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26475 GEN_INT (amount
)));
26476 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
26477 stack_pointer_rtx
, stack_pointer_rtx
);
26478 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
26480 emit_insn (gen_force_register_use (stack_pointer_rtx
));
26484 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
26486 /* Generate VFP register multi-pop. */
26487 int end_reg
= LAST_VFP_REGNUM
+ 1;
26489 /* Scan the registers in reverse order. We need to match
26490 any groupings made in the prologue and generate matching
26491 vldm operations. The need to match groups is because,
26492 unlike pop, vldm can only do consecutive regs. */
26493 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
26494 /* Look for a case where a reg does not need restoring. */
26495 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
26496 && (!df_regs_ever_live_p (i
+ 1)
26497 || call_used_regs
[i
+ 1]))
26499 /* Restore the regs discovered so far (from reg+2 to
26501 if (end_reg
> i
+ 2)
26502 arm_emit_vfp_multi_reg_pop (i
+ 2,
26503 (end_reg
- (i
+ 2)) / 2,
26504 stack_pointer_rtx
);
26508 /* Restore the remaining regs that we have discovered (or possibly
26509 even all of them, if the conditional in the for loop never
26511 if (end_reg
> i
+ 2)
26512 arm_emit_vfp_multi_reg_pop (i
+ 2,
26513 (end_reg
- (i
+ 2)) / 2,
26514 stack_pointer_rtx
);
26518 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
26519 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
26522 rtx addr
= gen_rtx_MEM (V2SImode
,
26523 gen_rtx_POST_INC (SImode
,
26524 stack_pointer_rtx
));
26525 set_mem_alias_set (addr
, get_frame_alias_set ());
26526 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
26527 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
26528 gen_rtx_REG (V2SImode
, i
),
26530 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
26531 stack_pointer_rtx
, stack_pointer_rtx
);
26534 if (saved_regs_mask
)
26537 bool return_in_pc
= false;
26539 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
26540 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
26541 && !IS_STACKALIGN (func_type
)
26543 && crtl
->args
.pretend_args_size
== 0
26544 && saved_regs_mask
& (1 << LR_REGNUM
)
26545 && !crtl
->calls_eh_return
)
26547 saved_regs_mask
&= ~(1 << LR_REGNUM
);
26548 saved_regs_mask
|= (1 << PC_REGNUM
);
26549 return_in_pc
= true;
26552 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
26554 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
26555 if (saved_regs_mask
& (1 << i
))
26557 rtx addr
= gen_rtx_MEM (SImode
,
26558 gen_rtx_POST_INC (SImode
,
26559 stack_pointer_rtx
));
26560 set_mem_alias_set (addr
, get_frame_alias_set ());
26562 if (i
== PC_REGNUM
)
26564 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
26565 XVECEXP (insn
, 0, 0) = ret_rtx
;
26566 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
26567 gen_rtx_REG (SImode
, i
),
26569 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
26570 insn
= emit_jump_insn (insn
);
26574 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
26576 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
26577 gen_rtx_REG (SImode
, i
),
26579 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
26581 stack_pointer_rtx
);
26588 && current_tune
->prefer_ldrd_strd
26589 && !optimize_function_for_size_p (cfun
))
26592 thumb2_emit_ldrd_pop (saved_regs_mask
);
26593 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
26594 arm_emit_ldrd_pop (saved_regs_mask
);
26596 arm_emit_multi_reg_pop (saved_regs_mask
);
26599 arm_emit_multi_reg_pop (saved_regs_mask
);
26602 if (return_in_pc
== true)
26606 if (crtl
->args
.pretend_args_size
)
26609 rtx dwarf
= NULL_RTX
;
26610 rtx tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26612 GEN_INT (crtl
->args
.pretend_args_size
)));
26614 RTX_FRAME_RELATED_P (tmp
) = 1;
26616 if (cfun
->machine
->uses_anonymous_args
)
26618 /* Restore pretend args. Refer arm_expand_prologue on how to save
26619 pretend_args in stack. */
26620 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
26621 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
26622 for (j
= 0, i
= 0; j
< num_regs
; i
++)
26623 if (saved_regs_mask
& (1 << i
))
26625 rtx reg
= gen_rtx_REG (SImode
, i
);
26626 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
26629 REG_NOTES (tmp
) = dwarf
;
26631 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
26632 stack_pointer_rtx
, stack_pointer_rtx
);
26635 if (!really_return
)
26638 if (crtl
->calls_eh_return
)
26639 emit_insn (gen_addsi3 (stack_pointer_rtx
,
26641 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
26643 if (IS_STACKALIGN (func_type
))
26644 /* Restore the original stack pointer. Before prologue, the stack was
26645 realigned and the original stack pointer saved in r0. For details,
26646 see comment in arm_expand_prologue. */
26647 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
26649 emit_jump_insn (simple_return_rtx
);
26652 /* Implementation of insn prologue_thumb1_interwork. This is the first
26653 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26656 thumb1_output_interwork (void)
26659 FILE *f
= asm_out_file
;
26661 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
26662 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
26664 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
26666 /* Generate code sequence to switch us into Thumb mode. */
26667 /* The .code 32 directive has already been emitted by
26668 ASM_DECLARE_FUNCTION_NAME. */
26669 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
26670 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
26672 /* Generate a label, so that the debugger will notice the
26673 change in instruction sets. This label is also used by
26674 the assembler to bypass the ARM code when this function
26675 is called from a Thumb encoded function elsewhere in the
26676 same file. Hence the definition of STUB_NAME here must
26677 agree with the definition in gas/config/tc-arm.c. */
26679 #define STUB_NAME ".real_start_of"
26681 fprintf (f
, "\t.code\t16\n");
26683 if (arm_dllexport_name_p (name
))
26684 name
= arm_strip_name_encoding (name
);
26686 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
26687 fprintf (f
, "\t.thumb_func\n");
26688 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
26693 /* Handle the case of a double word load into a low register from
26694 a computed memory address. The computed address may involve a
26695 register which is overwritten by the load. */
26697 thumb_load_double_from_address (rtx
*operands
)
26705 gcc_assert (REG_P (operands
[0]));
26706 gcc_assert (MEM_P (operands
[1]));
26708 /* Get the memory address. */
26709 addr
= XEXP (operands
[1], 0);
26711 /* Work out how the memory address is computed. */
26712 switch (GET_CODE (addr
))
26715 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26717 if (REGNO (operands
[0]) == REGNO (addr
))
26719 output_asm_insn ("ldr\t%H0, %2", operands
);
26720 output_asm_insn ("ldr\t%0, %1", operands
);
26724 output_asm_insn ("ldr\t%0, %1", operands
);
26725 output_asm_insn ("ldr\t%H0, %2", operands
);
26730 /* Compute <address> + 4 for the high order load. */
26731 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26733 output_asm_insn ("ldr\t%0, %1", operands
);
26734 output_asm_insn ("ldr\t%H0, %2", operands
);
26738 arg1
= XEXP (addr
, 0);
26739 arg2
= XEXP (addr
, 1);
26741 if (CONSTANT_P (arg1
))
26742 base
= arg2
, offset
= arg1
;
26744 base
= arg1
, offset
= arg2
;
26746 gcc_assert (REG_P (base
));
26748 /* Catch the case of <address> = <reg> + <reg> */
26749 if (REG_P (offset
))
26751 int reg_offset
= REGNO (offset
);
26752 int reg_base
= REGNO (base
);
26753 int reg_dest
= REGNO (operands
[0]);
26755 /* Add the base and offset registers together into the
26756 higher destination register. */
26757 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
26758 reg_dest
+ 1, reg_base
, reg_offset
);
26760 /* Load the lower destination register from the address in
26761 the higher destination register. */
26762 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
26763 reg_dest
, reg_dest
+ 1);
26765 /* Load the higher destination register from its own address
26767 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
26768 reg_dest
+ 1, reg_dest
+ 1);
26772 /* Compute <address> + 4 for the high order load. */
26773 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26775 /* If the computed address is held in the low order register
26776 then load the high order register first, otherwise always
26777 load the low order register first. */
26778 if (REGNO (operands
[0]) == REGNO (base
))
26780 output_asm_insn ("ldr\t%H0, %2", operands
);
26781 output_asm_insn ("ldr\t%0, %1", operands
);
26785 output_asm_insn ("ldr\t%0, %1", operands
);
26786 output_asm_insn ("ldr\t%H0, %2", operands
);
26792 /* With no registers to worry about we can just load the value
26794 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26796 output_asm_insn ("ldr\t%H0, %2", operands
);
26797 output_asm_insn ("ldr\t%0, %1", operands
);
26801 gcc_unreachable ();
26808 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
26815 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26818 operands
[4] = operands
[5];
26821 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
26822 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
26826 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26829 operands
[4] = operands
[5];
26832 if (REGNO (operands
[5]) > REGNO (operands
[6]))
26835 operands
[5] = operands
[6];
26838 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26841 operands
[4] = operands
[5];
26845 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
26846 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
26850 gcc_unreachable ();
26856 /* Output a call-via instruction for thumb state. */
26858 thumb_call_via_reg (rtx reg
)
26860 int regno
= REGNO (reg
);
26863 gcc_assert (regno
< LR_REGNUM
);
26865 /* If we are in the normal text section we can use a single instance
26866 per compilation unit. If we are doing function sections, then we need
26867 an entry per section, since we can't rely on reachability. */
26868 if (in_section
== text_section
)
26870 thumb_call_reg_needed
= 1;
26872 if (thumb_call_via_label
[regno
] == NULL
)
26873 thumb_call_via_label
[regno
] = gen_label_rtx ();
26874 labelp
= thumb_call_via_label
+ regno
;
26878 if (cfun
->machine
->call_via
[regno
] == NULL
)
26879 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
26880 labelp
= cfun
->machine
->call_via
+ regno
;
26883 output_asm_insn ("bl\t%a0", labelp
);
26887 /* Routines for generating rtl. */
26889 thumb_expand_movmemqi (rtx
*operands
)
26891 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
26892 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
26893 HOST_WIDE_INT len
= INTVAL (operands
[2]);
26894 HOST_WIDE_INT offset
= 0;
26898 emit_insn (gen_movmem12b (out
, in
, out
, in
));
26904 emit_insn (gen_movmem8b (out
, in
, out
, in
));
26910 rtx reg
= gen_reg_rtx (SImode
);
26911 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
26912 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
26919 rtx reg
= gen_reg_rtx (HImode
);
26920 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
26921 plus_constant (Pmode
, in
,
26923 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
26932 rtx reg
= gen_reg_rtx (QImode
);
26933 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
26934 plus_constant (Pmode
, in
,
26936 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
26943 thumb_reload_out_hi (rtx
*operands
)
26945 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
26948 /* Handle reading a half-word from memory during reload. */
26950 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
26952 gcc_unreachable ();
26955 /* Return the length of a function name prefix
26956 that starts with the character 'c'. */
26958 arm_get_strip_length (int c
)
26962 ARM_NAME_ENCODING_LENGTHS
26967 /* Return a pointer to a function's name with any
26968 and all prefix encodings stripped from it. */
26970 arm_strip_name_encoding (const char *name
)
26974 while ((skip
= arm_get_strip_length (* name
)))
26980 /* If there is a '*' anywhere in the name's prefix, then
26981 emit the stripped name verbatim, otherwise prepend an
26982 underscore if leading underscores are being used. */
26984 arm_asm_output_labelref (FILE *stream
, const char *name
)
26989 while ((skip
= arm_get_strip_length (* name
)))
26991 verbatim
|= (*name
== '*');
26996 fputs (name
, stream
);
26998 asm_fprintf (stream
, "%U%s", name
);
27001 /* This function is used to emit an EABI tag and its associated value.
27002 We emit the numerical value of the tag in case the assembler does not
27003 support textual tags. (Eg gas prior to 2.20). If requested we include
27004 the tag name in a comment so that anyone reading the assembler output
27005 will know which tag is being set.
27007 This function is not static because arm-c.c needs it too. */
27010 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
27012 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
27013 if (flag_verbose_asm
|| flag_debug_asm
)
27014 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
27015 asm_fprintf (asm_out_file
, "\n");
27019 arm_file_start (void)
27023 if (TARGET_UNIFIED_ASM
)
27024 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
27028 const char *fpu_name
;
27029 if (arm_selected_arch
)
27030 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
27031 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
27032 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
27034 asm_fprintf (asm_out_file
, "\t.cpu %s\n", arm_selected_cpu
->name
);
27036 if (TARGET_SOFT_FLOAT
)
27038 fpu_name
= "softvfp";
27042 fpu_name
= arm_fpu_desc
->name
;
27043 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
27045 if (TARGET_HARD_FLOAT
)
27046 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27047 if (TARGET_HARD_FLOAT_ABI
)
27048 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27051 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
27053 /* Some of these attributes only apply when the corresponding features
27054 are used. However we don't have any easy way of figuring this out.
27055 Conservatively record the setting that would have been used. */
27057 if (flag_rounding_math
)
27058 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27060 if (!flag_unsafe_math_optimizations
)
27062 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27063 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27065 if (flag_signaling_nans
)
27066 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27068 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27069 flag_finite_math_only
? 1 : 3);
27071 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27072 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27073 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27074 flag_short_enums
? 1 : 2);
27076 /* Tag_ABI_optimization_goals. */
27079 else if (optimize
>= 2)
27085 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
27087 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27090 if (arm_fp16_format
)
27091 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27092 (int) arm_fp16_format
);
27094 if (arm_lang_output_object_attributes_hook
)
27095 arm_lang_output_object_attributes_hook();
27098 default_file_start ();
27102 arm_file_end (void)
27106 if (NEED_INDICATE_EXEC_STACK
)
27107 /* Add .note.GNU-stack. */
27108 file_end_indicate_exec_stack ();
27110 if (! thumb_call_reg_needed
)
27113 switch_to_section (text_section
);
27114 asm_fprintf (asm_out_file
, "\t.code 16\n");
27115 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
27117 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
27119 rtx label
= thumb_call_via_label
[regno
];
27123 targetm
.asm_out
.internal_label (asm_out_file
, "L",
27124 CODE_LABEL_NUMBER (label
));
27125 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
27131 /* Symbols in the text segment can be accessed without indirecting via the
27132 constant pool; it may take an extra binary operation, but this is still
27133 faster than indirecting via memory. Don't do this when not optimizing,
27134 since we won't be calculating al of the offsets necessary to do this
27138 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
27140 if (optimize
> 0 && TREE_CONSTANT (decl
))
27141 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
27143 default_encode_section_info (decl
, rtl
, first
);
27145 #endif /* !ARM_PE */
27148 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
27150 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
27151 && !strcmp (prefix
, "L"))
27153 arm_ccfsm_state
= 0;
27154 arm_target_insn
= NULL
;
27156 default_internal_label (stream
, prefix
, labelno
);
27159 /* Output code to add DELTA to the first argument, and then jump
27160 to FUNCTION. Used for C++ multiple inheritance. */
27162 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
27163 HOST_WIDE_INT delta
,
27164 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
27167 static int thunk_label
= 0;
27170 int mi_delta
= delta
;
27171 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
27173 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
27176 mi_delta
= - mi_delta
;
27178 final_start_function (emit_barrier (), file
, 1);
27182 int labelno
= thunk_label
++;
27183 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
27184 /* Thunks are entered in arm mode when avaiable. */
27185 if (TARGET_THUMB1_ONLY
)
27187 /* push r3 so we can use it as a temporary. */
27188 /* TODO: Omit this save if r3 is not used. */
27189 fputs ("\tpush {r3}\n", file
);
27190 fputs ("\tldr\tr3, ", file
);
27194 fputs ("\tldr\tr12, ", file
);
27196 assemble_name (file
, label
);
27197 fputc ('\n', file
);
27200 /* If we are generating PIC, the ldr instruction below loads
27201 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
27202 the address of the add + 8, so we have:
27204 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
27207 Note that we have "+ 1" because some versions of GNU ld
27208 don't set the low bit of the result for R_ARM_REL32
27209 relocations against thumb function symbols.
27210 On ARMv6M this is +4, not +8. */
27211 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
27212 assemble_name (file
, labelpc
);
27213 fputs (":\n", file
);
27214 if (TARGET_THUMB1_ONLY
)
27216 /* This is 2 insns after the start of the thunk, so we know it
27217 is 4-byte aligned. */
27218 fputs ("\tadd\tr3, pc, r3\n", file
);
27219 fputs ("\tmov r12, r3\n", file
);
27222 fputs ("\tadd\tr12, pc, r12\n", file
);
27224 else if (TARGET_THUMB1_ONLY
)
27225 fputs ("\tmov r12, r3\n", file
);
27227 if (TARGET_THUMB1_ONLY
)
27229 if (mi_delta
> 255)
27231 fputs ("\tldr\tr3, ", file
);
27232 assemble_name (file
, label
);
27233 fputs ("+4\n", file
);
27234 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
27235 mi_op
, this_regno
, this_regno
);
27237 else if (mi_delta
!= 0)
27239 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
27240 mi_op
, this_regno
, this_regno
,
27246 /* TODO: Use movw/movt for large constants when available. */
27247 while (mi_delta
!= 0)
27249 if ((mi_delta
& (3 << shift
)) == 0)
27253 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
27254 mi_op
, this_regno
, this_regno
,
27255 mi_delta
& (0xff << shift
));
27256 mi_delta
&= ~(0xff << shift
);
27263 if (TARGET_THUMB1_ONLY
)
27264 fputs ("\tpop\t{r3}\n", file
);
27266 fprintf (file
, "\tbx\tr12\n");
27267 ASM_OUTPUT_ALIGN (file
, 2);
27268 assemble_name (file
, label
);
27269 fputs (":\n", file
);
27272 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
27273 rtx tem
= XEXP (DECL_RTL (function
), 0);
27274 tem
= plus_constant (GET_MODE (tem
), tem
, -7);
27275 tem
= gen_rtx_MINUS (GET_MODE (tem
),
27277 gen_rtx_SYMBOL_REF (Pmode
,
27278 ggc_strdup (labelpc
)));
27279 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
27282 /* Output ".word .LTHUNKn". */
27283 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
27285 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
27286 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
27290 fputs ("\tb\t", file
);
27291 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
27292 if (NEED_PLT_RELOC
)
27293 fputs ("(PLT)", file
);
27294 fputc ('\n', file
);
27297 final_end_function ();
27301 arm_emit_vector_const (FILE *file
, rtx x
)
27304 const char * pattern
;
27306 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
27308 switch (GET_MODE (x
))
27310 case V2SImode
: pattern
= "%08x"; break;
27311 case V4HImode
: pattern
= "%04x"; break;
27312 case V8QImode
: pattern
= "%02x"; break;
27313 default: gcc_unreachable ();
27316 fprintf (file
, "0x");
27317 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
27321 element
= CONST_VECTOR_ELT (x
, i
);
27322 fprintf (file
, pattern
, INTVAL (element
));
27328 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
27329 HFmode constant pool entries are actually loaded with ldr. */
27331 arm_emit_fp16_const (rtx c
)
27336 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
27337 bits
= real_to_target (NULL
, &r
, HFmode
);
27338 if (WORDS_BIG_ENDIAN
)
27339 assemble_zeros (2);
27340 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
27341 if (!WORDS_BIG_ENDIAN
)
27342 assemble_zeros (2);
27346 arm_output_load_gr (rtx
*operands
)
27353 if (!MEM_P (operands
[1])
27354 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
27355 || !REG_P (reg
= XEXP (sum
, 0))
27356 || !CONST_INT_P (offset
= XEXP (sum
, 1))
27357 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
27358 return "wldrw%?\t%0, %1";
27360 /* Fix up an out-of-range load of a GR register. */
27361 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
27362 wcgr
= operands
[0];
27364 output_asm_insn ("ldr%?\t%0, %1", operands
);
27366 operands
[0] = wcgr
;
27368 output_asm_insn ("tmcr%?\t%0, %1", operands
);
27369 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
27374 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27376 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27377 named arg and all anonymous args onto the stack.
27378 XXX I know the prologue shouldn't be pushing registers, but it is faster
27382 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
27383 enum machine_mode mode
,
27386 int second_time ATTRIBUTE_UNUSED
)
27388 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
27391 cfun
->machine
->uses_anonymous_args
= 1;
27392 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
27394 nregs
= pcum
->aapcs_ncrn
;
27395 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
27399 nregs
= pcum
->nregs
;
27401 if (nregs
< NUM_ARG_REGS
)
27402 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
27405 /* We can't rely on the caller doing the proper promotion when
27406 using APCS or ATPCS. */
27409 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
27411 return !TARGET_AAPCS_BASED
;
27414 static enum machine_mode
27415 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
27416 enum machine_mode mode
,
27417 int *punsignedp ATTRIBUTE_UNUSED
,
27418 const_tree fntype ATTRIBUTE_UNUSED
,
27419 int for_return ATTRIBUTE_UNUSED
)
27421 if (GET_MODE_CLASS (mode
) == MODE_INT
27422 && GET_MODE_SIZE (mode
) < 4)
27428 /* AAPCS based ABIs use short enums by default. */
27431 arm_default_short_enums (void)
27433 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
27437 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27440 arm_align_anon_bitfield (void)
27442 return TARGET_AAPCS_BASED
;
27446 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27449 arm_cxx_guard_type (void)
27451 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
27455 /* The EABI says test the least significant bit of a guard variable. */
27458 arm_cxx_guard_mask_bit (void)
27460 return TARGET_AAPCS_BASED
;
27464 /* The EABI specifies that all array cookies are 8 bytes long. */
27467 arm_get_cookie_size (tree type
)
27471 if (!TARGET_AAPCS_BASED
)
27472 return default_cxx_get_cookie_size (type
);
27474 size
= build_int_cst (sizetype
, 8);
27479 /* The EABI says that array cookies should also contain the element size. */
27482 arm_cookie_has_size (void)
27484 return TARGET_AAPCS_BASED
;
27488 /* The EABI says constructors and destructors should return a pointer to
27489 the object constructed/destroyed. */
27492 arm_cxx_cdtor_returns_this (void)
27494 return TARGET_AAPCS_BASED
;
27497 /* The EABI says that an inline function may never be the key
27501 arm_cxx_key_method_may_be_inline (void)
27503 return !TARGET_AAPCS_BASED
;
27507 arm_cxx_determine_class_data_visibility (tree decl
)
27509 if (!TARGET_AAPCS_BASED
27510 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
27513 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27514 is exported. However, on systems without dynamic vague linkage,
27515 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27516 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
27517 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
27519 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
27520 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
27524 arm_cxx_class_data_always_comdat (void)
27526 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27527 vague linkage if the class has no key function. */
27528 return !TARGET_AAPCS_BASED
;
27532 /* The EABI says __aeabi_atexit should be used to register static
27536 arm_cxx_use_aeabi_atexit (void)
27538 return TARGET_AAPCS_BASED
;
27543 arm_set_return_address (rtx source
, rtx scratch
)
27545 arm_stack_offsets
*offsets
;
27546 HOST_WIDE_INT delta
;
27548 unsigned long saved_regs
;
27550 offsets
= arm_get_frame_offsets ();
27551 saved_regs
= offsets
->saved_regs_mask
;
27553 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
27554 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
27557 if (frame_pointer_needed
)
27558 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
27561 /* LR will be the first saved register. */
27562 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
27567 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
27568 GEN_INT (delta
& ~4095)));
27573 addr
= stack_pointer_rtx
;
27575 addr
= plus_constant (Pmode
, addr
, delta
);
27577 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
27583 thumb_set_return_address (rtx source
, rtx scratch
)
27585 arm_stack_offsets
*offsets
;
27586 HOST_WIDE_INT delta
;
27587 HOST_WIDE_INT limit
;
27590 unsigned long mask
;
27594 offsets
= arm_get_frame_offsets ();
27595 mask
= offsets
->saved_regs_mask
;
27596 if (mask
& (1 << LR_REGNUM
))
27599 /* Find the saved regs. */
27600 if (frame_pointer_needed
)
27602 delta
= offsets
->soft_frame
- offsets
->saved_args
;
27603 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
27609 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
27612 /* Allow for the stack frame. */
27613 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
27615 /* The link register is always the first saved register. */
27618 /* Construct the address. */
27619 addr
= gen_rtx_REG (SImode
, reg
);
27622 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
27623 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
27627 addr
= plus_constant (Pmode
, addr
, delta
);
27629 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
27632 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
27635 /* Implements target hook vector_mode_supported_p. */
27637 arm_vector_mode_supported_p (enum machine_mode mode
)
27639 /* Neon also supports V2SImode, etc. listed in the clause below. */
27640 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
27641 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
27644 if ((TARGET_NEON
|| TARGET_IWMMXT
)
27645 && ((mode
== V2SImode
)
27646 || (mode
== V4HImode
)
27647 || (mode
== V8QImode
)))
27650 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
27651 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
27652 || mode
== V2HAmode
))
27658 /* Implements target hook array_mode_supported_p. */
27661 arm_array_mode_supported_p (enum machine_mode mode
,
27662 unsigned HOST_WIDE_INT nelems
)
27665 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
27666 && (nelems
>= 2 && nelems
<= 4))
27672 /* Use the option -mvectorize-with-neon-double to override the use of quardword
27673 registers when autovectorizing for Neon, at least until multiple vector
27674 widths are supported properly by the middle-end. */
27676 static enum machine_mode
27677 arm_preferred_simd_mode (enum machine_mode mode
)
27683 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
27685 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
27687 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
27689 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
27691 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
27698 if (TARGET_REALLY_IWMMXT
)
27714 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27716 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27717 using r0-r4 for function arguments, r7 for the stack frame and don't have
27718 enough left over to do doubleword arithmetic. For Thumb-2 all the
27719 potentially problematic instructions accept high registers so this is not
27720 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27721 that require many low registers. */
27723 arm_class_likely_spilled_p (reg_class_t rclass
)
27725 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
27726 || rclass
== CC_REG
)
27732 /* Implements target hook small_register_classes_for_mode_p. */
27734 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
27736 return TARGET_THUMB1
;
27739 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27740 ARM insns and therefore guarantee that the shift count is modulo 256.
27741 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27742 guarantee no particular behavior for out-of-range counts. */
27744 static unsigned HOST_WIDE_INT
27745 arm_shift_truncation_mask (enum machine_mode mode
)
27747 return mode
== SImode
? 255 : 0;
27751 /* Map internal gcc register numbers to DWARF2 register numbers. */
27754 arm_dbx_register_number (unsigned int regno
)
27759 if (IS_VFP_REGNUM (regno
))
27761 /* See comment in arm_dwarf_register_span. */
27762 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27763 return 64 + regno
- FIRST_VFP_REGNUM
;
27765 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
27768 if (IS_IWMMXT_GR_REGNUM (regno
))
27769 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
27771 if (IS_IWMMXT_REGNUM (regno
))
27772 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
27774 gcc_unreachable ();
27777 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27778 GCC models tham as 64 32-bit registers, so we need to describe this to
27779 the DWARF generation code. Other registers can use the default. */
27781 arm_dwarf_register_span (rtx rtl
)
27788 regno
= REGNO (rtl
);
27789 if (!IS_VFP_REGNUM (regno
))
27792 /* XXX FIXME: The EABI defines two VFP register ranges:
27793 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27795 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27796 corresponding D register. Until GDB supports this, we shall use the
27797 legacy encodings. We also use these encodings for D0-D15 for
27798 compatibility with older debuggers. */
27799 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27802 nregs
= GET_MODE_SIZE (GET_MODE (rtl
)) / 8;
27803 p
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nregs
));
27804 for (i
= 0; i
< nregs
; i
++)
27805 XVECEXP (p
, 0, i
) = gen_rtx_REG (DImode
, regno
+ i
);
27810 #if ARM_UNWIND_INFO
27811 /* Emit unwind directives for a store-multiple instruction or stack pointer
27812 push during alignment.
27813 These should only ever be generated by the function prologue code, so
27814 expect them to have a particular form. */
27817 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
27820 HOST_WIDE_INT offset
;
27821 HOST_WIDE_INT nregs
;
27827 e
= XVECEXP (p
, 0, 0);
27828 if (GET_CODE (e
) != SET
)
27831 /* First insn will adjust the stack pointer. */
27832 if (GET_CODE (e
) != SET
27833 || !REG_P (XEXP (e
, 0))
27834 || REGNO (XEXP (e
, 0)) != SP_REGNUM
27835 || GET_CODE (XEXP (e
, 1)) != PLUS
)
27838 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
27839 nregs
= XVECLEN (p
, 0) - 1;
27841 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
27844 /* The function prologue may also push pc, but not annotate it as it is
27845 never restored. We turn this into a stack pointer adjustment. */
27846 if (nregs
* 4 == offset
- 4)
27848 fprintf (asm_out_file
, "\t.pad #4\n");
27852 fprintf (asm_out_file
, "\t.save {");
27854 else if (IS_VFP_REGNUM (reg
))
27857 fprintf (asm_out_file
, "\t.vsave {");
27860 /* Unknown register type. */
27863 /* If the stack increment doesn't match the size of the saved registers,
27864 something has gone horribly wrong. */
27865 if (offset
!= nregs
* reg_size
)
27870 /* The remaining insns will describe the stores. */
27871 for (i
= 1; i
<= nregs
; i
++)
27873 /* Expect (set (mem <addr>) (reg)).
27874 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27875 e
= XVECEXP (p
, 0, i
);
27876 if (GET_CODE (e
) != SET
27877 || !MEM_P (XEXP (e
, 0))
27878 || !REG_P (XEXP (e
, 1)))
27881 reg
= REGNO (XEXP (e
, 1));
27886 fprintf (asm_out_file
, ", ");
27887 /* We can't use %r for vfp because we need to use the
27888 double precision register names. */
27889 if (IS_VFP_REGNUM (reg
))
27890 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27892 asm_fprintf (asm_out_file
, "%r", reg
);
27894 #ifdef ENABLE_CHECKING
27895 /* Check that the addresses are consecutive. */
27896 e
= XEXP (XEXP (e
, 0), 0);
27897 if (GET_CODE (e
) == PLUS
)
27899 offset
+= reg_size
;
27900 if (!REG_P (XEXP (e
, 0))
27901 || REGNO (XEXP (e
, 0)) != SP_REGNUM
27902 || !CONST_INT_P (XEXP (e
, 1))
27903 || offset
!= INTVAL (XEXP (e
, 1)))
27908 || REGNO (e
) != SP_REGNUM
)
27912 fprintf (asm_out_file
, "}\n");
27915 /* Emit unwind directives for a SET. */
27918 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27926 switch (GET_CODE (e0
))
27929 /* Pushing a single register. */
27930 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27931 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27932 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27935 asm_fprintf (asm_out_file
, "\t.save ");
27936 if (IS_VFP_REGNUM (REGNO (e1
)))
27937 asm_fprintf(asm_out_file
, "{d%d}\n",
27938 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27940 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27944 if (REGNO (e0
) == SP_REGNUM
)
27946 /* A stack increment. */
27947 if (GET_CODE (e1
) != PLUS
27948 || !REG_P (XEXP (e1
, 0))
27949 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27950 || !CONST_INT_P (XEXP (e1
, 1)))
27953 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27954 -INTVAL (XEXP (e1
, 1)));
27956 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27958 HOST_WIDE_INT offset
;
27960 if (GET_CODE (e1
) == PLUS
)
27962 if (!REG_P (XEXP (e1
, 0))
27963 || !CONST_INT_P (XEXP (e1
, 1)))
27965 reg
= REGNO (XEXP (e1
, 0));
27966 offset
= INTVAL (XEXP (e1
, 1));
27967 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27968 HARD_FRAME_POINTER_REGNUM
, reg
,
27971 else if (REG_P (e1
))
27974 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27975 HARD_FRAME_POINTER_REGNUM
, reg
);
27980 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27982 /* Move from sp to reg. */
27983 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27985 else if (GET_CODE (e1
) == PLUS
27986 && REG_P (XEXP (e1
, 0))
27987 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27988 && CONST_INT_P (XEXP (e1
, 1)))
27990 /* Set reg to offset from sp. */
27991 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27992 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
28004 /* Emit unwind directives for the given insn. */
28007 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
28010 bool handled_one
= false;
28012 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
28015 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
28016 && (TREE_NOTHROW (current_function_decl
)
28017 || crtl
->all_throwers_are_sibcalls
))
28020 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
28023 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
28025 switch (REG_NOTE_KIND (note
))
28027 case REG_FRAME_RELATED_EXPR
:
28028 pat
= XEXP (note
, 0);
28031 case REG_CFA_REGISTER
:
28032 pat
= XEXP (note
, 0);
28035 pat
= PATTERN (insn
);
28036 if (GET_CODE (pat
) == PARALLEL
)
28037 pat
= XVECEXP (pat
, 0, 0);
28040 /* Only emitted for IS_STACKALIGN re-alignment. */
28045 src
= SET_SRC (pat
);
28046 dest
= SET_DEST (pat
);
28048 gcc_assert (src
== stack_pointer_rtx
);
28049 reg
= REGNO (dest
);
28050 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28053 handled_one
= true;
28056 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
28057 to get correct dwarf information for shrink-wrap. We should not
28058 emit unwind information for it because these are used either for
28059 pretend arguments or notes to adjust sp and restore registers from
28061 case REG_CFA_ADJUST_CFA
:
28062 case REG_CFA_RESTORE
:
28065 case REG_CFA_DEF_CFA
:
28066 case REG_CFA_EXPRESSION
:
28067 case REG_CFA_OFFSET
:
28068 /* ??? Only handling here what we actually emit. */
28069 gcc_unreachable ();
28077 pat
= PATTERN (insn
);
28080 switch (GET_CODE (pat
))
28083 arm_unwind_emit_set (asm_out_file
, pat
);
28087 /* Store multiple. */
28088 arm_unwind_emit_sequence (asm_out_file
, pat
);
28097 /* Output a reference from a function exception table to the type_info
28098 object X. The EABI specifies that the symbol should be relocated by
28099 an R_ARM_TARGET2 relocation. */
28102 arm_output_ttype (rtx x
)
28104 fputs ("\t.word\t", asm_out_file
);
28105 output_addr_const (asm_out_file
, x
);
28106 /* Use special relocations for symbol references. */
28107 if (!CONST_INT_P (x
))
28108 fputs ("(TARGET2)", asm_out_file
);
28109 fputc ('\n', asm_out_file
);
28114 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28117 arm_asm_emit_except_personality (rtx personality
)
28119 fputs ("\t.personality\t", asm_out_file
);
28120 output_addr_const (asm_out_file
, personality
);
28121 fputc ('\n', asm_out_file
);
28124 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28127 arm_asm_init_sections (void)
28129 exception_section
= get_unnamed_section (0, output_section_asm_op
,
28132 #endif /* ARM_UNWIND_INFO */
28134 /* Output unwind directives for the start/end of a function. */
28137 arm_output_fn_unwind (FILE * f
, bool prologue
)
28139 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
28143 fputs ("\t.fnstart\n", f
);
28146 /* If this function will never be unwound, then mark it as such.
28147 The came condition is used in arm_unwind_emit to suppress
28148 the frame annotations. */
28149 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
28150 && (TREE_NOTHROW (current_function_decl
)
28151 || crtl
->all_throwers_are_sibcalls
))
28152 fputs("\t.cantunwind\n", f
);
28154 fputs ("\t.fnend\n", f
);
28159 arm_emit_tls_decoration (FILE *fp
, rtx x
)
28161 enum tls_reloc reloc
;
28164 val
= XVECEXP (x
, 0, 0);
28165 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
28167 output_addr_const (fp
, val
);
28172 fputs ("(tlsgd)", fp
);
28175 fputs ("(tlsldm)", fp
);
28178 fputs ("(tlsldo)", fp
);
28181 fputs ("(gottpoff)", fp
);
28184 fputs ("(tpoff)", fp
);
28187 fputs ("(tlsdesc)", fp
);
28190 gcc_unreachable ();
28199 fputs (" + (. - ", fp
);
28200 output_addr_const (fp
, XVECEXP (x
, 0, 2));
28201 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
28202 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
28203 output_addr_const (fp
, XVECEXP (x
, 0, 3));
28213 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
28216 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
28218 gcc_assert (size
== 4);
28219 fputs ("\t.word\t", file
);
28220 output_addr_const (file
, x
);
28221 fputs ("(tlsldo)", file
);
28224 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
28227 arm_output_addr_const_extra (FILE *fp
, rtx x
)
28229 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
28230 return arm_emit_tls_decoration (fp
, x
);
28231 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
28234 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
28236 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
28237 assemble_name_raw (fp
, label
);
28241 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
28243 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
28247 output_addr_const (fp
, XVECEXP (x
, 0, 0));
28251 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
28253 output_addr_const (fp
, XVECEXP (x
, 0, 0));
28257 output_addr_const (fp
, XVECEXP (x
, 0, 1));
28261 else if (GET_CODE (x
) == CONST_VECTOR
)
28262 return arm_emit_vector_const (fp
, x
);
28267 /* Output assembly for a shift instruction.
28268 SET_FLAGS determines how the instruction modifies the condition codes.
28269 0 - Do not set condition codes.
28270 1 - Set condition codes.
28271 2 - Use smallest instruction. */
28273 arm_output_shift(rtx
* operands
, int set_flags
)
28276 static const char flag_chars
[3] = {'?', '.', '!'};
28281 c
= flag_chars
[set_flags
];
28282 if (TARGET_UNIFIED_ASM
)
28284 shift
= shift_op(operands
[3], &val
);
28288 operands
[2] = GEN_INT(val
);
28289 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
28292 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
28295 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
28296 output_asm_insn (pattern
, operands
);
28300 /* Output assembly for a WMMX immediate shift instruction. */
28302 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
28304 int shift
= INTVAL (operands
[2]);
28306 enum machine_mode opmode
= GET_MODE (operands
[0]);
28308 gcc_assert (shift
>= 0);
28310 /* If the shift value in the register versions is > 63 (for D qualifier),
28311 31 (for W qualifier) or 15 (for H qualifier). */
28312 if (((opmode
== V4HImode
) && (shift
> 15))
28313 || ((opmode
== V2SImode
) && (shift
> 31))
28314 || ((opmode
== DImode
) && (shift
> 63)))
28318 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
28319 output_asm_insn (templ
, operands
);
28320 if (opmode
== DImode
)
28322 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
28323 output_asm_insn (templ
, operands
);
28328 /* The destination register will contain all zeros. */
28329 sprintf (templ
, "wzero\t%%0");
28330 output_asm_insn (templ
, operands
);
28335 if ((opmode
== DImode
) && (shift
> 32))
28337 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
28338 output_asm_insn (templ
, operands
);
28339 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
28340 output_asm_insn (templ
, operands
);
28344 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
28345 output_asm_insn (templ
, operands
);
28350 /* Output assembly for a WMMX tinsr instruction. */
28352 arm_output_iwmmxt_tinsr (rtx
*operands
)
28354 int mask
= INTVAL (operands
[3]);
28357 int units
= mode_nunits
[GET_MODE (operands
[0])];
28358 gcc_assert ((mask
& (mask
- 1)) == 0);
28359 for (i
= 0; i
< units
; ++i
)
28361 if ((mask
& 0x01) == 1)
28367 gcc_assert (i
< units
);
28369 switch (GET_MODE (operands
[0]))
28372 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
28375 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
28378 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
28381 gcc_unreachable ();
28384 output_asm_insn (templ
, operands
);
28389 /* Output a Thumb-1 casesi dispatch sequence. */
28391 thumb1_output_casesi (rtx
*operands
)
28393 rtx diff_vec
= PATTERN (next_active_insn (operands
[0]));
28395 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
28397 switch (GET_MODE(diff_vec
))
28400 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
28401 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28403 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
28404 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28406 return "bl\t%___gnu_thumb1_case_si";
28408 gcc_unreachable ();
28412 /* Output a Thumb-2 casesi instruction. */
28414 thumb2_output_casesi (rtx
*operands
)
28416 rtx diff_vec
= PATTERN (next_active_insn (operands
[2]));
28418 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
28420 output_asm_insn ("cmp\t%0, %1", operands
);
28421 output_asm_insn ("bhi\t%l3", operands
);
28422 switch (GET_MODE(diff_vec
))
28425 return "tbb\t[%|pc, %0]";
28427 return "tbh\t[%|pc, %0, lsl #1]";
28431 output_asm_insn ("adr\t%4, %l2", operands
);
28432 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
28433 output_asm_insn ("add\t%4, %4, %5", operands
);
28438 output_asm_insn ("adr\t%4, %l2", operands
);
28439 return "ldr\t%|pc, [%4, %0, lsl #2]";
28442 gcc_unreachable ();
28446 /* Most ARM cores are single issue, but some newer ones can dual issue.
28447 The scheduler descriptions rely on this being correct. */
28449 arm_issue_rate (void)
28474 /* A table and a function to perform ARM-specific name mangling for
28475 NEON vector types in order to conform to the AAPCS (see "Procedure
28476 Call Standard for the ARM Architecture", Appendix A). To qualify
28477 for emission with the mangled names defined in that document, a
28478 vector type must not only be of the correct mode but also be
28479 composed of NEON vector element types (e.g. __builtin_neon_qi). */
28482 enum machine_mode mode
;
28483 const char *element_type_name
;
28484 const char *aapcs_name
;
28485 } arm_mangle_map_entry
;
28487 static arm_mangle_map_entry arm_mangle_map
[] = {
28488 /* 64-bit containerized types. */
28489 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
28490 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
28491 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
28492 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
28493 { V4HFmode
, "__builtin_neon_hf", "18__simd64_float16_t" },
28494 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
28495 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
28496 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
28497 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
28498 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
28499 /* 128-bit containerized types. */
28500 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
28501 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
28502 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
28503 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
28504 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
28505 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
28506 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
28507 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
28508 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
28509 { VOIDmode
, NULL
, NULL
}
28513 arm_mangle_type (const_tree type
)
28515 arm_mangle_map_entry
*pos
= arm_mangle_map
;
28517 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28518 has to be managled as if it is in the "std" namespace. */
28519 if (TARGET_AAPCS_BASED
28520 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
28521 return "St9__va_list";
28523 /* Half-precision float. */
28524 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
28527 if (TREE_CODE (type
) != VECTOR_TYPE
)
28530 /* Check the mode of the vector type, and the name of the vector
28531 element type, against the table. */
28532 while (pos
->mode
!= VOIDmode
)
28534 tree elt_type
= TREE_TYPE (type
);
28536 if (pos
->mode
== TYPE_MODE (type
)
28537 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
28538 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
28539 pos
->element_type_name
))
28540 return pos
->aapcs_name
;
28545 /* Use the default mangling for unrecognized (possibly user-defined)
28550 /* Order of allocation of core registers for Thumb: this allocation is
28551 written over the corresponding initial entries of the array
28552 initialized with REG_ALLOC_ORDER. We allocate all low registers
28553 first. Saving and restoring a low register is usually cheaper than
28554 using a call-clobbered high register. */
28556 static const int thumb_core_reg_alloc_order
[] =
28558 3, 2, 1, 0, 4, 5, 6, 7,
28559 14, 12, 8, 9, 10, 11
28562 /* Adjust register allocation order when compiling for Thumb. */
28565 arm_order_regs_for_local_alloc (void)
28567 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
28568 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
28570 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
28571 sizeof (thumb_core_reg_alloc_order
));
28574 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28577 arm_frame_pointer_required (void)
28579 return (cfun
->has_nonlocal_label
28580 || SUBTARGET_FRAME_POINTER_REQUIRED
28581 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
28584 /* Only thumb1 can't support conditional execution, so return true if
28585 the target is not thumb1. */
28587 arm_have_conditional_execution (void)
28589 return !TARGET_THUMB1
;
28593 arm_builtin_vectorized_function (tree fndecl
, tree type_out
, tree type_in
)
28595 enum machine_mode in_mode
, out_mode
;
28598 if (TREE_CODE (type_out
) != VECTOR_TYPE
28599 || TREE_CODE (type_in
) != VECTOR_TYPE
28600 || !(TARGET_NEON
&& TARGET_FPU_ARMV8
&& flag_unsafe_math_optimizations
))
28603 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
28604 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
28605 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
28606 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
28608 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
28609 decl of the vectorized builtin for the appropriate vector mode.
28610 NULL_TREE is returned if no such builtin is available. */
28611 #undef ARM_CHECK_BUILTIN_MODE
28612 #define ARM_CHECK_BUILTIN_MODE(C) \
28613 (out_mode == SFmode && out_n == C \
28614 && in_mode == SFmode && in_n == C)
28616 #undef ARM_FIND_VRINT_VARIANT
28617 #define ARM_FIND_VRINT_VARIANT(N) \
28618 (ARM_CHECK_BUILTIN_MODE (2) \
28619 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
28620 : (ARM_CHECK_BUILTIN_MODE (4) \
28621 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
28624 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
)
28626 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
28629 case BUILT_IN_FLOORF
:
28630 return ARM_FIND_VRINT_VARIANT (vrintm
);
28631 case BUILT_IN_CEILF
:
28632 return ARM_FIND_VRINT_VARIANT (vrintp
);
28633 case BUILT_IN_TRUNCF
:
28634 return ARM_FIND_VRINT_VARIANT (vrintz
);
28635 case BUILT_IN_ROUNDF
:
28636 return ARM_FIND_VRINT_VARIANT (vrinta
);
28643 #undef ARM_CHECK_BUILTIN_MODE
28644 #undef ARM_FIND_VRINT_VARIANT
28646 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28647 static HOST_WIDE_INT
28648 arm_vector_alignment (const_tree type
)
28650 HOST_WIDE_INT align
= tree_low_cst (TYPE_SIZE (type
), 0);
28652 if (TARGET_AAPCS_BASED
)
28653 align
= MIN (align
, 64);
28658 static unsigned int
28659 arm_autovectorize_vector_sizes (void)
28661 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
28665 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
28667 /* Vectors which aren't in packed structures will not be less aligned than
28668 the natural alignment of their element type, so this is safe. */
28669 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
28672 return default_builtin_vector_alignment_reachable (type
, is_packed
);
28676 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
28677 const_tree type
, int misalignment
,
28680 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
28682 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
28687 /* If the misalignment is unknown, we should be able to handle the access
28688 so long as it is not to a member of a packed data structure. */
28689 if (misalignment
== -1)
28692 /* Return true if the misalignment is a multiple of the natural alignment
28693 of the vector's element type. This is probably always going to be
28694 true in practice, since we've already established that this isn't a
28696 return ((misalignment
% align
) == 0);
28699 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
28704 arm_conditional_register_usage (void)
28708 if (TARGET_THUMB1
&& optimize_size
)
28710 /* When optimizing for size on Thumb-1, it's better not
28711 to use the HI regs, because of the overhead of
28713 for (regno
= FIRST_HI_REGNUM
;
28714 regno
<= LAST_HI_REGNUM
; ++regno
)
28715 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
28718 /* The link register can be clobbered by any branch insn,
28719 but we have no way to track that at present, so mark
28720 it as unavailable. */
28722 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
28724 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
28726 /* VFPv3 registers are disabled when earlier VFP
28727 versions are selected due to the definition of
28728 LAST_VFP_REGNUM. */
28729 for (regno
= FIRST_VFP_REGNUM
;
28730 regno
<= LAST_VFP_REGNUM
; ++ regno
)
28732 fixed_regs
[regno
] = 0;
28733 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
28734 || regno
>= FIRST_VFP_REGNUM
+ 32;
28738 if (TARGET_REALLY_IWMMXT
)
28740 regno
= FIRST_IWMMXT_GR_REGNUM
;
28741 /* The 2002/10/09 revision of the XScale ABI has wCG0
28742 and wCG1 as call-preserved registers. The 2002/11/21
28743 revision changed this so that all wCG registers are
28744 scratch registers. */
28745 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
28746 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
28747 fixed_regs
[regno
] = 0;
28748 /* The XScale ABI has wR0 - wR9 as scratch registers,
28749 the rest as call-preserved registers. */
28750 for (regno
= FIRST_IWMMXT_REGNUM
;
28751 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
28753 fixed_regs
[regno
] = 0;
28754 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
28758 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
28760 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28761 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28763 else if (TARGET_APCS_STACK
)
28765 fixed_regs
[10] = 1;
28766 call_used_regs
[10] = 1;
28768 /* -mcaller-super-interworking reserves r11 for calls to
28769 _interwork_r11_call_via_rN(). Making the register global
28770 is an easy way of ensuring that it remains valid for all
28772 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
28773 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
28775 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28776 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28777 if (TARGET_CALLER_INTERWORKING
)
28778 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28780 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28784 arm_preferred_rename_class (reg_class_t rclass
)
28786 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28787 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
28788 and code size can be reduced. */
28789 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
28795 /* Compute the atrribute "length" of insn "*push_multi".
28796 So this function MUST be kept in sync with that insn pattern. */
28798 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
28800 int i
, regno
, hi_reg
;
28801 int num_saves
= XVECLEN (parallel_op
, 0);
28811 regno
= REGNO (first_op
);
28812 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28813 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
28815 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
28816 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28824 /* Compute the number of instructions emitted by output_move_double. */
28826 arm_count_output_move_double_insns (rtx
*operands
)
28830 /* output_move_double may modify the operands array, so call it
28831 here on a copy of the array. */
28832 ops
[0] = operands
[0];
28833 ops
[1] = operands
[1];
28834 output_move_double (ops
, false, &count
);
28839 vfp3_const_double_for_fract_bits (rtx operand
)
28841 REAL_VALUE_TYPE r0
;
28843 if (!CONST_DOUBLE_P (operand
))
28846 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
28847 if (exact_real_inverse (DFmode
, &r0
))
28849 if (exact_real_truncate (DFmode
, &r0
))
28851 HOST_WIDE_INT value
= real_to_integer (&r0
);
28852 value
= value
& 0xffffffff;
28853 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28854 return int_log2 (value
);
28860 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28863 arm_pre_atomic_barrier (enum memmodel model
)
28865 if (need_atomic_barrier_p (model
, true))
28866 emit_insn (gen_memory_barrier ());
28870 arm_post_atomic_barrier (enum memmodel model
)
28872 if (need_atomic_barrier_p (model
, false))
28873 emit_insn (gen_memory_barrier ());
28876 /* Emit the load-exclusive and store-exclusive instructions.
28877 Use acquire and release versions if necessary. */
28880 arm_emit_load_exclusive (enum machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28882 rtx (*gen
) (rtx
, rtx
);
28888 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28889 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28890 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28891 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28893 gcc_unreachable ();
28900 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28901 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
28902 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
28903 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
28905 gcc_unreachable ();
28909 emit_insn (gen (rval
, mem
));
28913 arm_emit_store_exclusive (enum machine_mode mode
, rtx bval
, rtx rval
,
28916 rtx (*gen
) (rtx
, rtx
, rtx
);
28922 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28923 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28924 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28925 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28927 gcc_unreachable ();
28934 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28935 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
28936 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
28937 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
28939 gcc_unreachable ();
28943 emit_insn (gen (bval
, rval
, mem
));
28946 /* Mark the previous jump instruction as unlikely. */
28949 emit_unlikely_jump (rtx insn
)
28951 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
28953 insn
= emit_jump_insn (insn
);
28954 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
28957 /* Expand a compare and swap pattern. */
28960 arm_expand_compare_and_swap (rtx operands
[])
28962 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28963 enum machine_mode mode
;
28964 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28966 bval
= operands
[0];
28967 rval
= operands
[1];
28969 oldval
= operands
[3];
28970 newval
= operands
[4];
28971 is_weak
= operands
[5];
28972 mod_s
= operands
[6];
28973 mod_f
= operands
[7];
28974 mode
= GET_MODE (mem
);
28976 /* Normally the succ memory model must be stronger than fail, but in the
28977 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28978 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28980 if (TARGET_HAVE_LDACQ
28981 && INTVAL (mod_f
) == MEMMODEL_ACQUIRE
28982 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
28983 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28989 /* For narrow modes, we're going to perform the comparison in SImode,
28990 so do the zero-extension now. */
28991 rval
= gen_reg_rtx (SImode
);
28992 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28996 /* Force the value into a register if needed. We waited until after
28997 the zero-extension above to do this properly. */
28998 if (!arm_add_operand (oldval
, SImode
))
28999 oldval
= force_reg (SImode
, oldval
);
29003 if (!cmpdi_operand (oldval
, mode
))
29004 oldval
= force_reg (mode
, oldval
);
29008 gcc_unreachable ();
29013 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
29014 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
29015 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
29016 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
29018 gcc_unreachable ();
29021 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
29023 if (mode
== QImode
|| mode
== HImode
)
29024 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
29026 /* In all cases, we arrange for success to be signaled by Z set.
29027 This arrangement allows for the boolean result to be used directly
29028 in a subsequent branch, post optimization. */
29029 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
29030 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
29031 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
29034 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29035 another memory store between the load-exclusive and store-exclusive can
29036 reset the monitor from Exclusive to Open state. This means we must wait
29037 until after reload to split the pattern, lest we get a register spill in
29038 the middle of the atomic sequence. */
29041 arm_split_compare_and_swap (rtx operands
[])
29043 rtx rval
, mem
, oldval
, newval
, scratch
;
29044 enum machine_mode mode
;
29045 enum memmodel mod_s
, mod_f
;
29047 rtx label1
, label2
, x
, cond
;
29049 rval
= operands
[0];
29051 oldval
= operands
[2];
29052 newval
= operands
[3];
29053 is_weak
= (operands
[4] != const0_rtx
);
29054 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
29055 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
29056 scratch
= operands
[7];
29057 mode
= GET_MODE (mem
);
29059 bool use_acquire
= TARGET_HAVE_LDACQ
29060 && !(mod_s
== MEMMODEL_RELAXED
29061 || mod_s
== MEMMODEL_CONSUME
29062 || mod_s
== MEMMODEL_RELEASE
);
29064 bool use_release
= TARGET_HAVE_LDACQ
29065 && !(mod_s
== MEMMODEL_RELAXED
29066 || mod_s
== MEMMODEL_CONSUME
29067 || mod_s
== MEMMODEL_ACQUIRE
);
29069 /* Checks whether a barrier is needed and emits one accordingly. */
29070 if (!(use_acquire
|| use_release
))
29071 arm_pre_atomic_barrier (mod_s
);
29076 label1
= gen_label_rtx ();
29077 emit_label (label1
);
29079 label2
= gen_label_rtx ();
29081 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
29083 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
29084 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
29085 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
29086 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
29087 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
29089 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
29091 /* Weak or strong, we want EQ to be true for success, so that we
29092 match the flags that we got from the compare above. */
29093 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
29094 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
29095 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
29099 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
29100 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
29101 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
29102 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
29105 if (mod_f
!= MEMMODEL_RELAXED
)
29106 emit_label (label2
);
29108 /* Checks whether a barrier is needed and emits one accordingly. */
29109 if (!(use_acquire
|| use_release
))
29110 arm_post_atomic_barrier (mod_s
);
29112 if (mod_f
== MEMMODEL_RELAXED
)
29113 emit_label (label2
);
29117 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
29118 rtx value
, rtx model_rtx
, rtx cond
)
29120 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
29121 enum machine_mode mode
= GET_MODE (mem
);
29122 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
29125 bool use_acquire
= TARGET_HAVE_LDACQ
29126 && !(model
== MEMMODEL_RELAXED
29127 || model
== MEMMODEL_CONSUME
29128 || model
== MEMMODEL_RELEASE
);
29130 bool use_release
= TARGET_HAVE_LDACQ
29131 && !(model
== MEMMODEL_RELAXED
29132 || model
== MEMMODEL_CONSUME
29133 || model
== MEMMODEL_ACQUIRE
);
29135 /* Checks whether a barrier is needed and emits one accordingly. */
29136 if (!(use_acquire
|| use_release
))
29137 arm_pre_atomic_barrier (model
);
29139 label
= gen_label_rtx ();
29140 emit_label (label
);
29143 new_out
= gen_lowpart (wmode
, new_out
);
29145 old_out
= gen_lowpart (wmode
, old_out
);
29148 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
29150 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
29159 x
= gen_rtx_AND (wmode
, old_out
, value
);
29160 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
29161 x
= gen_rtx_NOT (wmode
, new_out
);
29162 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
29166 if (CONST_INT_P (value
))
29168 value
= GEN_INT (-INTVAL (value
));
29174 if (mode
== DImode
)
29176 /* DImode plus/minus need to clobber flags. */
29177 /* The adddi3 and subdi3 patterns are incorrectly written so that
29178 they require matching operands, even when we could easily support
29179 three operands. Thankfully, this can be fixed up post-splitting,
29180 as the individual add+adc patterns do accept three operands and
29181 post-reload cprop can make these moves go away. */
29182 emit_move_insn (new_out
, old_out
);
29184 x
= gen_adddi3 (new_out
, new_out
, value
);
29186 x
= gen_subdi3 (new_out
, new_out
, value
);
29193 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
29194 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
29198 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
29201 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
29202 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
29204 /* Checks whether a barrier is needed and emits one accordingly. */
29205 if (!(use_acquire
|| use_release
))
29206 arm_post_atomic_barrier (model
);
29209 #define MAX_VECT_LEN 16
29211 struct expand_vec_perm_d
29213 rtx target
, op0
, op1
;
29214 unsigned char perm
[MAX_VECT_LEN
];
29215 enum machine_mode vmode
;
29216 unsigned char nelt
;
29221 /* Generate a variable permutation. */
29224 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29226 enum machine_mode vmode
= GET_MODE (target
);
29227 bool one_vector_p
= rtx_equal_p (op0
, op1
);
29229 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
29230 gcc_checking_assert (GET_MODE (op0
) == vmode
);
29231 gcc_checking_assert (GET_MODE (op1
) == vmode
);
29232 gcc_checking_assert (GET_MODE (sel
) == vmode
);
29233 gcc_checking_assert (TARGET_NEON
);
29237 if (vmode
== V8QImode
)
29238 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
29240 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
29246 if (vmode
== V8QImode
)
29248 pair
= gen_reg_rtx (V16QImode
);
29249 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
29250 pair
= gen_lowpart (TImode
, pair
);
29251 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
29255 pair
= gen_reg_rtx (OImode
);
29256 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
29257 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
29263 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29265 enum machine_mode vmode
= GET_MODE (target
);
29266 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
29267 bool one_vector_p
= rtx_equal_p (op0
, op1
);
29268 rtx rmask
[MAX_VECT_LEN
], mask
;
29270 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29271 numbering of elements for big-endian, we must reverse the order. */
29272 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
29274 /* The VTBL instruction does not use a modulo index, so we must take care
29275 of that ourselves. */
29276 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29277 for (i
= 0; i
< nelt
; ++i
)
29279 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
29280 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
29282 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
29285 /* Generate or test for an insn that supports a constant permutation. */
29287 /* Recognize patterns for the VUZP insns. */
29290 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
29292 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
29293 rtx out0
, out1
, in0
, in1
, x
;
29294 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
29296 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29299 /* Note that these are little-endian tests. Adjust for big-endian later. */
29300 if (d
->perm
[0] == 0)
29302 else if (d
->perm
[0] == 1)
29306 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29308 for (i
= 0; i
< nelt
; i
++)
29310 unsigned elt
= (i
* 2 + odd
) & mask
;
29311 if (d
->perm
[i
] != elt
)
29321 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
29322 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
29323 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
29324 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
29325 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
29326 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
29327 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
29328 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
29330 gcc_unreachable ();
29335 if (BYTES_BIG_ENDIAN
)
29337 x
= in0
, in0
= in1
, in1
= x
;
29342 out1
= gen_reg_rtx (d
->vmode
);
29344 x
= out0
, out0
= out1
, out1
= x
;
29346 emit_insn (gen (out0
, in0
, in1
, out1
));
29350 /* Recognize patterns for the VZIP insns. */
29353 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
29355 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
29356 rtx out0
, out1
, in0
, in1
, x
;
29357 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
29359 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29362 /* Note that these are little-endian tests. Adjust for big-endian later. */
29364 if (d
->perm
[0] == high
)
29366 else if (d
->perm
[0] == 0)
29370 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29372 for (i
= 0; i
< nelt
/ 2; i
++)
29374 unsigned elt
= (i
+ high
) & mask
;
29375 if (d
->perm
[i
* 2] != elt
)
29377 elt
= (elt
+ nelt
) & mask
;
29378 if (d
->perm
[i
* 2 + 1] != elt
)
29388 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
29389 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
29390 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
29391 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
29392 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
29393 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
29394 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
29395 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
29397 gcc_unreachable ();
29402 if (BYTES_BIG_ENDIAN
)
29404 x
= in0
, in0
= in1
, in1
= x
;
29409 out1
= gen_reg_rtx (d
->vmode
);
29411 x
= out0
, out0
= out1
, out1
= x
;
29413 emit_insn (gen (out0
, in0
, in1
, out1
));
29417 /* Recognize patterns for the VREV insns. */
29420 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
29422 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
29423 rtx (*gen
)(rtx
, rtx
, rtx
);
29425 if (!d
->one_vector_p
)
29434 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
29435 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
29443 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
29444 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
29445 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
29446 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
29454 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
29455 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
29456 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
29457 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
29458 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
29459 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
29460 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
29461 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
29470 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
29471 for (j
= 0; j
<= diff
; j
+= 1)
29473 /* This is guaranteed to be true as the value of diff
29474 is 7, 3, 1 and we should have enough elements in the
29475 queue to generate this. Getting a vector mask with a
29476 value of diff other than these values implies that
29477 something is wrong by the time we get here. */
29478 gcc_assert (i
+ j
< nelt
);
29479 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
29487 /* ??? The third operand is an artifact of the builtin infrastructure
29488 and is ignored by the actual instruction. */
29489 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
29493 /* Recognize patterns for the VTRN insns. */
29496 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
29498 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
29499 rtx out0
, out1
, in0
, in1
, x
;
29500 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
29502 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29505 /* Note that these are little-endian tests. Adjust for big-endian later. */
29506 if (d
->perm
[0] == 0)
29508 else if (d
->perm
[0] == 1)
29512 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29514 for (i
= 0; i
< nelt
; i
+= 2)
29516 if (d
->perm
[i
] != i
+ odd
)
29518 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
29528 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
29529 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
29530 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
29531 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
29532 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
29533 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
29534 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
29535 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
29537 gcc_unreachable ();
29542 if (BYTES_BIG_ENDIAN
)
29544 x
= in0
, in0
= in1
, in1
= x
;
29549 out1
= gen_reg_rtx (d
->vmode
);
29551 x
= out0
, out0
= out1
, out1
= x
;
29553 emit_insn (gen (out0
, in0
, in1
, out1
));
29557 /* Recognize patterns for the VEXT insns. */
29560 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
29562 unsigned int i
, nelt
= d
->nelt
;
29563 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
29566 unsigned int location
;
29568 unsigned int next
= d
->perm
[0] + 1;
29570 /* TODO: Handle GCC's numbering of elements for big-endian. */
29571 if (BYTES_BIG_ENDIAN
)
29574 /* Check if the extracted indexes are increasing by one. */
29575 for (i
= 1; i
< nelt
; next
++, i
++)
29577 /* If we hit the most significant element of the 2nd vector in
29578 the previous iteration, no need to test further. */
29579 if (next
== 2 * nelt
)
29582 /* If we are operating on only one vector: it could be a
29583 rotation. If there are only two elements of size < 64, let
29584 arm_evpc_neon_vrev catch it. */
29585 if (d
->one_vector_p
&& (next
== nelt
))
29587 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
29593 if (d
->perm
[i
] != next
)
29597 location
= d
->perm
[0];
29601 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
29602 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
29603 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
29604 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
29605 case V2SImode
: gen
= gen_neon_vextv2si
; break;
29606 case V4SImode
: gen
= gen_neon_vextv4si
; break;
29607 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
29608 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
29609 case V2DImode
: gen
= gen_neon_vextv2di
; break;
29618 offset
= GEN_INT (location
);
29619 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
29623 /* The NEON VTBL instruction is a fully variable permuation that's even
29624 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29625 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29626 can do slightly better by expanding this as a constant where we don't
29627 have to apply a mask. */
29630 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
29632 rtx rperm
[MAX_VECT_LEN
], sel
;
29633 enum machine_mode vmode
= d
->vmode
;
29634 unsigned int i
, nelt
= d
->nelt
;
29636 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29637 numbering of elements for big-endian, we must reverse the order. */
29638 if (BYTES_BIG_ENDIAN
)
29644 /* Generic code will try constant permutation twice. Once with the
29645 original mode and again with the elements lowered to QImode.
29646 So wait and don't do the selector expansion ourselves. */
29647 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
29650 for (i
= 0; i
< nelt
; ++i
)
29651 rperm
[i
] = GEN_INT (d
->perm
[i
]);
29652 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
29653 sel
= force_reg (vmode
, sel
);
29655 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
29660 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
29662 /* Check if the input mask matches vext before reordering the
29665 if (arm_evpc_neon_vext (d
))
29668 /* The pattern matching functions above are written to look for a small
29669 number to begin the sequence (0, 1, N/2). If we begin with an index
29670 from the second operand, we can swap the operands. */
29671 if (d
->perm
[0] >= d
->nelt
)
29673 unsigned i
, nelt
= d
->nelt
;
29676 for (i
= 0; i
< nelt
; ++i
)
29677 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
29686 if (arm_evpc_neon_vuzp (d
))
29688 if (arm_evpc_neon_vzip (d
))
29690 if (arm_evpc_neon_vrev (d
))
29692 if (arm_evpc_neon_vtrn (d
))
29694 return arm_evpc_neon_vtbl (d
);
29699 /* Expand a vec_perm_const pattern. */
29702 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29704 struct expand_vec_perm_d d
;
29705 int i
, nelt
, which
;
29711 d
.vmode
= GET_MODE (target
);
29712 gcc_assert (VECTOR_MODE_P (d
.vmode
));
29713 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29714 d
.testing_p
= false;
29716 for (i
= which
= 0; i
< nelt
; ++i
)
29718 rtx e
= XVECEXP (sel
, 0, i
);
29719 int ei
= INTVAL (e
) & (2 * nelt
- 1);
29720 which
|= (ei
< nelt
? 1 : 2);
29730 d
.one_vector_p
= false;
29731 if (!rtx_equal_p (op0
, op1
))
29734 /* The elements of PERM do not suggest that only the first operand
29735 is used, but both operands are identical. Allow easier matching
29736 of the permutation by folding the permutation into the single
29740 for (i
= 0; i
< nelt
; ++i
)
29741 d
.perm
[i
] &= nelt
- 1;
29743 d
.one_vector_p
= true;
29748 d
.one_vector_p
= true;
29752 return arm_expand_vec_perm_const_1 (&d
);
29755 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29758 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
29759 const unsigned char *sel
)
29761 struct expand_vec_perm_d d
;
29762 unsigned int i
, nelt
, which
;
29766 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29767 d
.testing_p
= true;
29768 memcpy (d
.perm
, sel
, nelt
);
29770 /* Categorize the set of elements in the selector. */
29771 for (i
= which
= 0; i
< nelt
; ++i
)
29773 unsigned char e
= d
.perm
[i
];
29774 gcc_assert (e
< 2 * nelt
);
29775 which
|= (e
< nelt
? 1 : 2);
29778 /* For all elements from second vector, fold the elements to first. */
29780 for (i
= 0; i
< nelt
; ++i
)
29783 /* Check whether the mask can be applied to the vector type. */
29784 d
.one_vector_p
= (which
!= 3);
29786 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
29787 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
29788 if (!d
.one_vector_p
)
29789 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
29792 ret
= arm_expand_vec_perm_const_1 (&d
);
29799 arm_autoinc_modes_ok_p (enum machine_mode mode
, enum arm_auto_incmodes code
)
29801 /* If we are soft float and we do not have ldrd
29802 then all auto increment forms are ok. */
29803 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
29808 /* Post increment and Pre Decrement are supported for all
29809 instruction forms except for vector forms. */
29812 if (VECTOR_MODE_P (mode
))
29814 if (code
!= ARM_PRE_DEC
)
29824 /* Without LDRD and mode size greater than
29825 word size, there is no point in auto-incrementing
29826 because ldm and stm will not have these forms. */
29827 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29830 /* Vector and floating point modes do not support
29831 these auto increment forms. */
29832 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29845 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29846 on ARM, since we know that shifts by negative amounts are no-ops.
29847 Additionally, the default expansion code is not available or suitable
29848 for post-reload insn splits (this can occur when the register allocator
29849 chooses not to do a shift in NEON).
29851 This function is used in both initial expand and post-reload splits, and
29852 handles all kinds of 64-bit shifts.
29854 Input requirements:
29855 - It is safe for the input and output to be the same register, but
29856 early-clobber rules apply for the shift amount and scratch registers.
29857 - Shift by register requires both scratch registers. In all other cases
29858 the scratch registers may be NULL.
29859 - Ashiftrt by a register also clobbers the CC register. */
29861 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29862 rtx amount
, rtx scratch1
, rtx scratch2
)
29864 rtx out_high
= gen_highpart (SImode
, out
);
29865 rtx out_low
= gen_lowpart (SImode
, out
);
29866 rtx in_high
= gen_highpart (SImode
, in
);
29867 rtx in_low
= gen_lowpart (SImode
, in
);
29870 in = the register pair containing the input value.
29871 out = the destination register pair.
29872 up = the high- or low-part of each pair.
29873 down = the opposite part to "up".
29874 In a shift, we can consider bits to shift from "up"-stream to
29875 "down"-stream, so in a left-shift "up" is the low-part and "down"
29876 is the high-part of each register pair. */
29878 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29879 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29880 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29881 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29883 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29885 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29886 && GET_MODE (out
) == DImode
);
29888 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29889 && GET_MODE (in
) == DImode
);
29891 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29892 && GET_MODE (amount
) == SImode
)
29893 || CONST_INT_P (amount
)));
29894 gcc_assert (scratch1
== NULL
29895 || (GET_CODE (scratch1
) == SCRATCH
)
29896 || (GET_MODE (scratch1
) == SImode
29897 && REG_P (scratch1
)));
29898 gcc_assert (scratch2
== NULL
29899 || (GET_CODE (scratch2
) == SCRATCH
)
29900 || (GET_MODE (scratch2
) == SImode
29901 && REG_P (scratch2
)));
29902 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29903 || !HARD_REGISTER_P (out
)
29904 || (REGNO (out
) != REGNO (amount
)
29905 && REGNO (out
) + 1 != REGNO (amount
)));
29907 /* Macros to make following code more readable. */
29908 #define SUB_32(DEST,SRC) \
29909 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29910 #define RSB_32(DEST,SRC) \
29911 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29912 #define SUB_S_32(DEST,SRC) \
29913 gen_addsi3_compare0 ((DEST), (SRC), \
29915 #define SET(DEST,SRC) \
29916 gen_rtx_SET (SImode, (DEST), (SRC))
29917 #define SHIFT(CODE,SRC,AMOUNT) \
29918 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29919 #define LSHIFT(CODE,SRC,AMOUNT) \
29920 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29921 SImode, (SRC), (AMOUNT))
29922 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29923 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29924 SImode, (SRC), (AMOUNT))
29926 gen_rtx_IOR (SImode, (A), (B))
29927 #define BRANCH(COND,LABEL) \
29928 gen_arm_cond_branch ((LABEL), \
29929 gen_rtx_ ## COND (CCmode, cc_reg, \
29933 /* Shifts by register and shifts by constant are handled separately. */
29934 if (CONST_INT_P (amount
))
29936 /* We have a shift-by-constant. */
29938 /* First, handle out-of-range shift amounts.
29939 In both cases we try to match the result an ARM instruction in a
29940 shift-by-register would give. This helps reduce execution
29941 differences between optimization levels, but it won't stop other
29942 parts of the compiler doing different things. This is "undefined
29943 behaviour, in any case. */
29944 if (INTVAL (amount
) <= 0)
29945 emit_insn (gen_movdi (out
, in
));
29946 else if (INTVAL (amount
) >= 64)
29948 if (code
== ASHIFTRT
)
29950 rtx const31_rtx
= GEN_INT (31);
29951 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29952 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29955 emit_insn (gen_movdi (out
, const0_rtx
));
29958 /* Now handle valid shifts. */
29959 else if (INTVAL (amount
) < 32)
29961 /* Shifts by a constant less than 32. */
29962 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29964 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29965 emit_insn (SET (out_down
,
29966 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29968 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29972 /* Shifts by a constant greater than 31. */
29973 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29975 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29976 if (code
== ASHIFTRT
)
29977 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29980 emit_insn (SET (out_up
, const0_rtx
));
29985 /* We have a shift-by-register. */
29986 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29988 /* This alternative requires the scratch registers. */
29989 gcc_assert (scratch1
&& REG_P (scratch1
));
29990 gcc_assert (scratch2
&& REG_P (scratch2
));
29992 /* We will need the values "amount-32" and "32-amount" later.
29993 Swapping them around now allows the later code to be more general. */
29997 emit_insn (SUB_32 (scratch1
, amount
));
29998 emit_insn (RSB_32 (scratch2
, amount
));
30001 emit_insn (RSB_32 (scratch1
, amount
));
30002 /* Also set CC = amount > 32. */
30003 emit_insn (SUB_S_32 (scratch2
, amount
));
30006 emit_insn (RSB_32 (scratch1
, amount
));
30007 emit_insn (SUB_32 (scratch2
, amount
));
30010 gcc_unreachable ();
30013 /* Emit code like this:
30016 out_down = in_down << amount;
30017 out_down = (in_up << (amount - 32)) | out_down;
30018 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30019 out_up = in_up << amount;
30022 out_down = in_down >> amount;
30023 out_down = (in_up << (32 - amount)) | out_down;
30025 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30026 out_up = in_up << amount;
30029 out_down = in_down >> amount;
30030 out_down = (in_up << (32 - amount)) | out_down;
30032 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30033 out_up = in_up << amount;
30035 The ARM and Thumb2 variants are the same but implemented slightly
30036 differently. If this were only called during expand we could just
30037 use the Thumb2 case and let combine do the right thing, but this
30038 can also be called from post-reload splitters. */
30040 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
30042 if (!TARGET_THUMB2
)
30044 /* Emit code for ARM mode. */
30045 emit_insn (SET (out_down
,
30046 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
30047 if (code
== ASHIFTRT
)
30049 rtx done_label
= gen_label_rtx ();
30050 emit_jump_insn (BRANCH (LT
, done_label
));
30051 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
30053 emit_label (done_label
);
30056 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
30061 /* Emit code for Thumb2 mode.
30062 Thumb2 can't do shift and or in one insn. */
30063 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
30064 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
30066 if (code
== ASHIFTRT
)
30068 rtx done_label
= gen_label_rtx ();
30069 emit_jump_insn (BRANCH (LT
, done_label
));
30070 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
30071 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
30072 emit_label (done_label
);
30076 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
30077 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
30081 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
30096 /* Returns true if a valid comparison operation and makes
30097 the operands in a form that is valid. */
30099 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
30101 enum rtx_code code
= GET_CODE (*comparison
);
30103 enum machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
30104 ? GET_MODE (*op2
) : GET_MODE (*op1
);
30106 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
30108 if (code
== UNEQ
|| code
== LTGT
)
30111 code_int
= (int)code
;
30112 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
30113 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
30118 if (!arm_add_operand (*op1
, mode
))
30119 *op1
= force_reg (mode
, *op1
);
30120 if (!arm_add_operand (*op2
, mode
))
30121 *op2
= force_reg (mode
, *op2
);
30125 if (!cmpdi_operand (*op1
, mode
))
30126 *op1
= force_reg (mode
, *op1
);
30127 if (!cmpdi_operand (*op2
, mode
))
30128 *op2
= force_reg (mode
, *op2
);
30133 if (!arm_float_compare_operand (*op1
, mode
))
30134 *op1
= force_reg (mode
, *op1
);
30135 if (!arm_float_compare_operand (*op2
, mode
))
30136 *op2
= force_reg (mode
, *op2
);
30146 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30148 static unsigned HOST_WIDE_INT
30149 arm_asan_shadow_offset (void)
30151 return (unsigned HOST_WIDE_INT
) 1 << 29;
30154 #include "gt-arm.h"