1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
26 #include "hash-table.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
40 #include "insn-attr.h"
46 #include "diagnostic-core.h"
53 #include "target-def.h"
55 #include "langhooks.h"
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode
;
65 typedef struct minipool_fixup Mfix
;
/* Language-specific hook used when emitting EABI object attributes;
   NOTE(review): semantics inferred from the name only — the setter is
   not visible in this chunk, confirm against the front ends.  */
67 void (*arm_lang_output_object_attributes_hook
)(void);
74 /* Forward function declarations. */
75 static bool arm_const_not_ok_for_debug_p (rtx
);
76 static bool arm_lra_p (void);
77 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
78 static int arm_compute_static_chain_stack_bytes (void);
79 static arm_stack_offsets
*arm_get_frame_offsets (void);
80 static void arm_add_gc_roots (void);
81 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
82 HOST_WIDE_INT
, rtx
, rtx
, int, int);
83 static unsigned bit_count (unsigned long);
84 static int arm_address_register_rtx_p (rtx
, int);
85 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
86 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
87 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
88 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
89 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
90 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
91 inline static int thumb1_index_register_rtx_p (rtx
, int);
92 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
93 static int thumb_far_jump_used_p (void);
94 static bool thumb_force_lr_save (void);
95 static unsigned arm_size_return_regs (void);
96 static bool arm_assemble_integer (rtx
, unsigned int, int);
97 static void arm_print_operand (FILE *, rtx
, int);
98 static void arm_print_operand_address (FILE *, rtx
);
99 static bool arm_print_operand_punct_valid_p (unsigned char code
);
100 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
101 static arm_cc
get_arm_condition_code (rtx
);
102 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
103 static const char *output_multi_immediate (rtx
*, const char *, const char *,
105 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
106 static struct machine_function
*arm_init_machine_status (void);
107 static void thumb_exit (FILE *, int);
108 static HOST_WIDE_INT
get_jump_table_size (rtx
);
109 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
110 static Mnode
*add_minipool_forward_ref (Mfix
*);
111 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
112 static Mnode
*add_minipool_backward_ref (Mfix
*);
113 static void assign_minipool_offsets (Mfix
*);
114 static void arm_print_value (FILE *, rtx
);
115 static void dump_minipool (rtx
);
116 static int arm_barrier_cost (rtx
);
117 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
118 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
119 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
121 static void arm_reorg (void);
122 static void note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
123 static unsigned long arm_compute_save_reg0_reg12_mask (void);
124 static unsigned long arm_compute_save_reg_mask (void);
125 static unsigned long arm_isr_value (tree
);
126 static unsigned long arm_compute_func_type (void);
127 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
128 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
129 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
130 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
131 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
133 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
134 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
135 static int arm_comp_type_attributes (const_tree
, const_tree
);
136 static void arm_set_default_type_attributes (tree
);
137 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
138 static int arm_sched_reorder (FILE *, int, rtx
*, int *, int);
139 static int optimal_immediate_sequence (enum rtx_code code
,
140 unsigned HOST_WIDE_INT val
,
141 struct four_ints
*return_sequence
);
142 static int optimal_immediate_sequence_1 (enum rtx_code code
,
143 unsigned HOST_WIDE_INT val
,
144 struct four_ints
*return_sequence
,
146 static int arm_get_strip_length (int);
147 static bool arm_function_ok_for_sibcall (tree
, tree
);
148 static enum machine_mode
arm_promote_function_mode (const_tree
,
149 enum machine_mode
, int *,
151 static bool arm_return_in_memory (const_tree
, const_tree
);
152 static rtx
arm_function_value (const_tree
, const_tree
, bool);
153 static rtx
arm_libcall_value_1 (enum machine_mode
);
154 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
155 static bool arm_function_value_regno_p (const unsigned int);
156 static void arm_internal_label (FILE *, const char *, unsigned long);
157 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
159 static bool arm_have_conditional_execution (void);
160 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
161 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
162 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
163 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
164 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
165 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
166 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
167 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
168 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
169 static int arm_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
170 static int arm_register_move_cost (enum machine_mode
, reg_class_t
, reg_class_t
);
171 static int arm_memory_move_cost (enum machine_mode
, reg_class_t
, bool);
172 static void arm_init_builtins (void);
173 static void arm_init_iwmmxt_builtins (void);
174 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
175 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
176 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
177 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
178 static tree
arm_builtin_decl (unsigned, bool);
179 static void emit_constant_insn (rtx cond
, rtx pattern
);
180 static rtx
emit_set_insn (rtx
, rtx
);
181 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
182 static int arm_arg_partial_bytes (cumulative_args_t
, enum machine_mode
,
184 static rtx
arm_function_arg (cumulative_args_t
, enum machine_mode
,
186 static void arm_function_arg_advance (cumulative_args_t
, enum machine_mode
,
188 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
189 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
191 static rtx
aapcs_libcall_value (enum machine_mode
);
192 static int aapcs_select_return_coproc (const_tree
, const_tree
);
194 #ifdef OBJECT_FORMAT_ELF
195 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
196 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
199 static void arm_encode_section_info (tree
, rtx
, int);
202 static void arm_file_end (void);
203 static void arm_file_start (void);
205 static void arm_setup_incoming_varargs (cumulative_args_t
, enum machine_mode
,
207 static bool arm_pass_by_reference (cumulative_args_t
,
208 enum machine_mode
, const_tree
, bool);
209 static bool arm_promote_prototypes (const_tree
);
210 static bool arm_default_short_enums (void);
211 static bool arm_align_anon_bitfield (void);
212 static bool arm_return_in_msb (const_tree
);
213 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
214 static bool arm_return_in_memory (const_tree
, const_tree
);
216 static void arm_unwind_emit (FILE *, rtx
);
217 static bool arm_output_ttype (rtx
);
218 static void arm_asm_emit_except_personality (rtx
);
219 static void arm_asm_init_sections (void);
221 static rtx
arm_dwarf_register_span (rtx
);
223 static tree
arm_cxx_guard_type (void);
224 static bool arm_cxx_guard_mask_bit (void);
225 static tree
arm_get_cookie_size (tree
);
226 static bool arm_cookie_has_size (void);
227 static bool arm_cxx_cdtor_returns_this (void);
228 static bool arm_cxx_key_method_may_be_inline (void);
229 static void arm_cxx_determine_class_data_visibility (tree
);
230 static bool arm_cxx_class_data_always_comdat (void);
231 static bool arm_cxx_use_aeabi_atexit (void);
232 static void arm_init_libfuncs (void);
233 static tree
arm_build_builtin_va_list (void);
234 static void arm_expand_builtin_va_start (tree
, rtx
);
235 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
236 static void arm_option_override (void);
237 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
238 static bool arm_cannot_copy_insn_p (rtx
);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
241 static bool arm_output_addr_const_extra (FILE *, rtx
);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree
);
244 static const char *arm_invalid_parameter_type (const_tree t
);
245 static const char *arm_invalid_return_type (const_tree t
);
246 static tree
arm_promoted_type (const_tree t
);
247 static tree
arm_convert_to_type (tree type
, tree expr
);
248 static bool arm_scalar_mode_supported_p (enum machine_mode
);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx
, tree
, rtx
);
253 static rtx
arm_trampoline_adjust_address (rtx
);
254 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
255 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
256 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
257 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode
,
259 unsigned HOST_WIDE_INT
);
260 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
261 static bool arm_class_likely_spilled_p (reg_class_t
);
262 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
263 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
268 static void arm_conditional_register_usage (void);
269 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
276 const unsigned char *sel
);
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
280 int misalign ATTRIBUTE_UNUSED
);
281 static unsigned arm_add_stmt_cost (void *data
, int count
,
282 enum vect_cost_for_stmt kind
,
283 struct _stmt_vec_info
*stmt_info
,
285 enum vect_cost_model_location where
);
287 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
288 bool op0_preserve_value
);
289 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
291 /* Table of machine attributes. */
292 static const struct attribute_spec arm_attribute_table
[] =
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside of the 26 bit addressing range of a normal function
299 { "long_call", 0, 0, false, true, true, NULL
, false },
300 /* Whereas these functions are always known to reside within the 26 bit
302 { "short_call", 0, 0, false, true, true, NULL
, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
314 /* ARM/PE has three new attributes:
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
323 { "dllimport", 0, 0, true, false, false, NULL
, false },
324 { "dllexport", 0, 0, true, false, false, NULL
, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
333 { NULL
, 0, 0, false, false, false, NULL
, false }
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

/* NOTE(review): the matching #undef for TARGET_LRA_P was lost in this
   copy of the file; restored so redefinition does not warn.  */
#undef  TARGET_LRA_P
#define TARGET_LRA_P arm_lra_p
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
/* NOTE(review): two bare definitions of TARGET_ENCODE_SECTION_INFO
   appeared here with no conditional — the ARM_PE preprocessor guard was
   lost in this copy of the file and is restored, selecting the PE-specific
   hook only when building the ARM/PE target.  */
#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is -4088 + 1 + 4095 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
678 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
679 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
/* The target hook vector: defaults come from TARGET_INITIALIZER, with the
   TARGET_* macro overrides above substituted in.  */
681 struct gcc_target targetm
= TARGET_INITIALIZER
;
683 /* Obstack for minipool constant handling. */
684 static struct obstack minipool_obstack
;
/* First object allocated on the minipool obstack; used to free it.  */
685 static char * minipool_startobj
;
687 /* The maximum number of insns skipped which
688 will be conditionalised if possible. */
689 static int max_insns_skipped
= 5;
691 extern FILE * asm_out_file
;
693 /* True if we are currently building a constant table. */
694 int making_const_table
;
696 /* The processor for which instructions should be scheduled. */
697 enum processor_type arm_tune
= arm_none
;
699 /* The current tuning set. */
700 const struct tune_params
*current_tune
;
702 /* Which floating point hardware to schedule for.
   NOTE(review): the variable this comment described appears to have been
   lost in this copy of the file. */
705 /* Which floating point hardware to use. */
706 const struct arm_fpu_desc
*arm_fpu_desc
;
708 /* Used for Thumb call_via trampolines. */
709 rtx thumb_call_via_label
[14];
710 static int thumb_call_reg_needed
;
712 /* Bit values used to identify processor capabilities. */
713 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
714 #define FL_ARCH3M (1 << 1) /* Extended multiply */
715 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
716 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
717 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
718 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
719 #define FL_THUMB (1 << 6) /* Thumb aware */
720 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
721 #define FL_STRONG (1 << 8) /* StrongARM */
722 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
723 #define FL_XSCALE (1 << 10) /* XScale */
724 /* spare (1 << 11) */
725 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
726 media instructions. */
727 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
728 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
729 Note: ARM6 & 7 derivatives only. */
730 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
731 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
732 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M' profile. */
734 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
735 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
736 #define FL_NEON (1 << 20) /* Neon instructions. */
737 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M architecture. */
739 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
740 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
741 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
742 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
744 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
745 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
/* Flags that only affect tuning, not available instructions.
   NOTE(review): the continuation line of this macro (trailing "\") was
   lost in this copy of the file; restored here.  */
#define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
			 | FL_CO_PROC)
/* Capability-flag sets for each architecture level; each later
   architecture is expressed in terms of the flags of an earlier one.  */
751 #define FL_FOR_ARCH2 FL_NOTM
752 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
753 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
754 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
755 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
756 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
757 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
758 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
759 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
760 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
761 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
762 #define FL_FOR_ARCH6J FL_FOR_ARCH6
763 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
764 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
765 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
766 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
767 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
768 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
769 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
770 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
771 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
772 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
773 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
774 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
776 /* The bits in this mask specify which
777 instructions we are allowed to generate. */
778 static unsigned long insn_flags
= 0;
780 /* The bits in this mask specify which instruction scheduling options should
    be used. */
782 static unsigned long tune_flags
= 0;
784 /* The highest ARM architecture version supported by the
    target. */
786 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.
   NOTE(review): the variable definitions in this span were lost in this
   copy of the file, leaving only their comments; restored here with
   their conventional names and zero initializers.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;
827 /* Nonzero if this chip can benefit from load scheduling. */
828 int arm_ld_sched
= 0;
830 /* Nonzero if this chip is a StrongARM. */
831 int arm_tune_strongarm
= 0;
833 /* Nonzero if this chip supports Intel Wireless MMX technology. */
834 int arm_arch_iwmmxt
= 0;
836 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
837 int arm_arch_iwmmxt2
= 0;
839 /* Nonzero if this chip is an XScale. */
840 int arm_arch_xscale
= 0;
842 /* Nonzero if tuning for XScale */
843 int arm_tune_xscale
= 0;
845 /* Nonzero if we want to tune for stores that access the write-buffer.
846 This typically means an ARM6 or ARM7 with MMU or MPU. */
847 int arm_tune_wbuf
= 0;
849 /* Nonzero if tuning for Cortex-A9. */
850 int arm_tune_cortex_a9
= 0;
852 /* Nonzero if generating Thumb instructions. */
855 /* Nonzero if generating Thumb-1 instructions. */
858 /* Nonzero if we should define __THUMB_INTERWORK__ in the
860 XXX This is a bit of a hack, it's intended to help work around
861 problems in GLD which doesn't understand that armv5t code is
862 interworking clean. */
863 int arm_cpp_interwork
= 0;
865 /* Nonzero if chip supports Thumb 2. */
868 /* Nonzero if chip supports integer division instruction. */
869 int arm_arch_arm_hwdiv
;
870 int arm_arch_thumb_hwdiv
;
872 /* Nonzero if we should use Neon to handle 64-bits operations rather
873 than core registers. */
874 int prefer_neon_for_64bits
= 0;
876 /* Nonzero if we shouldn't use literal pools. */
877 bool arm_disable_literal_pool
= false;
879 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
880 we must report the mode of the memory reference from
881 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
882 enum machine_mode output_memory_reference_mode
;
884 /* The register number to be used for the PIC offset register. */
885 unsigned arm_pic_register
= INVALID_REGNUM
;
887 enum arm_pcs arm_pcs_default
;
889 /* For an explanation of these variables, see final_prescan_insn below. */
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc
;
895 int arm_target_label
;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count
= 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask
= 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen
= 0;
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc
= 0;
/* Condition-code mnemonic table and helper macros.
   NOTE(review): the array initializer braces and terminating
   semicolons were lost in extraction; the 16 strings correspond to the
   ARM condition field encodings 0x0..0xF in order.  */
907 /* The condition codes of the ARM, and the inverse function. */
908 static const char * const arm_condition_codes
[] =
910 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
911 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
914 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
915 int arm_regs_in_sequence
[] =
917 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
/* "lsl" in unified assembly syntax, legacy "asl" otherwise.  */
920 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
921 #define streq(string1, string2) (strcmp (string1, string2) == 0)
/* Low registers usable as Thumb-2 work registers: r0-r7 minus the frame
   pointer, stack pointer, program counter and PIC register.  */
923 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
924 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
925 | (1 << PIC_OFFSET_TABLE_REGNUM)))
927 /* Initialization code. */
/* Fields of the per-CPU description record (the enclosing
   "struct processors { ... }" lines were lost in extraction; these are
   its members, used by the all_cores / all_architectures tables
   below).  */
931 const char *const name
;
932 enum processor_type core
;
934 enum base_architecture base_arch
;
935 const unsigned long flags
;
936 const struct tune_params
*const tune
;
/* Prefetch-model triple (num_slots, l1_cache_size, l1_cache_line_size)
   spliced into tune_params initializers; -1 means "unknown/none".  */
940 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
941 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
946 /* arm generic vectorizer costs. */
/* Default auto-vectorizer cost model: all operations cost one unit
   except a taken conditional branch (3).  NOTE(review): closing brace
   of the initializer was lost in extraction.  */
948 struct cpu_vec_costs arm_default_vec_cost
= {
949 1, /* scalar_stmt_cost. */
950 1, /* scalar load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 1, /* vec_unalign_load_cost. */
957 1, /* vec_unalign_store_cost. */
958 1, /* vec_store_cost. */
959 3, /* cond_taken_branch_cost. */
960 1, /* cond_not_taken_branch_cost. */
963 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
964 #include "aarch-cost-tables.h"
/* RTX cost table for the Cortex-A9.  Sections (alu, mult, ldst, fp,
   vect) follow struct cpu_cost_table; NOTE(review): the section braces
   and several entries were lost in extraction, so the grouping below is
   inferred from the field comments -- verify against the pristine
   arm.c.  */
968 const struct cpu_cost_table cortexa9_extra_costs
=
975 COSTS_N_INSNS (1), /* shift_reg. */
976 COSTS_N_INSNS (1), /* arith_shift. */
977 COSTS_N_INSNS (2), /* arith_shift_reg. */
979 COSTS_N_INSNS (1), /* log_shift_reg. */
980 COSTS_N_INSNS (1), /* extend. */
981 COSTS_N_INSNS (2), /* extend_arith. */
982 COSTS_N_INSNS (1), /* bfi. */
983 COSTS_N_INSNS (1), /* bfx. */
987 true /* non_exec_costs_exec. */
992 COSTS_N_INSNS (3), /* simple. */
993 COSTS_N_INSNS (3), /* flag_setting. */
994 COSTS_N_INSNS (2), /* extend. */
995 COSTS_N_INSNS (3), /* add. */
996 COSTS_N_INSNS (2), /* extend_add. */
997 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1001 0, /* simple (N/A). */
1002 0, /* flag_setting (N/A). */
1003 COSTS_N_INSNS (4), /* extend. */
1005 COSTS_N_INSNS (4), /* extend_add. */
1011 COSTS_N_INSNS (2), /* load. */
1012 COSTS_N_INSNS (2), /* load_sign_extend. */
1013 COSTS_N_INSNS (2), /* ldrd. */
1014 COSTS_N_INSNS (2), /* ldm_1st. */
1015 1, /* ldm_regs_per_insn_1st. */
1016 2, /* ldm_regs_per_insn_subsequent. */
1017 COSTS_N_INSNS (5), /* loadf. */
1018 COSTS_N_INSNS (5), /* loadd. */
1019 COSTS_N_INSNS (1), /* load_unaligned. */
1020 COSTS_N_INSNS (2), /* store. */
1021 COSTS_N_INSNS (2), /* strd. */
1022 COSTS_N_INSNS (2), /* stm_1st. */
1023 1, /* stm_regs_per_insn_1st. */
1024 2, /* stm_regs_per_insn_subsequent. */
1025 COSTS_N_INSNS (1), /* storef. */
1026 COSTS_N_INSNS (1), /* stored. */
1027 COSTS_N_INSNS (1) /* store_unaligned. */
1032 COSTS_N_INSNS (14), /* div. */
1033 COSTS_N_INSNS (4), /* mult. */
1034 COSTS_N_INSNS (7), /* mult_addsub. */
1035 COSTS_N_INSNS (30), /* fma. */
1036 COSTS_N_INSNS (3), /* addsub. */
1037 COSTS_N_INSNS (1), /* fpconst. */
1038 COSTS_N_INSNS (1), /* neg. */
1039 COSTS_N_INSNS (3), /* compare. */
1040 COSTS_N_INSNS (3), /* widen. */
1041 COSTS_N_INSNS (3), /* narrow. */
1042 COSTS_N_INSNS (3), /* toint. */
1043 COSTS_N_INSNS (3), /* fromint. */
1044 COSTS_N_INSNS (3) /* roundint. */
1048 COSTS_N_INSNS (24), /* div. */
1049 COSTS_N_INSNS (5), /* mult. */
1050 COSTS_N_INSNS (8), /* mult_addsub. */
1051 COSTS_N_INSNS (30), /* fma. */
1052 COSTS_N_INSNS (3), /* addsub. */
1053 COSTS_N_INSNS (1), /* fpconst. */
1054 COSTS_N_INSNS (1), /* neg. */
1055 COSTS_N_INSNS (3), /* compare. */
1056 COSTS_N_INSNS (3), /* widen. */
1057 COSTS_N_INSNS (3), /* narrow. */
1058 COSTS_N_INSNS (3), /* toint. */
1059 COSTS_N_INSNS (3), /* fromint. */
1060 COSTS_N_INSNS (3) /* roundint. */
1065 COSTS_N_INSNS (1) /* alu. */
/* RTX cost table for the Cortex-A8.  Same layout caveat as the A9
   table above: section braces and some entries were lost in
   extraction.  */
1069 const struct cpu_cost_table cortexa8_extra_costs
=
1075 COSTS_N_INSNS (1), /* shift. */
1077 COSTS_N_INSNS (1), /* arith_shift. */
1078 0, /* arith_shift_reg. */
1079 COSTS_N_INSNS (1), /* log_shift. */
1080 0, /* log_shift_reg. */
1082 0, /* extend_arith. */
1088 true /* non_exec_costs_exec. */
1093 COSTS_N_INSNS (1), /* simple. */
1094 COSTS_N_INSNS (1), /* flag_setting. */
1095 COSTS_N_INSNS (1), /* extend. */
1096 COSTS_N_INSNS (1), /* add. */
1097 COSTS_N_INSNS (1), /* extend_add. */
1098 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1102 0, /* simple (N/A). */
1103 0, /* flag_setting (N/A). */
1104 COSTS_N_INSNS (2), /* extend. */
1106 COSTS_N_INSNS (2), /* extend_add. */
1112 COSTS_N_INSNS (1), /* load. */
1113 COSTS_N_INSNS (1), /* load_sign_extend. */
1114 COSTS_N_INSNS (1), /* ldrd. */
1115 COSTS_N_INSNS (1), /* ldm_1st. */
1116 1, /* ldm_regs_per_insn_1st. */
1117 2, /* ldm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (1), /* loadf. */
1119 COSTS_N_INSNS (1), /* loadd. */
1120 COSTS_N_INSNS (1), /* load_unaligned. */
1121 COSTS_N_INSNS (1), /* store. */
1122 COSTS_N_INSNS (1), /* strd. */
1123 COSTS_N_INSNS (1), /* stm_1st. */
1124 1, /* stm_regs_per_insn_1st. */
1125 2, /* stm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (1), /* storef. */
1127 COSTS_N_INSNS (1), /* stored. */
1128 COSTS_N_INSNS (1) /* store_unaligned. */
1133 COSTS_N_INSNS (36), /* div. */
1134 COSTS_N_INSNS (11), /* mult. */
1135 COSTS_N_INSNS (20), /* mult_addsub. */
1136 COSTS_N_INSNS (30), /* fma. */
1137 COSTS_N_INSNS (9), /* addsub. */
1138 COSTS_N_INSNS (3), /* fpconst. */
1139 COSTS_N_INSNS (3), /* neg. */
1140 COSTS_N_INSNS (6), /* compare. */
1141 COSTS_N_INSNS (4), /* widen. */
1142 COSTS_N_INSNS (4), /* narrow. */
1143 COSTS_N_INSNS (8), /* toint. */
1144 COSTS_N_INSNS (8), /* fromint. */
1145 COSTS_N_INSNS (8) /* roundint. */
1149 COSTS_N_INSNS (64), /* div. */
1150 COSTS_N_INSNS (16), /* mult. */
1151 COSTS_N_INSNS (25), /* mult_addsub. */
1152 COSTS_N_INSNS (30), /* fma. */
1153 COSTS_N_INSNS (9), /* addsub. */
1154 COSTS_N_INSNS (3), /* fpconst. */
1155 COSTS_N_INSNS (3), /* neg. */
1156 COSTS_N_INSNS (6), /* compare. */
1157 COSTS_N_INSNS (6), /* widen. */
1158 COSTS_N_INSNS (6), /* narrow. */
1159 COSTS_N_INSNS (8), /* toint. */
1160 COSTS_N_INSNS (8), /* fromint. */
1161 COSTS_N_INSNS (8) /* roundint. */
1166 COSTS_N_INSNS (1) /* alu. */
/* RTX cost table for the Cortex-A7 (has hardware integer divide:
   idiv = 7 insns).  Same extraction caveat as the tables above.  */
1172 const struct cpu_cost_table cortexa7_extra_costs
=
1178 COSTS_N_INSNS (1), /* shift. */
1179 COSTS_N_INSNS (1), /* shift_reg. */
1180 COSTS_N_INSNS (1), /* arith_shift. */
1181 COSTS_N_INSNS (1), /* arith_shift_reg. */
1182 COSTS_N_INSNS (1), /* log_shift. */
1183 COSTS_N_INSNS (1), /* log_shift_reg. */
1184 COSTS_N_INSNS (1), /* extend. */
1185 COSTS_N_INSNS (1), /* extend_arith. */
1186 COSTS_N_INSNS (1), /* bfi. */
1187 COSTS_N_INSNS (1), /* bfx. */
1188 COSTS_N_INSNS (1), /* clz. */
1189 COSTS_N_INSNS (1), /* rev. */
1191 true /* non_exec_costs_exec. */
1198 COSTS_N_INSNS (1), /* flag_setting. */
1199 COSTS_N_INSNS (1), /* extend. */
1200 COSTS_N_INSNS (1), /* add. */
1201 COSTS_N_INSNS (1), /* extend_add. */
1202 COSTS_N_INSNS (7) /* idiv. */
1206 0, /* simple (N/A). */
1207 0, /* flag_setting (N/A). */
1208 COSTS_N_INSNS (1), /* extend. */
1210 COSTS_N_INSNS (2), /* extend_add. */
1216 COSTS_N_INSNS (1), /* load. */
1217 COSTS_N_INSNS (1), /* load_sign_extend. */
1218 COSTS_N_INSNS (3), /* ldrd. */
1219 COSTS_N_INSNS (1), /* ldm_1st. */
1220 1, /* ldm_regs_per_insn_1st. */
1221 2, /* ldm_regs_per_insn_subsequent. */
1222 COSTS_N_INSNS (2), /* loadf. */
1223 COSTS_N_INSNS (2), /* loadd. */
1224 COSTS_N_INSNS (1), /* load_unaligned. */
1225 COSTS_N_INSNS (1), /* store. */
1226 COSTS_N_INSNS (3), /* strd. */
1227 COSTS_N_INSNS (1), /* stm_1st. */
1228 1, /* stm_regs_per_insn_1st. */
1229 2, /* stm_regs_per_insn_subsequent. */
1230 COSTS_N_INSNS (2), /* storef. */
1231 COSTS_N_INSNS (2), /* stored. */
1232 COSTS_N_INSNS (1) /* store_unaligned. */
1237 COSTS_N_INSNS (15), /* div. */
1238 COSTS_N_INSNS (3), /* mult. */
1239 COSTS_N_INSNS (7), /* mult_addsub. */
1240 COSTS_N_INSNS (7), /* fma. */
1241 COSTS_N_INSNS (3), /* addsub. */
1242 COSTS_N_INSNS (3), /* fpconst. */
1243 COSTS_N_INSNS (3), /* neg. */
1244 COSTS_N_INSNS (3), /* compare. */
1245 COSTS_N_INSNS (3), /* widen. */
1246 COSTS_N_INSNS (3), /* narrow. */
1247 COSTS_N_INSNS (3), /* toint. */
1248 COSTS_N_INSNS (3), /* fromint. */
1249 COSTS_N_INSNS (3) /* roundint. */
1253 COSTS_N_INSNS (30), /* div. */
1254 COSTS_N_INSNS (6), /* mult. */
1255 COSTS_N_INSNS (10), /* mult_addsub. */
1256 COSTS_N_INSNS (7), /* fma. */
1257 COSTS_N_INSNS (3), /* addsub. */
1258 COSTS_N_INSNS (3), /* fpconst. */
1259 COSTS_N_INSNS (3), /* neg. */
1260 COSTS_N_INSNS (3), /* compare. */
1261 COSTS_N_INSNS (3), /* widen. */
1262 COSTS_N_INSNS (3), /* narrow. */
1263 COSTS_N_INSNS (3), /* toint. */
1264 COSTS_N_INSNS (3), /* fromint. */
1265 COSTS_N_INSNS (3) /* roundint. */
1270 COSTS_N_INSNS (1) /* alu. */
/* RTX cost table for the Cortex-A12.  Same extraction caveat as the
   tables above (section braces and some entries missing).  */
1274 const struct cpu_cost_table cortexa12_extra_costs
=
1281 COSTS_N_INSNS (1), /* shift_reg. */
1282 COSTS_N_INSNS (1), /* arith_shift. */
1283 COSTS_N_INSNS (1), /* arith_shift_reg. */
1284 COSTS_N_INSNS (1), /* log_shift. */
1285 COSTS_N_INSNS (1), /* log_shift_reg. */
1287 COSTS_N_INSNS (1), /* extend_arith. */
1289 COSTS_N_INSNS (1), /* bfx. */
1290 COSTS_N_INSNS (1), /* clz. */
1291 COSTS_N_INSNS (1), /* rev. */
1293 true /* non_exec_costs_exec. */
1298 COSTS_N_INSNS (2), /* simple. */
1299 COSTS_N_INSNS (3), /* flag_setting. */
1300 COSTS_N_INSNS (2), /* extend. */
1301 COSTS_N_INSNS (3), /* add. */
1302 COSTS_N_INSNS (2), /* extend_add. */
1303 COSTS_N_INSNS (18) /* idiv. */
1307 0, /* simple (N/A). */
1308 0, /* flag_setting (N/A). */
1309 COSTS_N_INSNS (3), /* extend. */
1311 COSTS_N_INSNS (3), /* extend_add. */
1317 COSTS_N_INSNS (3), /* load. */
1318 COSTS_N_INSNS (3), /* load_sign_extend. */
1319 COSTS_N_INSNS (3), /* ldrd. */
1320 COSTS_N_INSNS (3), /* ldm_1st. */
1321 1, /* ldm_regs_per_insn_1st. */
1322 2, /* ldm_regs_per_insn_subsequent. */
1323 COSTS_N_INSNS (3), /* loadf. */
1324 COSTS_N_INSNS (3), /* loadd. */
1325 0, /* load_unaligned. */
1329 1, /* stm_regs_per_insn_1st. */
1330 2, /* stm_regs_per_insn_subsequent. */
1331 COSTS_N_INSNS (2), /* storef. */
1332 COSTS_N_INSNS (2), /* stored. */
1333 0 /* store_unaligned. */
1338 COSTS_N_INSNS (17), /* div. */
1339 COSTS_N_INSNS (4), /* mult. */
1340 COSTS_N_INSNS (8), /* mult_addsub. */
1341 COSTS_N_INSNS (8), /* fma. */
1342 COSTS_N_INSNS (4), /* addsub. */
1343 COSTS_N_INSNS (2), /* fpconst. */
1344 COSTS_N_INSNS (2), /* neg. */
1345 COSTS_N_INSNS (2), /* compare. */
1346 COSTS_N_INSNS (4), /* widen. */
1347 COSTS_N_INSNS (4), /* narrow. */
1348 COSTS_N_INSNS (4), /* toint. */
1349 COSTS_N_INSNS (4), /* fromint. */
1350 COSTS_N_INSNS (4) /* roundint. */
1354 COSTS_N_INSNS (31), /* div. */
1355 COSTS_N_INSNS (4), /* mult. */
1356 COSTS_N_INSNS (8), /* mult_addsub. */
1357 COSTS_N_INSNS (8), /* fma. */
1358 COSTS_N_INSNS (4), /* addsub. */
1359 COSTS_N_INSNS (2), /* fpconst. */
1360 COSTS_N_INSNS (2), /* neg. */
1361 COSTS_N_INSNS (2), /* compare. */
1362 COSTS_N_INSNS (4), /* widen. */
1363 COSTS_N_INSNS (4), /* narrow. */
1364 COSTS_N_INSNS (4), /* toint. */
1365 COSTS_N_INSNS (4), /* fromint. */
1366 COSTS_N_INSNS (4) /* roundint. */
1371 COSTS_N_INSNS (1) /* alu. */
/* RTX cost table for the Cortex-A15.  Same extraction caveat as the
   tables above.  */
1375 const struct cpu_cost_table cortexa15_extra_costs
=
1383 COSTS_N_INSNS (1), /* arith_shift. */
1384 COSTS_N_INSNS (1), /* arith_shift_reg. */
1385 COSTS_N_INSNS (1), /* log_shift. */
1386 COSTS_N_INSNS (1), /* log_shift_reg. */
1388 COSTS_N_INSNS (1), /* extend_arith. */
1389 COSTS_N_INSNS (1), /* bfi. */
1394 true /* non_exec_costs_exec. */
1399 COSTS_N_INSNS (2), /* simple. */
1400 COSTS_N_INSNS (3), /* flag_setting. */
1401 COSTS_N_INSNS (2), /* extend. */
1402 COSTS_N_INSNS (2), /* add. */
1403 COSTS_N_INSNS (2), /* extend_add. */
1404 COSTS_N_INSNS (18) /* idiv. */
1408 0, /* simple (N/A). */
1409 0, /* flag_setting (N/A). */
1410 COSTS_N_INSNS (3), /* extend. */
1412 COSTS_N_INSNS (3), /* extend_add. */
1418 COSTS_N_INSNS (3), /* load. */
1419 COSTS_N_INSNS (3), /* load_sign_extend. */
1420 COSTS_N_INSNS (3), /* ldrd. */
1421 COSTS_N_INSNS (4), /* ldm_1st. */
1422 1, /* ldm_regs_per_insn_1st. */
1423 2, /* ldm_regs_per_insn_subsequent. */
1424 COSTS_N_INSNS (4), /* loadf. */
1425 COSTS_N_INSNS (4), /* loadd. */
1426 0, /* load_unaligned. */
1429 COSTS_N_INSNS (1), /* stm_1st. */
1430 1, /* stm_regs_per_insn_1st. */
1431 2, /* stm_regs_per_insn_subsequent. */
1434 0 /* store_unaligned. */
1439 COSTS_N_INSNS (17), /* div. */
1440 COSTS_N_INSNS (4), /* mult. */
1441 COSTS_N_INSNS (8), /* mult_addsub. */
1442 COSTS_N_INSNS (8), /* fma. */
1443 COSTS_N_INSNS (4), /* addsub. */
1444 COSTS_N_INSNS (2), /* fpconst. */
1445 COSTS_N_INSNS (2), /* neg. */
1446 COSTS_N_INSNS (5), /* compare. */
1447 COSTS_N_INSNS (4), /* widen. */
1448 COSTS_N_INSNS (4), /* narrow. */
1449 COSTS_N_INSNS (4), /* toint. */
1450 COSTS_N_INSNS (4), /* fromint. */
1451 COSTS_N_INSNS (4) /* roundint. */
1455 COSTS_N_INSNS (31), /* div. */
1456 COSTS_N_INSNS (4), /* mult. */
1457 COSTS_N_INSNS (8), /* mult_addsub. */
1458 COSTS_N_INSNS (8), /* fma. */
1459 COSTS_N_INSNS (4), /* addsub. */
1460 COSTS_N_INSNS (2), /* fpconst. */
1461 COSTS_N_INSNS (2), /* neg. */
1462 COSTS_N_INSNS (2), /* compare. */
1463 COSTS_N_INSNS (4), /* widen. */
1464 COSTS_N_INSNS (4), /* narrow. */
1465 COSTS_N_INSNS (4), /* toint. */
1466 COSTS_N_INSNS (4), /* fromint. */
1467 COSTS_N_INSNS (4) /* roundint. */
1472 COSTS_N_INSNS (1) /* alu. */
/* RTX cost table for ARMv7-M cores (e.g. Cortex-M3/M4).  Note
   non_exec_costs_exec is false here, unlike the A-profile tables.
   Same extraction caveat: section braces and some entries missing.  */
1476 const struct cpu_cost_table v7m_extra_costs
=
1484 0, /* arith_shift. */
1485 COSTS_N_INSNS (1), /* arith_shift_reg. */
1487 COSTS_N_INSNS (1), /* log_shift_reg. */
1489 COSTS_N_INSNS (1), /* extend_arith. */
1494 COSTS_N_INSNS (1), /* non_exec. */
1495 false /* non_exec_costs_exec. */
1500 COSTS_N_INSNS (1), /* simple. */
1501 COSTS_N_INSNS (1), /* flag_setting. */
1502 COSTS_N_INSNS (2), /* extend. */
1503 COSTS_N_INSNS (1), /* add. */
1504 COSTS_N_INSNS (3), /* extend_add. */
1505 COSTS_N_INSNS (8) /* idiv. */
1509 0, /* simple (N/A). */
1510 0, /* flag_setting (N/A). */
1511 COSTS_N_INSNS (2), /* extend. */
1513 COSTS_N_INSNS (3), /* extend_add. */
1519 COSTS_N_INSNS (2), /* load. */
1520 0, /* load_sign_extend. */
1521 COSTS_N_INSNS (3), /* ldrd. */
1522 COSTS_N_INSNS (2), /* ldm_1st. */
1523 1, /* ldm_regs_per_insn_1st. */
1524 1, /* ldm_regs_per_insn_subsequent. */
1525 COSTS_N_INSNS (2), /* loadf. */
1526 COSTS_N_INSNS (3), /* loadd. */
1527 COSTS_N_INSNS (1), /* load_unaligned. */
1528 COSTS_N_INSNS (2), /* store. */
1529 COSTS_N_INSNS (3), /* strd. */
1530 COSTS_N_INSNS (2), /* stm_1st. */
1531 1, /* stm_regs_per_insn_1st. */
1532 1, /* stm_regs_per_insn_subsequent. */
1533 COSTS_N_INSNS (2), /* storef. */
1534 COSTS_N_INSNS (3), /* stored. */
1535 COSTS_N_INSNS (1) /* store_unaligned. */
1540 COSTS_N_INSNS (7), /* div. */
1541 COSTS_N_INSNS (2), /* mult. */
1542 COSTS_N_INSNS (5), /* mult_addsub. */
1543 COSTS_N_INSNS (3), /* fma. */
1544 COSTS_N_INSNS (1), /* addsub. */
1556 COSTS_N_INSNS (15), /* div. */
1557 COSTS_N_INSNS (5), /* mult. */
1558 COSTS_N_INSNS (7), /* mult_addsub. */
1559 COSTS_N_INSNS (7), /* fma. */
1560 COSTS_N_INSNS (3), /* addsub. */
1573 COSTS_N_INSNS (1) /* alu. */
/* tune_params records: per-CPU tuning knobs (rtx cost function,
   scheduler adjust hook, constant limit, max conditional insns,
   prefetch model, constant-pool/branch/LDRD preferences, vectorizer
   costs, Neon-for-64-bit and 32-bit-encoding preferences).
   NOTE(review): initializer braces and some fields were lost in
   extraction; field meanings are taken from the surviving inline
   comments.  */
1577 const struct tune_params arm_slowmul_tune
=
1579 arm_slowmul_rtx_costs
,
1581 NULL
, /* Sched adj cost. */
1582 3, /* Constant limit. */
1583 5, /* Max cond insns. */
1584 ARM_PREFETCH_NOT_BENEFICIAL
,
1585 true, /* Prefer constant pool. */
1586 arm_default_branch_cost
,
1587 false, /* Prefer LDRD/STRD. */
1588 {true, true}, /* Prefer non short circuit. */
1589 &arm_default_vec_cost
, /* Vectorizer costs. */
1590 false, /* Prefer Neon for 64-bits bitops. */
1591 false, false /* Prefer 32-bit encodings. */
1594 const struct tune_params arm_fastmul_tune
=
1596 arm_fastmul_rtx_costs
,
1598 NULL
, /* Sched adj cost. */
1599 1, /* Constant limit. */
1600 5, /* Max cond insns. */
1601 ARM_PREFETCH_NOT_BENEFICIAL
,
1602 true, /* Prefer constant pool. */
1603 arm_default_branch_cost
,
1604 false, /* Prefer LDRD/STRD. */
1605 {true, true}, /* Prefer non short circuit. */
1606 &arm_default_vec_cost
, /* Vectorizer costs. */
1607 false, /* Prefer Neon for 64-bits bitops. */
1608 false, false /* Prefer 32-bit encodings. */
1611 /* StrongARM has early execution of branches, so a sequence that is worth
1612 skipping is shorter. Set max_insns_skipped to a lower value. */
1614 const struct tune_params arm_strongarm_tune
=
1616 arm_fastmul_rtx_costs
,
1618 NULL
, /* Sched adj cost. */
1619 1, /* Constant limit. */
1620 3, /* Max cond insns. */
1621 ARM_PREFETCH_NOT_BENEFICIAL
,
1622 true, /* Prefer constant pool. */
1623 arm_default_branch_cost
,
1624 false, /* Prefer LDRD/STRD. */
1625 {true, true}, /* Prefer non short circuit. */
1626 &arm_default_vec_cost
, /* Vectorizer costs. */
1627 false, /* Prefer Neon for 64-bits bitops. */
1628 false, false /* Prefer 32-bit encodings. */
1631 const struct tune_params arm_xscale_tune
=
1633 arm_xscale_rtx_costs
,
1635 xscale_sched_adjust_cost
,
1636 2, /* Constant limit. */
1637 3, /* Max cond insns. */
1638 ARM_PREFETCH_NOT_BENEFICIAL
,
1639 true, /* Prefer constant pool. */
1640 arm_default_branch_cost
,
1641 false, /* Prefer LDRD/STRD. */
1642 {true, true}, /* Prefer non short circuit. */
1643 &arm_default_vec_cost
, /* Vectorizer costs. */
1644 false, /* Prefer Neon for 64-bits bitops. */
1645 false, false /* Prefer 32-bit encodings. */
1648 const struct tune_params arm_9e_tune
=
1652 NULL
, /* Sched adj cost. */
1653 1, /* Constant limit. */
1654 5, /* Max cond insns. */
1655 ARM_PREFETCH_NOT_BENEFICIAL
,
1656 true, /* Prefer constant pool. */
1657 arm_default_branch_cost
,
1658 false, /* Prefer LDRD/STRD. */
1659 {true, true}, /* Prefer non short circuit. */
1660 &arm_default_vec_cost
, /* Vectorizer costs. */
1661 false, /* Prefer Neon for 64-bits bitops. */
1662 false, false /* Prefer 32-bit encodings. */
/* tune_params for v6t2 and the Cortex-A family (generic, A8, A7, A15).
   These mostly differ in rtx cost table, max cond insns, LDRD/STRD and
   32-bit-encoding preferences.  Same extraction caveat as above.  */
1665 const struct tune_params arm_v6t2_tune
=
1669 NULL
, /* Sched adj cost. */
1670 1, /* Constant limit. */
1671 5, /* Max cond insns. */
1672 ARM_PREFETCH_NOT_BENEFICIAL
,
1673 false, /* Prefer constant pool. */
1674 arm_default_branch_cost
,
1675 false, /* Prefer LDRD/STRD. */
1676 {true, true}, /* Prefer non short circuit. */
1677 &arm_default_vec_cost
, /* Vectorizer costs. */
1678 false, /* Prefer Neon for 64-bits bitops. */
1679 false, false /* Prefer 32-bit encodings. */
1682 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1683 const struct tune_params arm_cortex_tune
=
1686 &generic_extra_costs
,
1687 NULL
, /* Sched adj cost. */
1688 1, /* Constant limit. */
1689 5, /* Max cond insns. */
1690 ARM_PREFETCH_NOT_BENEFICIAL
,
1691 false, /* Prefer constant pool. */
1692 arm_default_branch_cost
,
1693 false, /* Prefer LDRD/STRD. */
1694 {true, true}, /* Prefer non short circuit. */
1695 &arm_default_vec_cost
, /* Vectorizer costs. */
1696 false, /* Prefer Neon for 64-bits bitops. */
1697 false, false /* Prefer 32-bit encodings. */
1700 const struct tune_params arm_cortex_a8_tune
=
1703 &cortexa8_extra_costs
,
1704 NULL
, /* Sched adj cost. */
1705 1, /* Constant limit. */
1706 5, /* Max cond insns. */
1707 ARM_PREFETCH_NOT_BENEFICIAL
,
1708 false, /* Prefer constant pool. */
1709 arm_default_branch_cost
,
1710 false, /* Prefer LDRD/STRD. */
1711 {true, true}, /* Prefer non short circuit. */
1712 &arm_default_vec_cost
, /* Vectorizer costs. */
1713 false, /* Prefer Neon for 64-bits bitops. */
1714 false, false /* Prefer 32-bit encodings. */
1717 const struct tune_params arm_cortex_a7_tune
=
1720 &cortexa7_extra_costs
,
1722 1, /* Constant limit. */
1723 5, /* Max cond insns. */
1724 ARM_PREFETCH_NOT_BENEFICIAL
,
1725 false, /* Prefer constant pool. */
1726 arm_default_branch_cost
,
1727 false, /* Prefer LDRD/STRD. */
1728 {true, true}, /* Prefer non short circuit. */
1729 &arm_default_vec_cost
, /* Vectorizer costs. */
1730 false, /* Prefer Neon for 64-bits bitops. */
1731 false, false /* Prefer 32-bit encodings. */
1734 const struct tune_params arm_cortex_a15_tune
=
1737 &cortexa15_extra_costs
,
1738 NULL
, /* Sched adj cost. */
1739 1, /* Constant limit. */
1740 2, /* Max cond insns. */
1741 ARM_PREFETCH_NOT_BENEFICIAL
,
1742 false, /* Prefer constant pool. */
1743 arm_default_branch_cost
,
1744 true, /* Prefer LDRD/STRD. */
1745 {true, true}, /* Prefer non short circuit. */
1746 &arm_default_vec_cost
, /* Vectorizer costs. */
1747 false, /* Prefer Neon for 64-bits bitops. */
1748 true, true /* Prefer 32-bit encodings. */
/* tune_params for Cortex-A53/A57/A5/A9/A12.  A9 and A12 enable the
   prefetch model (4 slots, 32K L1, 32-byte lines).  Same extraction
   caveat as above.  */
1751 const struct tune_params arm_cortex_a53_tune
=
1754 &cortexa53_extra_costs
,
1755 NULL
, /* Scheduler cost adjustment. */
1756 1, /* Constant limit. */
1757 5, /* Max cond insns. */
1758 ARM_PREFETCH_NOT_BENEFICIAL
,
1759 false, /* Prefer constant pool. */
1760 arm_default_branch_cost
,
1761 false, /* Prefer LDRD/STRD. */
1762 {true, true}, /* Prefer non short circuit. */
1763 &arm_default_vec_cost
, /* Vectorizer costs. */
1764 false, /* Prefer Neon for 64-bits bitops. */
1765 false, false /* Prefer 32-bit encodings. */
1768 const struct tune_params arm_cortex_a57_tune
=
1771 &cortexa57_extra_costs
,
1772 NULL
, /* Scheduler cost adjustment. */
1773 1, /* Constant limit. */
1774 2, /* Max cond insns. */
1775 ARM_PREFETCH_NOT_BENEFICIAL
,
1776 false, /* Prefer constant pool. */
1777 arm_default_branch_cost
,
1778 true, /* Prefer LDRD/STRD. */
1779 {true, true}, /* Prefer non short circuit. */
1780 &arm_default_vec_cost
, /* Vectorizer costs. */
1781 false, /* Prefer Neon for 64-bits bitops. */
1782 true, true /* Prefer 32-bit encodings. */
1785 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1786 less appealing. Set max_insns_skipped to a low value. */
1788 const struct tune_params arm_cortex_a5_tune
=
1792 NULL
, /* Sched adj cost. */
1793 1, /* Constant limit. */
1794 1, /* Max cond insns. */
1795 ARM_PREFETCH_NOT_BENEFICIAL
,
1796 false, /* Prefer constant pool. */
1797 arm_cortex_a5_branch_cost
,
1798 false, /* Prefer LDRD/STRD. */
1799 {false, false}, /* Prefer non short circuit. */
1800 &arm_default_vec_cost
, /* Vectorizer costs. */
1801 false, /* Prefer Neon for 64-bits bitops. */
1802 false, false /* Prefer 32-bit encodings. */
1805 const struct tune_params arm_cortex_a9_tune
=
1808 &cortexa9_extra_costs
,
1809 cortex_a9_sched_adjust_cost
,
1810 1, /* Constant limit. */
1811 5, /* Max cond insns. */
1812 ARM_PREFETCH_BENEFICIAL(4,32,32),
1813 false, /* Prefer constant pool. */
1814 arm_default_branch_cost
,
1815 false, /* Prefer LDRD/STRD. */
1816 {true, true}, /* Prefer non short circuit. */
1817 &arm_default_vec_cost
, /* Vectorizer costs. */
1818 false, /* Prefer Neon for 64-bits bitops. */
1819 false, false /* Prefer 32-bit encodings. */
1822 const struct tune_params arm_cortex_a12_tune
=
1825 &cortexa12_extra_costs
,
1827 1, /* Constant limit. */
1828 5, /* Max cond insns. */
1829 ARM_PREFETCH_BENEFICIAL(4,32,32),
1830 false, /* Prefer constant pool. */
1831 arm_default_branch_cost
,
1832 true, /* Prefer LDRD/STRD. */
1833 {true, true}, /* Prefer non short circuit. */
1834 &arm_default_vec_cost
, /* Vectorizer costs. */
1835 false, /* Prefer Neon for 64-bits bitops. */
1836 false, false /* Prefer 32-bit encodings. */
/* tune_params for the M-profile cores and FA726TE.  v7-M prefers the
   constant pool (see rationale comment); v6-M shares generic Cortex
   tuning.  Same extraction caveat as above.  */
1839 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1840 cycle to execute each. An LDR from the constant pool also takes two cycles
1841 to execute, but mildly increases pipelining opportunity (consecutive
1842 loads/stores can be pipelined together, saving one cycle), and may also
1843 improve icache utilisation. Hence we prefer the constant pool for such
1846 const struct tune_params arm_v7m_tune
=
1850 NULL
, /* Sched adj cost. */
1851 1, /* Constant limit. */
1852 2, /* Max cond insns. */
1853 ARM_PREFETCH_NOT_BENEFICIAL
,
1854 true, /* Prefer constant pool. */
1855 arm_cortex_m_branch_cost
,
1856 false, /* Prefer LDRD/STRD. */
1857 {false, false}, /* Prefer non short circuit. */
1858 &arm_default_vec_cost
, /* Vectorizer costs. */
1859 false, /* Prefer Neon for 64-bits bitops. */
1860 false, false /* Prefer 32-bit encodings. */
1863 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1864 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1865 const struct tune_params arm_v6m_tune
=
1869 NULL
, /* Sched adj cost. */
1870 1, /* Constant limit. */
1871 5, /* Max cond insns. */
1872 ARM_PREFETCH_NOT_BENEFICIAL
,
1873 false, /* Prefer constant pool. */
1874 arm_default_branch_cost
,
1875 false, /* Prefer LDRD/STRD. */
1876 {false, false}, /* Prefer non short circuit. */
1877 &arm_default_vec_cost
, /* Vectorizer costs. */
1878 false, /* Prefer Neon for 64-bits bitops. */
1879 false, false /* Prefer 32-bit encodings. */
1882 const struct tune_params arm_fa726te_tune
=
1886 fa726te_sched_adjust_cost
,
1887 1, /* Constant limit. */
1888 5, /* Max cond insns. */
1889 ARM_PREFETCH_NOT_BENEFICIAL
,
1890 true, /* Prefer constant pool. */
1891 arm_default_branch_cost
,
1892 false, /* Prefer LDRD/STRD. */
1893 {true, true}, /* Prefer non short circuit. */
1894 &arm_default_vec_cost
, /* Vectorizer costs. */
1895 false, /* Prefer Neon for 64-bits bitops. */
1896 false, false /* Prefer 32-bit encodings. */
/* CPU and architecture description tables, expanded from the
   arm-cores.def / arm-arches.def X-macro files; each is terminated by
   an all-NULL sentinel entry.  Architectures carry no tuning costs
   (tune is NULL).  */
1900 /* Not all of these give usefully different compilation alternatives,
1901 but there is no simple way of generalizing them. */
1902 static const struct processors all_cores
[] =
1905 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1906 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1907 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1908 #include "arm-cores.def"
1910 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1913 static const struct processors all_architectures
[] =
1915 /* ARM Architectures */
1916 /* We don't specify tuning costs here as it will be figured out
1919 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1920 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1921 #include "arm-arches.def"
1923 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
/* Option-processing results and the FPU description table.
   arm_arch_name holds the __ARM_ARCH_*__ macro name, patched in place
   once the architecture is known (hence the writable array of fixed
   size).  */
1927 /* These are populated as commandline arguments are processed, or NULL
1928 if not specified. */
1929 static const struct processors
*arm_selected_arch
;
1930 static const struct processors
*arm_selected_cpu
;
1931 static const struct processors
*arm_selected_tune
;
1933 /* The name of the preprocessor macro to define for this architecture. */
1935 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
1937 /* Available values for -mfpu=. */
1939 static const struct arm_fpu_desc all_fpus
[] =
1941 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1942 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1943 #include "arm-fpus.def"
/* TLS relocation kinds (only the final enumerator survived
   extraction) and the constant-load insn limit: 1 when optimizing for
   size, otherwise the tuned per-CPU limit.  NOTE(review): the return
   type and braces of arm_constant_limit were lost in extraction.  */
1948 /* Supported TLS relocations. */
1956 TLS_DESCSEQ
/* GNU scheme */
1959 /* The maximum number of insns to be used when loading a constant. */
1961 arm_constant_limit (bool size_p
)
1963 return size_p
? 1 : current_tune
->constant_limit
;
/* emit_set_insn: emit a single (set x y) insn.  bit_count: population
   count via the Kernighan clear-lowest-set-bit trick.  NOTE(review):
   return types, braces and the bit_count loop header were lost in
   extraction; only partial bodies are visible.  */
1966 /* Emit an insn that's a simple single-set. Both the operands must be known
1969 emit_set_insn (rtx x
, rtx y
)
1971 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
1974 /* Return the number of bits set in VALUE. */
1976 bit_count (unsigned long value
)
1978 unsigned long count
= 0;
1983 value
&= value
- 1; /* Clear the least-significant set bit. */
/* Fragment of the arm_fixed_mode_set record type, then a helper that
   registers a "__gnu_<func><mode>[<n>]" libfunc name for a fixed-point
   optab; num_suffix == 0 omits the numeric suffix.  NOTE(review): the
   function's return type, braces, the buffer declaration and the
   num_suffix parameter declaration were lost in extraction.  */
1991 enum machine_mode mode
;
1993 } arm_fixed_mode_set
;
1995 /* A small helper for setting fixed-point library libfuncs. */
1998 arm_set_fixed_optab_libfunc (optab optable
, enum machine_mode mode
,
1999 const char *funcname
, const char *modename
,
2004 if (num_suffix
== 0)
2005 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2007 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2009 set_optab_libfunc (optable
, mode
, buffer
);
/* Register a "__gnu_<func><from><to>[2]" libfunc for a fixed-point
   conversion optab.  The "2" suffix mirrors the selection logic in
   libgcc's fixed-bit.h: added when converting between two fixed-point
   modes of the same signedness and the same fract/accum class.
   NOTE(review): return type, braces and the buffer declaration were
   lost in extraction.  */
2013 arm_set_fixed_conv_libfunc (convert_optab optable
, enum machine_mode to
,
2014 enum machine_mode from
, const char *funcname
,
2015 const char *toname
, const char *fromname
)
2018 const char *maybe_suffix_2
= "";
2020 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2021 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2022 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2023 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2024 maybe_suffix_2
= "2";
2026 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2029 set_conv_libfunc (optable
, to
, from
, buffer
);
2032 /* Set up library functions unique to ARM. */
2035 arm_init_libfuncs (void)
2037 /* For Linux, we have access to kernel support for atomic operations. */
2038 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2039 init_sync_libfuncs (2 * UNITS_PER_WORD
);
2041 /* There are no special library functions unless we are using the
2046 /* The functions below are described in Section 4 of the "Run-Time
2047 ABI for the ARM architecture", Version 1.0. */
2049 /* Double-precision floating-point arithmetic. Table 2. */
2050 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2051 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2052 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2053 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2054 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2056 /* Double-precision comparisons. Table 3. */
2057 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2058 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2059 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2060 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2061 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2062 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2063 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2065 /* Single-precision floating-point arithmetic. Table 4. */
2066 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2067 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2068 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2069 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2070 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2072 /* Single-precision comparisons. Table 5. */
2073 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2074 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2075 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2076 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2077 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2078 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2079 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2081 /* Floating-point to integer conversions. Table 6. */
2082 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2083 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2084 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2085 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2086 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2087 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2088 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2089 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2091 /* Conversions between floating types. Table 7. */
2092 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2093 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2095 /* Integer to floating-point conversions. Table 8. */
2096 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2097 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2098 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2099 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2100 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2101 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2102 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2103 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2105 /* Long long. Table 9. */
2106 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2107 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2108 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2109 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2110 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2111 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2112 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2113 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2115 /* Integer (32/32->32) division. \S 4.3.1. */
2116 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2117 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2119 /* The divmod functions are designed so that they can be used for
2120 plain division, even though they return both the quotient and the
2121 remainder. The quotient is returned in the usual location (i.e.,
2122 r0 for SImode, {r0, r1} for DImode), just as would be expected
2123 for an ordinary division routine. Because the AAPCS calling
2124 conventions specify that all of { r0, r1, r2, r3 } are
2125 callee-saved registers, there is no need to tell the compiler
2126 explicitly that those registers are clobbered by these
2128 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2129 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2131 /* For SImode division the ABI provides div-without-mod routines,
2132 which are faster. */
2133 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2134 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2136 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2137 divmod libcalls instead. */
2138 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2139 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2140 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2141 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2143 /* Half-precision float operations. The compiler handles all operations
2144 with NULL libfuncs by converting the SFmode. */
2145 switch (arm_fp16_format
)
2147 case ARM_FP16_FORMAT_IEEE
:
2148 case ARM_FP16_FORMAT_ALTERNATIVE
:
2151 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2152 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2154 : "__gnu_f2h_alternative"));
2155 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2156 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2158 : "__gnu_h2f_alternative"));
2161 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2162 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2163 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2164 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2165 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2168 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2169 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2170 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2171 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2172 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2173 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2174 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2181 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2183 const arm_fixed_mode_set fixed_arith_modes
[] =
2204 const arm_fixed_mode_set fixed_conv_modes
[] =
2234 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2236 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2237 "add", fixed_arith_modes
[i
].name
, 3);
2238 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2239 "ssadd", fixed_arith_modes
[i
].name
, 3);
2240 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2241 "usadd", fixed_arith_modes
[i
].name
, 3);
2242 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2243 "sub", fixed_arith_modes
[i
].name
, 3);
2244 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2245 "sssub", fixed_arith_modes
[i
].name
, 3);
2246 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2247 "ussub", fixed_arith_modes
[i
].name
, 3);
2248 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2249 "mul", fixed_arith_modes
[i
].name
, 3);
2250 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2251 "ssmul", fixed_arith_modes
[i
].name
, 3);
2252 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2253 "usmul", fixed_arith_modes
[i
].name
, 3);
2254 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2255 "div", fixed_arith_modes
[i
].name
, 3);
2256 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2257 "udiv", fixed_arith_modes
[i
].name
, 3);
2258 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2259 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2260 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2261 "usdiv", fixed_arith_modes
[i
].name
, 3);
2262 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2263 "neg", fixed_arith_modes
[i
].name
, 2);
2264 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2265 "ssneg", fixed_arith_modes
[i
].name
, 2);
2266 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2267 "usneg", fixed_arith_modes
[i
].name
, 2);
2268 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2269 "ashl", fixed_arith_modes
[i
].name
, 3);
2270 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2271 "ashr", fixed_arith_modes
[i
].name
, 3);
2272 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2273 "lshr", fixed_arith_modes
[i
].name
, 3);
2274 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2275 "ssashl", fixed_arith_modes
[i
].name
, 3);
2276 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2277 "usashl", fixed_arith_modes
[i
].name
, 3);
2278 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2279 "cmp", fixed_arith_modes
[i
].name
, 2);
2282 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2283 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2286 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2287 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2290 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2291 fixed_conv_modes
[j
].mode
, "fract",
2292 fixed_conv_modes
[i
].name
,
2293 fixed_conv_modes
[j
].name
);
2294 arm_set_fixed_conv_libfunc (satfract_optab
,
2295 fixed_conv_modes
[i
].mode
,
2296 fixed_conv_modes
[j
].mode
, "satfract",
2297 fixed_conv_modes
[i
].name
,
2298 fixed_conv_modes
[j
].name
);
2299 arm_set_fixed_conv_libfunc (fractuns_optab
,
2300 fixed_conv_modes
[i
].mode
,
2301 fixed_conv_modes
[j
].mode
, "fractuns",
2302 fixed_conv_modes
[i
].name
,
2303 fixed_conv_modes
[j
].name
);
2304 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2305 fixed_conv_modes
[i
].mode
,
2306 fixed_conv_modes
[j
].mode
, "satfractuns",
2307 fixed_conv_modes
[i
].name
,
2308 fixed_conv_modes
[j
].name
);
2312 if (TARGET_AAPCS_BASED
)
2313 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2316 /* On AAPCS systems, this is the "struct __va_list". */
2317 static GTY(()) tree va_list_type
;
2319 /* Return the type to use as __builtin_va_list. */
2321 arm_build_builtin_va_list (void)
2326 if (!TARGET_AAPCS_BASED
)
2327 return std_build_builtin_va_list ();
2329 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2337 The C Library ABI further reinforces this definition in \S
2340 We must follow this definition exactly. The structure tag
2341 name is visible in C++ mangled names, and thus forms a part
2342 of the ABI. The field name may be used by people who
2343 #include <stdarg.h>. */
2344 /* Create the type. */
2345 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2346 /* Give it the required name. */
2347 va_list_name
= build_decl (BUILTINS_LOCATION
,
2349 get_identifier ("__va_list"),
2351 DECL_ARTIFICIAL (va_list_name
) = 1;
2352 TYPE_NAME (va_list_type
) = va_list_name
;
2353 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2354 /* Create the __ap field. */
2355 ap_field
= build_decl (BUILTINS_LOCATION
,
2357 get_identifier ("__ap"),
2359 DECL_ARTIFICIAL (ap_field
) = 1;
2360 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2361 TYPE_FIELDS (va_list_type
) = ap_field
;
2362 /* Compute its layout. */
2363 layout_type (va_list_type
);
2365 return va_list_type
;
2368 /* Return an expression of type "void *" pointing to the next
2369 available argument in a variable-argument list. VALIST is the
2370 user-level va_list object, of type __builtin_va_list. */
2372 arm_extract_valist_ptr (tree valist
)
2374 if (TREE_TYPE (valist
) == error_mark_node
)
2375 return error_mark_node
;
2377 /* On an AAPCS target, the pointer is stored within "struct
2379 if (TARGET_AAPCS_BASED
)
2381 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2382 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2383 valist
, ap_field
, NULL_TREE
);
2389 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2391 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2393 valist
= arm_extract_valist_ptr (valist
);
2394 std_expand_builtin_va_start (valist
, nextarg
);
2397 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2399 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2402 valist
= arm_extract_valist_ptr (valist
);
2403 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2406 /* Fix up any incompatible options that the user has specified. */
2408 arm_option_override (void)
2410 if (global_options_set
.x_arm_arch_option
)
2411 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2413 if (global_options_set
.x_arm_cpu_option
)
2415 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2416 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2419 if (global_options_set
.x_arm_tune_option
)
2420 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2422 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2423 SUBTARGET_OVERRIDE_OPTIONS
;
2426 if (arm_selected_arch
)
2428 if (arm_selected_cpu
)
2430 /* Check for conflict between mcpu and march. */
2431 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
2433 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2434 arm_selected_cpu
->name
, arm_selected_arch
->name
);
2435 /* -march wins for code generation.
2436 -mcpu wins for default tuning. */
2437 if (!arm_selected_tune
)
2438 arm_selected_tune
= arm_selected_cpu
;
2440 arm_selected_cpu
= arm_selected_arch
;
2444 arm_selected_arch
= NULL
;
2447 /* Pick a CPU based on the architecture. */
2448 arm_selected_cpu
= arm_selected_arch
;
2451 /* If the user did not specify a processor, choose one for them. */
2452 if (!arm_selected_cpu
)
2454 const struct processors
* sel
;
2455 unsigned int sought
;
2457 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
2458 if (!arm_selected_cpu
->name
)
2460 #ifdef SUBTARGET_CPU_DEFAULT
2461 /* Use the subtarget default CPU if none was specified by
2463 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
2465 /* Default to ARM6. */
2466 if (!arm_selected_cpu
->name
)
2467 arm_selected_cpu
= &all_cores
[arm6
];
2470 sel
= arm_selected_cpu
;
2471 insn_flags
= sel
->flags
;
2473 /* Now check to see if the user has specified some command line
2474 switch that require certain abilities from the cpu. */
2477 if (TARGET_INTERWORK
|| TARGET_THUMB
)
2479 sought
|= (FL_THUMB
| FL_MODE32
);
2481 /* There are no ARM processors that support both APCS-26 and
2482 interworking. Therefore we force FL_MODE26 to be removed
2483 from insn_flags here (if it was set), so that the search
2484 below will always be able to find a compatible processor. */
2485 insn_flags
&= ~FL_MODE26
;
2488 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
2490 /* Try to locate a CPU type that supports all of the abilities
2491 of the default CPU, plus the extra abilities requested by
2493 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2494 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
2497 if (sel
->name
== NULL
)
2499 unsigned current_bit_count
= 0;
2500 const struct processors
* best_fit
= NULL
;
2502 /* Ideally we would like to issue an error message here
2503 saying that it was not possible to find a CPU compatible
2504 with the default CPU, but which also supports the command
2505 line options specified by the programmer, and so they
2506 ought to use the -mcpu=<name> command line option to
2507 override the default CPU type.
2509 If we cannot find a cpu that has both the
2510 characteristics of the default cpu and the given
2511 command line options we scan the array again looking
2512 for a best match. */
2513 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2514 if ((sel
->flags
& sought
) == sought
)
2518 count
= bit_count (sel
->flags
& insn_flags
);
2520 if (count
>= current_bit_count
)
2523 current_bit_count
= count
;
2527 gcc_assert (best_fit
);
2531 arm_selected_cpu
= sel
;
2535 gcc_assert (arm_selected_cpu
);
2536 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2537 if (!arm_selected_tune
)
2538 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
2540 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
2541 insn_flags
= arm_selected_cpu
->flags
;
2542 arm_base_arch
= arm_selected_cpu
->base_arch
;
2544 arm_tune
= arm_selected_tune
->core
;
2545 tune_flags
= arm_selected_tune
->flags
;
2546 current_tune
= arm_selected_tune
->tune
;
2548 /* Make sure that the processor choice does not conflict with any of the
2549 other command line choices. */
2550 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
2551 error ("target CPU does not support ARM mode");
2553 /* BPABI targets use linker tricks to allow interworking on cores
2554 without thumb support. */
2555 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
2557 warning (0, "target CPU does not support interworking" );
2558 target_flags
&= ~MASK_INTERWORK
;
2561 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
2563 warning (0, "target CPU does not support THUMB instructions");
2564 target_flags
&= ~MASK_THUMB
;
2567 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
2569 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2570 target_flags
&= ~MASK_APCS_FRAME
;
2573 /* Callee super interworking implies thumb interworking. Adding
2574 this to the flags here simplifies the logic elsewhere. */
2575 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
2576 target_flags
|= MASK_INTERWORK
;
2578 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2579 from here where no function is being compiled currently. */
2580 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
2581 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2583 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
2584 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2586 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
2588 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2589 target_flags
|= MASK_APCS_FRAME
;
2592 if (TARGET_POKE_FUNCTION_NAME
)
2593 target_flags
|= MASK_APCS_FRAME
;
2595 if (TARGET_APCS_REENT
&& flag_pic
)
2596 error ("-fpic and -mapcs-reent are incompatible");
2598 if (TARGET_APCS_REENT
)
2599 warning (0, "APCS reentrant code not supported. Ignored");
2601 /* If this target is normally configured to use APCS frames, warn if they
2602 are turned off and debugging is turned on. */
2604 && write_symbols
!= NO_DEBUG
2605 && !TARGET_APCS_FRAME
2606 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2607 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2609 if (TARGET_APCS_FLOAT
)
2610 warning (0, "passing floating point arguments in fp regs not yet supported");
2612 if (TARGET_LITTLE_WORDS
)
2613 warning (OPT_Wdeprecated
, "%<mwords-little-endian%> is deprecated and "
2614 "will be removed in a future release");
2616 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2617 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
2618 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
2619 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
2620 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
2621 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
2622 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
2623 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
2624 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
2625 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
2626 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
2627 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
2628 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
2629 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
2630 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
2632 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
2633 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
2634 thumb_code
= TARGET_ARM
== 0;
2635 thumb1_code
= TARGET_THUMB1
!= 0;
2636 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
2637 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
2638 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
2639 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
2640 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
2641 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
2642 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
2643 arm_arch_crc
= (insn_flags
& FL_CRC32
) != 0;
2644 if (arm_restrict_it
== 2)
2645 arm_restrict_it
= arm_arch8
&& TARGET_THUMB2
;
2648 arm_restrict_it
= 0;
2650 /* If we are not using the default (ARM mode) section anchor offset
2651 ranges, then set the correct ranges now. */
2654 /* Thumb-1 LDR instructions cannot have negative offsets.
2655 Permissible positive offset ranges are 5-bit (for byte loads),
2656 6-bit (for halfword loads), or 7-bit (for word loads).
2657 Empirical results suggest a 7-bit anchor range gives the best
2658 overall code size. */
2659 targetm
.min_anchor_offset
= 0;
2660 targetm
.max_anchor_offset
= 127;
2662 else if (TARGET_THUMB2
)
2664 /* The minimum is set such that the total size of the block
2665 for a particular anchor is 248 + 1 + 4095 bytes, which is
2666 divisible by eight, ensuring natural spacing of anchors. */
2667 targetm
.min_anchor_offset
= -248;
2668 targetm
.max_anchor_offset
= 4095;
2671 /* V5 code we generate is completely interworking capable, so we turn off
2672 TARGET_INTERWORK here to avoid many tests later on. */
2674 /* XXX However, we must pass the right pre-processor defines to CPP
2675 or GLD can get confused. This is a hack. */
2676 if (TARGET_INTERWORK
)
2677 arm_cpp_interwork
= 1;
2680 target_flags
&= ~MASK_INTERWORK
;
2682 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
2683 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2685 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
2686 error ("iwmmxt abi requires an iwmmxt capable cpu");
2688 if (!global_options_set
.x_arm_fpu_index
)
2690 const char *target_fpu_name
;
2693 #ifdef FPUTYPE_DEFAULT
2694 target_fpu_name
= FPUTYPE_DEFAULT
;
2696 target_fpu_name
= "vfp";
2699 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
2704 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
2706 switch (arm_fpu_desc
->model
)
2708 case ARM_FP_MODEL_VFP
:
2709 arm_fpu_attr
= FPU_VFP
;
2716 if (TARGET_AAPCS_BASED
)
2718 if (TARGET_CALLER_INTERWORKING
)
2719 error ("AAPCS does not support -mcaller-super-interworking");
2721 if (TARGET_CALLEE_INTERWORKING
)
2722 error ("AAPCS does not support -mcallee-super-interworking");
2725 /* iWMMXt and NEON are incompatible. */
2726 if (TARGET_IWMMXT
&& TARGET_NEON
)
2727 error ("iWMMXt and NEON are incompatible");
2729 /* iWMMXt unsupported under Thumb mode. */
2730 if (TARGET_THUMB
&& TARGET_IWMMXT
)
2731 error ("iWMMXt unsupported under Thumb mode");
2733 /* __fp16 support currently assumes the core has ldrh. */
2734 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
2735 sorry ("__fp16 and no ldrh");
2737 /* If soft-float is specified then don't use FPU. */
2738 if (TARGET_SOFT_FLOAT
)
2739 arm_fpu_attr
= FPU_NONE
;
2741 if (TARGET_AAPCS_BASED
)
2743 if (arm_abi
== ARM_ABI_IWMMXT
)
2744 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
2745 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
2746 && TARGET_HARD_FLOAT
2748 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
2750 arm_pcs_default
= ARM_PCS_AAPCS
;
2754 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
2755 sorry ("-mfloat-abi=hard and VFP");
2757 if (arm_abi
== ARM_ABI_APCS
)
2758 arm_pcs_default
= ARM_PCS_APCS
;
2760 arm_pcs_default
= ARM_PCS_ATPCS
;
2763 /* For arm2/3 there is no need to do any scheduling if we are doing
2764 software floating-point. */
2765 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
2766 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
2768 /* Use the cp15 method if it is available. */
2769 if (target_thread_pointer
== TP_AUTO
)
2771 if (arm_arch6k
&& !TARGET_THUMB1
)
2772 target_thread_pointer
= TP_CP15
;
2774 target_thread_pointer
= TP_SOFT
;
2777 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
2778 error ("can not use -mtp=cp15 with 16-bit Thumb");
2780 /* Override the default structure alignment for AAPCS ABI. */
2781 if (!global_options_set
.x_arm_structure_size_boundary
)
2783 if (TARGET_AAPCS_BASED
)
2784 arm_structure_size_boundary
= 8;
2788 if (arm_structure_size_boundary
!= 8
2789 && arm_structure_size_boundary
!= 32
2790 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
2792 if (ARM_DOUBLEWORD_ALIGN
)
2794 "structure size boundary can only be set to 8, 32 or 64");
2796 warning (0, "structure size boundary can only be set to 8 or 32");
2797 arm_structure_size_boundary
2798 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
2802 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
2804 error ("RTP PIC is incompatible with Thumb");
2808 /* If stack checking is disabled, we can use r10 as the PIC register,
2809 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2810 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
2812 if (TARGET_VXWORKS_RTP
)
2813 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2814 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
2817 if (flag_pic
&& TARGET_VXWORKS_RTP
)
2818 arm_pic_register
= 9;
2820 if (arm_pic_register_string
!= NULL
)
2822 int pic_register
= decode_reg_name (arm_pic_register_string
);
2825 warning (0, "-mpic-register= is useless without -fpic");
2827 /* Prevent the user from choosing an obviously stupid PIC register. */
2828 else if (pic_register
< 0 || call_used_regs
[pic_register
]
2829 || pic_register
== HARD_FRAME_POINTER_REGNUM
2830 || pic_register
== STACK_POINTER_REGNUM
2831 || pic_register
>= PC_REGNUM
2832 || (TARGET_VXWORKS_RTP
2833 && (unsigned int) pic_register
!= arm_pic_register
))
2834 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
2836 arm_pic_register
= pic_register
;
2839 if (TARGET_VXWORKS_RTP
2840 && !global_options_set
.x_arm_pic_data_is_text_relative
)
2841 arm_pic_data_is_text_relative
= 0;
2843 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2844 if (fix_cm3_ldrd
== 2)
2846 if (arm_selected_cpu
->core
== cortexm3
)
2852 /* Enable -munaligned-access by default for
2853 - all ARMv6 architecture-based processors
2854 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2855 - ARMv8 architecture-base processors.
2857 Disable -munaligned-access by default for
2858 - all pre-ARMv6 architecture-based processors
2859 - ARMv6-M architecture-based processors. */
2861 if (unaligned_access
== 2)
2863 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
2864 unaligned_access
= 1;
2866 unaligned_access
= 0;
2868 else if (unaligned_access
== 1
2869 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2871 warning (0, "target CPU does not support unaligned accesses");
2872 unaligned_access
= 0;
2875 if (TARGET_THUMB1
&& flag_schedule_insns
)
2877 /* Don't warn since it's on by default in -O2. */
2878 flag_schedule_insns
= 0;
2883 /* If optimizing for size, bump the number of instructions that we
2884 are prepared to conditionally execute (even on a StrongARM). */
2885 max_insns_skipped
= 6;
2888 max_insns_skipped
= current_tune
->max_insns_skipped
;
2890 /* Hot/Cold partitioning is not currently supported, since we can't
2891 handle literal pool placement in that case. */
2892 if (flag_reorder_blocks_and_partition
)
2894 inform (input_location
,
2895 "-freorder-blocks-and-partition not supported on this architecture");
2896 flag_reorder_blocks_and_partition
= 0;
2897 flag_reorder_blocks
= 1;
2901 /* Hoisting PIC address calculations more aggressively provides a small,
2902 but measurable, size reduction for PIC code. Therefore, we decrease
2903 the bar for unrestricted expression hoisting to the cost of PIC address
2904 calculation, which is 2 instructions. */
2905 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
2906 global_options
.x_param_values
,
2907 global_options_set
.x_param_values
);
2909 /* ARM EABI defaults to strict volatile bitfields. */
2910 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
2911 && abi_version_at_least(2))
2912 flag_strict_volatile_bitfields
= 1;
2914 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2915 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2916 if (flag_prefetch_loop_arrays
< 0
2919 && current_tune
->num_prefetch_slots
> 0)
2920 flag_prefetch_loop_arrays
= 1;
2922 /* Set up parameters to be used in prefetching algorithm. Do not override the
2923 defaults unless we are tuning for a core we have researched values for. */
2924 if (current_tune
->num_prefetch_slots
> 0)
2925 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
2926 current_tune
->num_prefetch_slots
,
2927 global_options
.x_param_values
,
2928 global_options_set
.x_param_values
);
2929 if (current_tune
->l1_cache_line_size
>= 0)
2930 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
2931 current_tune
->l1_cache_line_size
,
2932 global_options
.x_param_values
,
2933 global_options_set
.x_param_values
);
2934 if (current_tune
->l1_cache_size
>= 0)
2935 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
2936 current_tune
->l1_cache_size
,
2937 global_options
.x_param_values
,
2938 global_options_set
.x_param_values
);
2940 /* Use Neon to perform 64-bits operations rather than core
2942 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
2943 if (use_neon_for_64bits
== 1)
2944 prefer_neon_for_64bits
= true;
2946 /* Use the alternative scheduling-pressure algorithm by default. */
2947 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, 2,
2948 global_options
.x_param_values
,
2949 global_options_set
.x_param_values
);
2951 /* Disable shrink-wrap when optimizing function for size, since it tends to
2952 generate additional returns. */
2953 if (optimize_function_for_size_p (cfun
) && TARGET_THUMB2
)
2954 flag_shrink_wrap
= false;
2955 /* TBD: Dwarf info for apcs frame is not handled yet. */
2956 if (TARGET_APCS_FRAME
)
2957 flag_shrink_wrap
= false;
2959 /* We only support -mslow-flash-data on armv7-m targets. */
2960 if (target_slow_flash_data
2961 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2962 || (TARGET_THUMB1
|| flag_pic
|| TARGET_NEON
)))
2963 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2965 /* Currently, for slow flash data, we just disable literal pools. */
2966 if (target_slow_flash_data
)
2967 arm_disable_literal_pool
= true;
2969 /* Register global variables with the garbage collector. */
2970 arm_add_gc_roots ();
2974 arm_add_gc_roots (void)
2976 gcc_obstack_init(&minipool_obstack
);
2977 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
2980 /* A table of known ARM exception types.
2981 For use with the interrupt function attribute. */
2985 const char *const arg
;
2986 const unsigned long return_value
;
2990 static const isr_attribute_arg isr_attribute_args
[] =
2992 { "IRQ", ARM_FT_ISR
},
2993 { "irq", ARM_FT_ISR
},
2994 { "FIQ", ARM_FT_FIQ
},
2995 { "fiq", ARM_FT_FIQ
},
2996 { "ABORT", ARM_FT_ISR
},
2997 { "abort", ARM_FT_ISR
},
2998 { "ABORT", ARM_FT_ISR
},
2999 { "abort", ARM_FT_ISR
},
3000 { "UNDEF", ARM_FT_EXCEPTION
},
3001 { "undef", ARM_FT_EXCEPTION
},
3002 { "SWI", ARM_FT_EXCEPTION
},
3003 { "swi", ARM_FT_EXCEPTION
},
3004 { NULL
, ARM_FT_NORMAL
}
3007 /* Returns the (interrupt) function type of the current
3008 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
/* ARGUMENT is the TREE_VALUE chain of the "isr"/"interrupt" attribute.
   NOTE(review): several original lines are missing from this extract (the
   guard before the STACKALIGN return, the default-to-IRQ return body, etc.);
   text is preserved exactly as found.  */
3010 static unsigned long
3011 arm_isr_value (tree argument
)
3013 const isr_attribute_arg
* ptr
;
3017 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3019 /* No argument - default to IRQ. */
3020 if (argument
== NULL_TREE
)
3023 /* Get the value of the argument. */
3024 if (TREE_VALUE (argument
) == NULL_TREE
3025 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3026 return ARM_FT_UNKNOWN
;
3028 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3030 /* Check it against the list of known arguments. */
3031 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3032 if (streq (arg
, ptr
->arg
))
3033 return ptr
->return_value
;
3035 /* An unrecognized interrupt type. */
3036 return ARM_FT_UNKNOWN
;
3039 /* Computes the type of the current function. */
/* Builds an ARM_FT_* bitmask from properties of current_function_decl:
   volatility (noreturn), static-chain use (nested), and the "naked",
   "isr"/"interrupt" attributes.  Called lazily via arm_current_func_type.
   NOTE(review): lines are missing from this extract (declarations of
   `attr'/`a', several if-conditions and the final return); text preserved
   exactly as found.  */
3041 static unsigned long
3042 arm_compute_func_type (void)
3044 unsigned long type
= ARM_FT_UNKNOWN
;
3048 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3050 /* Decide if the current function is volatile. Such functions
3051 never return, and many memory cycles can be saved by not storing
3052 register values that will never be needed again. This optimization
3053 was added to speed up context switching in a kernel application. */
3055 && (TREE_NOTHROW (current_function_decl
)
3056 || !(flag_unwind_tables
3058 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3059 && TREE_THIS_VOLATILE (current_function_decl
))
3060 type
|= ARM_FT_VOLATILE
;
3062 if (cfun
->static_chain_decl
!= NULL
)
3063 type
|= ARM_FT_NESTED
;
3065 attr
= DECL_ATTRIBUTES (current_function_decl
);
3067 a
= lookup_attribute ("naked", attr
);
3069 type
|= ARM_FT_NAKED
;
/* "isr" and "interrupt" are synonyms; try both spellings.  */
3071 a
= lookup_attribute ("isr", attr
);
3073 a
= lookup_attribute ("interrupt", attr
);
3076 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3078 type
|= arm_isr_value (TREE_VALUE (a
));
3083 /* Returns the type of the current function. */
/* Memoizing accessor: computes cfun->machine->func_type on first use and
   caches it for subsequent queries.  */
3086 arm_current_func_type (void)
3088 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3089 cfun
->machine
->func_type
= arm_compute_func_type ();
3091 return cfun
->machine
->func_type
;
/* Target hook: whether incoming arguments may be spilled to stack slots.
   Returns false only for "naked" functions, whose bodies are pure asm.  */
3095 arm_allocate_stack_slots_for_args (void)
3097 /* Naked functions should not allocate stack slots for arguments. */
3098 return !IS_NAKED (arm_current_func_type ());
/* Target hook: whether -Wreturn-type style warnings apply to DECL.
   True unless DECL carries the "naked" attribute.  */
3102 arm_warn_func_return (tree decl
)
3104 /* Naked functions are implemented entirely in assembly, including the
3105 return sequence, so suppress warnings about this. */
3106 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3110 /* Output assembler code for a block containing the constant parts
3111 of a trampoline, leaving space for the variable parts.
3113 On the ARM, (if r8 is the static chain regnum, and remembering that
3114 referencing pc adds an offset of 8) the trampoline looks like:
3117 .word static chain value
3118 .word function's address
3119 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
/* Three variants are emitted below: 32-bit ARM, Thumb-2, and 16-bit Thumb-1
   (which must shuffle through r0 since it cannot load into high registers
   directly).  NOTE(review): the if/else structure around the variants is
   partly missing from this extract; text preserved exactly as found.  */
3122 arm_asm_trampoline_template (FILE *f
)
3126 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3127 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3129 else if (TARGET_THUMB2
)
3131 /* The Thumb-2 trampoline is similar to the arm implementation.
3132 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3133 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3134 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3135 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3139 ASM_OUTPUT_ALIGN (f
, 2);
3140 fprintf (f
, "\t.code\t16\n");
3141 fprintf (f
, ".Ltrampoline_start:\n");
3142 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3143 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3144 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3145 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3146 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3147 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
/* Reserve the two variable words (chain value, target address) that
   arm_trampoline_init fills in at run time.  */
3149 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3150 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3153 /* Emit RTL insns to initialize the variable parts of a trampoline. */
/* M_TRAMP is the memory block for the trampoline, FNDECL the nested
   function, CHAIN_VALUE the static chain.  Copies the template, patches
   the chain and function-address words (at different offsets for 32-bit
   vs Thumb-1 templates), then calls __clear_cache over the block so the
   patched code is visible to instruction fetch.  */
3156 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3158 rtx fnaddr
, mem
, a_tramp
;
3160 emit_block_move (m_tramp
, assemble_trampoline_template (),
3161 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3163 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3164 emit_move_insn (mem
, chain_value
);
3166 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3167 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3168 emit_move_insn (mem
, fnaddr
);
3170 a_tramp
= XEXP (m_tramp
, 0);
3171 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3172 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3173 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3176 /* Thumb trampolines should be entered in thumb mode, so set
3177 the bottom bit of the address. */
/* ORs bit 0 into ADDR (interworking convention: bit 0 set selects Thumb
   state on branch).  NOTE(review): the TARGET_THUMB guard around the OR is
   not visible in this extract.  */
3180 arm_trampoline_adjust_address (rtx addr
)
3183 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3184 NULL
, 0, OPTAB_LIB_WIDEN
);
3188 /* Return 1 if it is possible to return using a single instruction.
3189 If SIBLING is non-null, this is a test for a return before a sibling
3190 call. SIBLING is the call insn, so we can examine its register usage. */
/* A long chain of disqualifying checks; each failing check rejects the
   single-instruction return.  NOTE(review): many `return 0;'/`return 1;'
   lines and some conditions are missing from this extract; text preserved
   exactly as found.  */
3193 use_return_insn (int iscond
, rtx sibling
)
3196 unsigned int func_type
;
3197 unsigned long saved_int_regs
;
3198 unsigned HOST_WIDE_INT stack_adjust
;
3199 arm_stack_offsets
*offsets
;
3201 /* Never use a return instruction before reload has run. */
3202 if (!reload_completed
)
3205 func_type
= arm_current_func_type ();
3207 /* Naked, volatile and stack alignment functions need special
3209 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3212 /* So do interrupt functions that use the frame pointer and Thumb
3213 interrupt functions. */
3214 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3217 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3218 && !optimize_function_for_size_p (cfun
))
3221 offsets
= arm_get_frame_offsets ();
3222 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3224 /* As do variadic functions. */
3225 if (crtl
->args
.pretend_args_size
3226 || cfun
->machine
->uses_anonymous_args
3227 /* Or if the function calls __builtin_eh_return () */
3228 || crtl
->calls_eh_return
3229 /* Or if the function calls alloca */
3230 || cfun
->calls_alloca
3231 /* Or if there is a stack adjustment. However, if the stack pointer
3232 is saved on the stack, we can use a pre-incrementing stack load. */
3233 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3234 && stack_adjust
== 4)))
3237 saved_int_regs
= offsets
->saved_regs_mask
;
3239 /* Unfortunately, the insn
3241 ldmib sp, {..., sp, ...}
3243 triggers a bug on most SA-110 based devices, such that the stack
3244 pointer won't be correctly restored if the instruction takes a
3245 page fault. We work around this problem by popping r3 along with
3246 the other registers, since that is never slower than executing
3247 another instruction.
3249 We test for !arm_arch5 here, because code for any architecture
3250 less than this could potentially be run on one of the buggy
3252 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3254 /* Validate that r3 is a call-clobbered register (always true in
3255 the default abi) ... */
3256 if (!call_used_regs
[3])
3259 /* ... that it isn't being used for a return value ... */
3260 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3263 /* ... or for a tail-call argument ... */
3266 gcc_assert (CALL_P (sibling
));
3268 if (find_regno_fusage (sibling
, USE
, 3))
3272 /* ... and that there are no call-saved registers in r0-r2
3273 (always true in the default ABI). */
3274 if (saved_int_regs
& 0x7)
3278 /* Can't be done if interworking with Thumb, and any registers have been
3280 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3283 /* On StrongARM, conditional returns are expensive if they aren't
3284 taken and multiple registers have been stacked. */
3285 if (iscond
&& arm_tune_strongarm
)
3287 /* Conditional return when just the LR is stored is a simple
3288 conditional-load instruction, that's not expensive. */
3289 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3293 && arm_pic_register
!= INVALID_REGNUM
3294 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3298 /* If there are saved registers but the LR isn't saved, then we need
3299 two instructions for the return. */
3300 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3303 /* Can't be done if any of the VFP regs are pushed,
3304 since this also requires an insn. */
3305 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3306 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3307 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3310 if (TARGET_REALLY_IWMMXT
)
3311 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3312 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3318 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3319 shrink-wrapping if possible. This is the case if we need to emit a
3320 prologue, which we can test by looking at the offsets. */
3322 use_simple_return_p (void)
3324 arm_stack_offsets
*offsets
;
3326 offsets
= arm_get_frame_offsets ();
/* A nonzero outgoing-args offset implies a prologue is required.  */
3327 return offsets
->outgoing_args
!= 0;
3330 /* Return TRUE if int I is a valid immediate ARM constant. */
/* ARM data-processing immediates are an 8-bit value rotated right by an
   even amount; Thumb-2 additionally allows replicated byte/halfword
   patterns.  NOTE(review): several return statements and the Thumb-2
   branch structure are missing from this extract; text preserved exactly
   as found.  */
3333 const_ok_for_arm (HOST_WIDE_INT i
)
3337 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3338 be all zero, or all one. */
3339 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3340 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3341 != ((~(unsigned HOST_WIDE_INT
) 0)
3342 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3345 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3347 /* Fast return for 0 and small values. We must do this for zero, since
3348 the code below can't handle that one case. */
3349 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3352 /* Get the number of trailing zeros. */
3353 lowbit
= ffs((int) i
) - 1;
3355 /* Only even shifts are allowed in ARM mode so round down to the
3356 nearest even number. */
3360 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3365 /* Allow rotated constants in ARM mode. */
3367 && ((i
& ~0xc000003f) == 0
3368 || (i
& ~0xf000000f) == 0
3369 || (i
& ~0xfc000003) == 0))
3376 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3379 if (i
== v
|| i
== (v
| (v
<< 8)))
3382 /* Allow repeated pattern 0xXY00XY00. */
3392 /* Return true if I is a valid constant for the operation CODE. */
/* Besides plain validity, tries operation-specific rewrites: movw for SET,
   addw/subw for PLUS, and complemented/negated forms for logical ops.
   NOTE(review): the switch statement skeleton and some case labels are
   missing from this extract; text preserved exactly as found.  */
3394 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3396 if (const_ok_for_arm (i
))
3402 /* See if we can use movw. */
3403 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3406 /* Otherwise, try mvn. */
3407 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3410 /* See if we can use addw or subw. */
3412 && ((i
& 0xfffff000) == 0
3413 || ((-i
) & 0xfffff000) == 0))
3415 /* else fall through. */
3435 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3437 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3443 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3447 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3454 /* Return true if I is a valid di mode constant for the operation CODE. */
/* Splits I into 32-bit halves and checks each against the SImode predicates.
   NOTE(review): the switch skeleton is missing from this extract; text
   preserved exactly as found.  */
3456 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3458 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3459 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3460 rtx hi
= GEN_INT (hi_val
);
3461 rtx lo
= GEN_INT (lo_val
);
/* 0xFFFFFFFF halves are accepted even when not otherwise encodable
   (presumably handled via mvn of zero — TODO confirm against the insn
   patterns).  */
3471 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3472 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
3474 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3481 /* Emit a sequence of insns to handle a large constant.
3482 CODE is the code of the operation required, it can be any of SET, PLUS,
3483 IOR, AND, XOR, MINUS;
3484 MODE is the mode in which the operation is being performed;
3485 VAL is the integer to operate on;
3486 SOURCE is the other operand (a register, or a null-pointer for SET);
3487 SUBTARGETS means it is safe to create scratch registers if that will
3488 either produce a simpler sequence, or we will want to cse the values.
3489 Return value is the number of insns emitted. */
3491 /* ??? Tweak this for thumb2. */
/* NOTE(review): local declarations (e.g. `cond') and several braces are
   missing from this extract; text preserved exactly as found.  */
3493 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
3494 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
3498 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3499 cond
= COND_EXEC_TEST (PATTERN (insn
));
3503 if (subtargets
|| code
== SET
3504 || (REG_P (target
) && REG_P (source
)
3505 && REGNO (target
) != REGNO (source
)))
3507 /* After arm_reorg has been called, we can't fix up expensive
3508 constants by pushing them into memory so we must synthesize
3509 them in-line, regardless of the cost. This is only likely to
3510 be more costly on chips that have load delay slots and we are
3511 compiling without running the scheduler (so no splitting
3512 occurred before the final instruction emission).
3514 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3516 if (!cfun
->machine
->after_arm_reorg
3518 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
3520 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
3525 /* Currently SET is the only monadic value for CODE, all
3526 the rest are diadic. */
3527 if (TARGET_USE_MOVT
)
3528 arm_emit_movpair (target
, GEN_INT (val
));
3530 emit_set_insn (target
, GEN_INT (val
));
3536 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
3538 if (TARGET_USE_MOVT
)
3539 arm_emit_movpair (temp
, GEN_INT (val
));
3541 emit_set_insn (temp
, GEN_INT (val
));
3543 /* For MINUS, the value is subtracted from, since we never
3544 have subtraction of a constant. */
3546 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
3548 emit_set_insn (target
,
3549 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
/* Fall back to the full constant-synthesis algorithm.  */
3555 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
3559 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3560 ARM/THUMB2 immediates, and add up to VAL.
3561 The function return value gives the number of insns required. */
/* First scans VAL for the largest run of zero bit-pairs to pick a good
   starting bit position, then compares the sequence produced from that
   start against the one starting at bit 0, keeping the shorter.
   NOTE(review): declarations of i/best_start/insns1/insns2 and some braces
   are missing from this extract; text preserved exactly as found.  */
3563 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3564 struct four_ints
*return_sequence
)
3566 int best_consecutive_zeros
= 0;
3570 struct four_ints tmp_sequence
;
3572 /* If we aren't targeting ARM, the best place to start is always at
3573 the bottom, otherwise look more closely. */
3576 for (i
= 0; i
< 32; i
+= 2)
3578 int consecutive_zeros
= 0;
3580 if (!(val
& (3 << i
)))
3582 while ((i
< 32) && !(val
& (3 << i
)))
3584 consecutive_zeros
+= 2;
3587 if (consecutive_zeros
> best_consecutive_zeros
)
3589 best_consecutive_zeros
= consecutive_zeros
;
3590 best_start
= i
- consecutive_zeros
;
3597 /* So long as it won't require any more insns to do so, it's
3598 desirable to emit a small constant (in bits 0...9) in the last
3599 insn. This way there is more chance that it can be combined with
3600 a later addressing insn to form a pre-indexed load or store
3601 operation. Consider:
3603 *((volatile int *)0xe0000100) = 1;
3604 *((volatile int *)0xe0000110) = 2;
3606 We want this to wind up as:
3610 str rB, [rA, #0x100]
3612 str rB, [rA, #0x110]
3614 rather than having to synthesize both large constants from scratch.
3616 Therefore, we calculate how many insns would be required to emit
3617 the constant starting from `best_start', and also starting from
3618 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3619 yield a shorter sequence, we may as well use zero. */
3620 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
3622 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
3624 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
3625 if (insns2
<= insns1
)
3627 *return_sequence
= tmp_sequence
;
3635 /* As for optimal_immediate_sequence, but starting at bit-position I. */
/* Greedily peels ARM/Thumb-2-encodable immediates off VAL, storing each in
   RETURN_SEQUENCE->i[] and clearing its bits from the remainder, until the
   remainder is zero.  NOTE(review): the enclosing loop, several assignments
   and the final return are missing from this extract; text preserved
   exactly as found.  */
3637 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3638 struct four_ints
*return_sequence
, int i
)
3640 int remainder
= val
& 0xffffffff;
3643 /* Try and find a way of doing the job in either two or three
3646 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3647 location. We start at position I. This may be the MSB, or
3648 optimal_immediate_sequence may have positioned it at the largest block
3649 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3650 wrapping around to the top of the word when we drop off the bottom.
3651 In the worst case this code should produce no more than four insns.
3653 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3654 constants, shifted to any arbitrary location. We should always start
3659 unsigned int b1
, b2
, b3
, b4
;
3660 unsigned HOST_WIDE_INT result
;
3663 gcc_assert (insns
< 4);
3668 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3669 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3672 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3673 /* We can use addw/subw for the last 12 bits. */
3677 /* Use an 8-bit shifted/rotated immediate. */
3681 result
= remainder
& ((0x0ff << end
)
3682 | ((i
< end
) ? (0xff >> (32 - end
))
3689 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3690 arbitrary shifts. */
3691 i
-= TARGET_ARM
? 2 : 1;
3695 /* Next, see if we can do a better job with a thumb2 replicated
3698 We do it this way around to catch the cases like 0x01F001E0 where
3699 two 8-bit immediates would work, but a replicated constant would
3702 TODO: 16-bit constants that don't clear all the bits, but still win.
3703 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3706 b1
= (remainder
& 0xff000000) >> 24;
3707 b2
= (remainder
& 0x00ff0000) >> 16;
3708 b3
= (remainder
& 0x0000ff00) >> 8;
3709 b4
= remainder
& 0xff;
3713 /* The 8-bit immediate already found clears b1 (and maybe b2),
3714 but must leave b3 and b4 alone. */
3716 /* First try to find a 32-bit replicated constant that clears
3717 almost everything. We can assume that we can't do it in one,
3718 or else we wouldn't be here. */
3719 unsigned int tmp
= b1
& b2
& b3
& b4
;
3720 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3722 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3723 + (tmp
== b3
) + (tmp
== b4
);
3725 && (matching_bytes
>= 3
3726 || (matching_bytes
== 2
3727 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3729 /* At least 3 of the bytes match, and the fourth has at
3730 least as many bits set, or two of the bytes match
3731 and it will only require one more insn to finish. */
3739 /* Second, try to find a 16-bit replicated constant that can
3740 leave three of the bytes clear. If b2 or b4 is already
3741 zero, then we can. If the 8-bit from above would not
3742 clear b2 anyway, then we still win. */
3743 else if (b1
== b3
&& (!b2
|| !b4
3744 || (remainder
& 0x00ff0000 & ~result
)))
3746 result
= remainder
& 0xff00ff00;
3752 /* The 8-bit immediate already found clears b2 (and maybe b3)
3753 and we don't get here unless b1 is already clear, but it will
3754 leave b4 unchanged. */
3756 /* If we can clear b2 and b4 at once, then we win, since the
3757 8-bits couldn't possibly reach that far. */
3760 result
= remainder
& 0x00ff00ff;
3766 return_sequence
->i
[insns
++] = result
;
3767 remainder
&= ~result
;
3769 if (code
== SET
|| code
== MINUS
)
3777 /* Emit an instruction with the indicated PATTERN. If COND is
3778 non-NULL, conditionalize the execution of the instruction on COND
/* COND is copied (copy_rtx) so the caller's rtx is not shared into the
   emitted COND_EXEC wrapper.  */
3782 emit_constant_insn (rtx cond
, rtx pattern
)
3785 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
3786 emit_insn (pattern
);
3789 /* As above, but extra parameter GENERATE which, if clear, suppresses
/* Core constant-synthesis routine: returns the insn count for materializing
   VAL (combined with SOURCE via CODE into TARGET), emitting the insns only
   when GENERATE is set.  Strategy, in order: degenerate cases per CODE;
   one-insn immediates; UXTH/UBFX masks for AND; sign-extension, two-immediate
   difference, and shift tricks based on leading/trailing 0/1 counts; finally
   the generic positive/negated/inverted immediate sequences, picking the
   cheapest.  NOTE(review): this extract is missing many lines (switch
   skeleton, returns, braces); text preserved exactly as found.  */
3793 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
3794 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
3799 int final_invert
= 0;
3801 int set_sign_bit_copies
= 0;
3802 int clear_sign_bit_copies
= 0;
3803 int clear_zero_bit_copies
= 0;
3804 int set_zero_bit_copies
= 0;
3805 int insns
= 0, neg_insns
, inv_insns
;
3806 unsigned HOST_WIDE_INT temp1
, temp2
;
3807 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3808 struct four_ints
*immediates
;
3809 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
3811 /* Find out which operations are safe for a given CODE. Also do a quick
3812 check for degenerate cases; these can occur when DImode operations
3825 if (remainder
== 0xffffffff)
3828 emit_constant_insn (cond
,
3829 gen_rtx_SET (VOIDmode
, target
,
3830 GEN_INT (ARM_SIGN_EXTEND (val
))));
3836 if (reload_completed
&& rtx_equal_p (target
, source
))
3840 emit_constant_insn (cond
,
3841 gen_rtx_SET (VOIDmode
, target
, source
));
3850 emit_constant_insn (cond
,
3851 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
3854 if (remainder
== 0xffffffff)
3856 if (reload_completed
&& rtx_equal_p (target
, source
))
3859 emit_constant_insn (cond
,
3860 gen_rtx_SET (VOIDmode
, target
, source
));
3869 if (reload_completed
&& rtx_equal_p (target
, source
))
3872 emit_constant_insn (cond
,
3873 gen_rtx_SET (VOIDmode
, target
, source
));
3877 if (remainder
== 0xffffffff)
3880 emit_constant_insn (cond
,
3881 gen_rtx_SET (VOIDmode
, target
,
3882 gen_rtx_NOT (mode
, source
)));
3889 /* We treat MINUS as (val - source), since (source - val) is always
3890 passed as (source + (-val)). */
3894 emit_constant_insn (cond
,
3895 gen_rtx_SET (VOIDmode
, target
,
3896 gen_rtx_NEG (mode
, source
)));
3899 if (const_ok_for_arm (val
))
3902 emit_constant_insn (cond
,
3903 gen_rtx_SET (VOIDmode
, target
,
3904 gen_rtx_MINUS (mode
, GEN_INT (val
),
3915 /* If we can do it in one insn get out quickly. */
3916 if (const_ok_for_op (val
, code
))
3919 emit_constant_insn (cond
,
3920 gen_rtx_SET (VOIDmode
, target
,
3922 ? gen_rtx_fmt_ee (code
, mode
, source
,
3928 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3930 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
3931 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
3935 if (mode
== SImode
&& i
== 16)
3936 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3938 emit_constant_insn (cond
,
3939 gen_zero_extendhisi2
3940 (target
, gen_lowpart (HImode
, source
)));
3942 /* Extz only supports SImode, but we can coerce the operands
3944 emit_constant_insn (cond
,
3945 gen_extzv_t2 (gen_lowpart (SImode
, target
),
3946 gen_lowpart (SImode
, source
),
3947 GEN_INT (i
), const0_rtx
));
3953 /* Calculate a few attributes that may be useful for specific
3955 /* Count number of leading zeros. */
3956 for (i
= 31; i
>= 0; i
--)
3958 if ((remainder
& (1 << i
)) == 0)
3959 clear_sign_bit_copies
++;
3964 /* Count number of leading 1's. */
3965 for (i
= 31; i
>= 0; i
--)
3967 if ((remainder
& (1 << i
)) != 0)
3968 set_sign_bit_copies
++;
3973 /* Count number of trailing zero's. */
3974 for (i
= 0; i
<= 31; i
++)
3976 if ((remainder
& (1 << i
)) == 0)
3977 clear_zero_bit_copies
++;
3982 /* Count number of trailing 1's. */
3983 for (i
= 0; i
<= 31; i
++)
3985 if ((remainder
& (1 << i
)) != 0)
3986 set_zero_bit_copies
++;
3994 /* See if we can do this by sign_extending a constant that is known
3995 to be negative. This is a good way of doing it, since the shift
3996 may well merge into a subsequent insn. */
3997 if (set_sign_bit_copies
> 1)
3999 if (const_ok_for_arm
4000 (temp1
= ARM_SIGN_EXTEND (remainder
4001 << (set_sign_bit_copies
- 1))))
4005 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4006 emit_constant_insn (cond
,
4007 gen_rtx_SET (VOIDmode
, new_src
,
4009 emit_constant_insn (cond
,
4010 gen_ashrsi3 (target
, new_src
,
4011 GEN_INT (set_sign_bit_copies
- 1)));
4015 /* For an inverted constant, we will need to set the low bits,
4016 these will be shifted out of harm's way. */
4017 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4018 if (const_ok_for_arm (~temp1
))
4022 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4023 emit_constant_insn (cond
,
4024 gen_rtx_SET (VOIDmode
, new_src
,
4026 emit_constant_insn (cond
,
4027 gen_ashrsi3 (target
, new_src
,
4028 GEN_INT (set_sign_bit_copies
- 1)));
4034 /* See if we can calculate the value as the difference between two
4035 valid immediates. */
4036 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4038 int topshift
= clear_sign_bit_copies
& ~1;
4040 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4041 & (0xff000000 >> topshift
));
4043 /* If temp1 is zero, then that means the 9 most significant
4044 bits of remainder were 1 and we've caused it to overflow.
4045 When topshift is 0 we don't need to do anything since we
4046 can borrow from 'bit 32'. */
4047 if (temp1
== 0 && topshift
!= 0)
4048 temp1
= 0x80000000 >> (topshift
- 1);
4050 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4052 if (const_ok_for_arm (temp2
))
4056 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4057 emit_constant_insn (cond
,
4058 gen_rtx_SET (VOIDmode
, new_src
,
4060 emit_constant_insn (cond
,
4061 gen_addsi3 (target
, new_src
,
4069 /* See if we can generate this by setting the bottom (or the top)
4070 16 bits, and then shifting these into the other half of the
4071 word. We only look for the simplest cases, to do more would cost
4072 too much. Be careful, however, not to generate this when the
4073 alternative would take fewer insns. */
4074 if (val
& 0xffff0000)
4076 temp1
= remainder
& 0xffff0000;
4077 temp2
= remainder
& 0x0000ffff;
4079 /* Overlaps outside this range are best done using other methods. */
4080 for (i
= 9; i
< 24; i
++)
4082 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4083 && !const_ok_for_arm (temp2
))
4085 rtx new_src
= (subtargets
4086 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4088 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4089 source
, subtargets
, generate
);
4097 gen_rtx_ASHIFT (mode
, source
,
4104 /* Don't duplicate cases already considered. */
4105 for (i
= 17; i
< 24; i
++)
4107 if (((temp1
| (temp1
>> i
)) == remainder
)
4108 && !const_ok_for_arm (temp1
))
4110 rtx new_src
= (subtargets
4111 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4113 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4114 source
, subtargets
, generate
);
4119 gen_rtx_SET (VOIDmode
, target
,
4122 gen_rtx_LSHIFTRT (mode
, source
,
4133 /* If we have IOR or XOR, and the constant can be loaded in a
4134 single instruction, and we can find a temporary to put it in,
4135 then this can be done in two instructions instead of 3-4. */
4137 /* TARGET can't be NULL if SUBTARGETS is 0 */
4138 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4140 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4144 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4146 emit_constant_insn (cond
,
4147 gen_rtx_SET (VOIDmode
, sub
,
4149 emit_constant_insn (cond
,
4150 gen_rtx_SET (VOIDmode
, target
,
4151 gen_rtx_fmt_ee (code
, mode
,
4162 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4163 and the remainder 0s for e.g. 0xfff00000)
4164 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4166 This can be done in 2 instructions by using shifts with mov or mvn.
4171 mvn r0, r0, lsr #12 */
4172 if (set_sign_bit_copies
> 8
4173 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
4177 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4178 rtx shift
= GEN_INT (set_sign_bit_copies
);
4182 gen_rtx_SET (VOIDmode
, sub
,
4184 gen_rtx_ASHIFT (mode
,
4189 gen_rtx_SET (VOIDmode
, target
,
4191 gen_rtx_LSHIFTRT (mode
, sub
,
4198 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4200 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4202 For eg. r0 = r0 | 0xfff
4207 if (set_zero_bit_copies
> 8
4208 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4212 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4213 rtx shift
= GEN_INT (set_zero_bit_copies
);
4217 gen_rtx_SET (VOIDmode
, sub
,
4219 gen_rtx_LSHIFTRT (mode
,
4224 gen_rtx_SET (VOIDmode
, target
,
4226 gen_rtx_ASHIFT (mode
, sub
,
4232 /* This will never be reached for Thumb2 because orn is a valid
4233 instruction. This is for Thumb1 and the ARM 32 bit cases.
4235 x = y | constant (such that ~constant is a valid constant)
4237 x = ~(~y & ~constant).
4239 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4243 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4244 emit_constant_insn (cond
,
4245 gen_rtx_SET (VOIDmode
, sub
,
4246 gen_rtx_NOT (mode
, source
)));
4249 sub
= gen_reg_rtx (mode
);
4250 emit_constant_insn (cond
,
4251 gen_rtx_SET (VOIDmode
, sub
,
4252 gen_rtx_AND (mode
, source
,
4254 emit_constant_insn (cond
,
4255 gen_rtx_SET (VOIDmode
, target
,
4256 gen_rtx_NOT (mode
, sub
)));
4263 /* See if two shifts will do 2 or more insn's worth of work. */
4264 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4266 HOST_WIDE_INT shift_mask
= ((0xffffffff
4267 << (32 - clear_sign_bit_copies
))
4270 if ((remainder
| shift_mask
) != 0xffffffff)
4274 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4275 insns
= arm_gen_constant (AND
, mode
, cond
,
4276 remainder
| shift_mask
,
4277 new_src
, source
, subtargets
, 1);
4282 rtx targ
= subtargets
? NULL_RTX
: target
;
4283 insns
= arm_gen_constant (AND
, mode
, cond
,
4284 remainder
| shift_mask
,
4285 targ
, source
, subtargets
, 0);
4291 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4292 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4294 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4295 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4301 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4303 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4305 if ((remainder
| shift_mask
) != 0xffffffff)
4309 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4311 insns
= arm_gen_constant (AND
, mode
, cond
,
4312 remainder
| shift_mask
,
4313 new_src
, source
, subtargets
, 1);
4318 rtx targ
= subtargets
? NULL_RTX
: target
;
4320 insns
= arm_gen_constant (AND
, mode
, cond
,
4321 remainder
| shift_mask
,
4322 targ
, source
, subtargets
, 0);
4328 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4329 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4331 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4332 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4344 /* Calculate what the instruction sequences would be if we generated it
4345 normally, negated, or inverted. */
4347 /* AND cannot be split into multiple insns, so invert and use BIC. */
4350 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4353 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4358 if (can_invert
|| final_invert
)
4359 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4364 immediates
= &pos_immediates
;
4366 /* Is the negated immediate sequence more efficient? */
4367 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4370 immediates
= &neg_immediates
;
4375 /* Is the inverted immediate sequence more efficient?
4376 We must allow for an extra NOT instruction for XOR operations, although
4377 there is some chance that the final 'mvn' will get optimized later. */
4378 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4381 immediates
= &inv_immediates
;
4389 /* Now output the chosen sequence as instructions. */
4392 for (i
= 0; i
< insns
; i
++)
4394 rtx new_src
, temp1_rtx
;
4396 temp1
= immediates
->i
[i
];
4398 if (code
== SET
|| code
== MINUS
)
4399 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4400 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4401 new_src
= gen_reg_rtx (mode
);
4407 else if (can_negate
)
4410 temp1
= trunc_int_for_mode (temp1
, mode
);
4411 temp1_rtx
= GEN_INT (temp1
);
4415 else if (code
== MINUS
)
4416 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4418 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4420 emit_constant_insn (cond
,
4421 gen_rtx_SET (VOIDmode
, new_src
,
4427 can_negate
= can_invert
;
4431 else if (code
== MINUS
)
4439 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
4440 gen_rtx_NOT (mode
, source
)));
4447 /* Canonicalize a comparison so that we are more likely to recognize it.
4448 This can be done for a few constant compares, where we can make the
4449 immediate value easier to load. */
4452 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4453 bool op0_preserve_value
)
4455 enum machine_mode mode
;
4456 unsigned HOST_WIDE_INT i
, maxval
;
4458 mode
= GET_MODE (*op0
);
4459 if (mode
== VOIDmode
)
4460 mode
= GET_MODE (*op1
);
4462 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4464 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4465 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4466 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4467 for GTU/LEU in Thumb mode. */
4472 if (*code
== GT
|| *code
== LE
4473 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4475 /* Missing comparison. First try to use an available
4477 if (CONST_INT_P (*op1
))
4485 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4487 *op1
= GEN_INT (i
+ 1);
4488 *code
= *code
== GT
? GE
: LT
;
4494 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4495 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4497 *op1
= GEN_INT (i
+ 1);
4498 *code
= *code
== GTU
? GEU
: LTU
;
4507 /* If that did not work, reverse the condition. */
4508 if (!op0_preserve_value
)
4513 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4519 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4520 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4521 to facilitate possible combining with a cmp into 'ands'. */
4523 && GET_CODE (*op0
) == ZERO_EXTEND
4524 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4525 && GET_MODE (XEXP (*op0
, 0)) == QImode
4526 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4527 && subreg_lowpart_p (XEXP (*op0
, 0))
4528 && *op1
== const0_rtx
)
4529 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4532 /* Comparisons smaller than DImode. Only adjust comparisons against
4533 an out-of-range constant. */
4534 if (!CONST_INT_P (*op1
)
4535 || const_ok_for_arm (INTVAL (*op1
))
4536 || const_ok_for_arm (- INTVAL (*op1
)))
4550 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4552 *op1
= GEN_INT (i
+ 1);
4553 *code
= *code
== GT
? GE
: LT
;
4561 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4563 *op1
= GEN_INT (i
- 1);
4564 *code
= *code
== GE
? GT
: LE
;
4571 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4572 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4574 *op1
= GEN_INT (i
+ 1);
4575 *code
= *code
== GTU
? GEU
: LTU
;
4583 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4585 *op1
= GEN_INT (i
- 1);
4586 *code
= *code
== GEU
? GTU
: LEU
;
4597 /* Define how to find the value returned by a function. */
4600 arm_function_value(const_tree type
, const_tree func
,
4601 bool outgoing ATTRIBUTE_UNUSED
)
4603 enum machine_mode mode
;
4604 int unsignedp ATTRIBUTE_UNUSED
;
4605 rtx r ATTRIBUTE_UNUSED
;
4607 mode
= TYPE_MODE (type
);
4609 if (TARGET_AAPCS_BASED
)
4610 return aapcs_allocate_return_reg (mode
, type
, func
);
4612 /* Promote integer types. */
4613 if (INTEGRAL_TYPE_P (type
))
4614 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
4616 /* Promotes small structs returned in a register to full-word size
4617 for big-endian AAPCS. */
4618 if (arm_return_in_msb (type
))
4620 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4621 if (size
% UNITS_PER_WORD
!= 0)
4623 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4624 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4628 return arm_libcall_value_1 (mode
);
4631 /* libcall hashtable helpers. */
4633 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
4635 typedef rtx_def value_type
;
4636 typedef rtx_def compare_type
;
4637 static inline hashval_t
hash (const value_type
*);
4638 static inline bool equal (const value_type
*, const compare_type
*);
4639 static inline void remove (value_type
*);
4643 libcall_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
4645 return rtx_equal_p (p1
, p2
);
4649 libcall_hasher::hash (const value_type
*p1
)
4651 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
4654 typedef hash_table
<libcall_hasher
> libcall_table_type
;
4657 add_libcall (libcall_table_type htab
, rtx libcall
)
4659 *htab
.find_slot (libcall
, INSERT
) = libcall
;
4663 arm_libcall_uses_aapcs_base (const_rtx libcall
)
4665 static bool init_done
= false;
4666 static libcall_table_type libcall_htab
;
4672 libcall_htab
.create (31);
4673 add_libcall (libcall_htab
,
4674 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
4675 add_libcall (libcall_htab
,
4676 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
4677 add_libcall (libcall_htab
,
4678 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
4679 add_libcall (libcall_htab
,
4680 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
4682 add_libcall (libcall_htab
,
4683 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
4684 add_libcall (libcall_htab
,
4685 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
4686 add_libcall (libcall_htab
,
4687 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
4688 add_libcall (libcall_htab
,
4689 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
4691 add_libcall (libcall_htab
,
4692 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
4693 add_libcall (libcall_htab
,
4694 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
4695 add_libcall (libcall_htab
,
4696 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
4697 add_libcall (libcall_htab
,
4698 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
4699 add_libcall (libcall_htab
,
4700 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
4701 add_libcall (libcall_htab
,
4702 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
4703 add_libcall (libcall_htab
,
4704 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
4705 add_libcall (libcall_htab
,
4706 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
4708 /* Values from double-precision helper functions are returned in core
4709 registers if the selected core only supports single-precision
4710 arithmetic, even if we are using the hard-float ABI. The same is
4711 true for single-precision helpers, but we will never be using the
4712 hard-float ABI on a CPU which doesn't support single-precision
4713 operations in hardware. */
4714 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
4715 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
4716 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
4717 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
4718 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
4719 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
4720 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
4721 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
4722 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
4723 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
4724 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
4725 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
4727 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
4731 return libcall
&& libcall_htab
.find (libcall
) != NULL
;
4735 arm_libcall_value_1 (enum machine_mode mode
)
4737 if (TARGET_AAPCS_BASED
)
4738 return aapcs_libcall_value (mode
);
4739 else if (TARGET_IWMMXT_ABI
4740 && arm_vector_mode_supported_p (mode
))
4741 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
4743 return gen_rtx_REG (mode
, ARG_REGISTER (1));
4746 /* Define how to find the value returned by a library function
4747 assuming the value has mode MODE. */
4750 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
4752 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
4753 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4755 /* The following libcalls return their result in integer registers,
4756 even though they return a floating point value. */
4757 if (arm_libcall_uses_aapcs_base (libcall
))
4758 return gen_rtx_REG (mode
, ARG_REGISTER(1));
4762 return arm_libcall_value_1 (mode
);
4765 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4768 arm_function_value_regno_p (const unsigned int regno
)
4770 if (regno
== ARG_REGISTER (1)
4772 && TARGET_AAPCS_BASED
4774 && TARGET_HARD_FLOAT
4775 && regno
== FIRST_VFP_REGNUM
)
4776 || (TARGET_IWMMXT_ABI
4777 && regno
== FIRST_IWMMXT_REGNUM
))
4783 /* Determine the amount of memory needed to store the possible return
4784 registers of an untyped call. */
4786 arm_apply_result_size (void)
4792 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
4794 if (TARGET_IWMMXT_ABI
)
4801 /* Decide whether TYPE should be returned in memory (true)
4802 or in a register (false). FNTYPE is the type of the function making
4805 arm_return_in_memory (const_tree type
, const_tree fntype
)
4809 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
4811 if (TARGET_AAPCS_BASED
)
4813 /* Simple, non-aggregate types (ie not including vectors and
4814 complex) are always returned in a register (or registers).
4815 We don't care about which register here, so we can short-cut
4816 some of the detail. */
4817 if (!AGGREGATE_TYPE_P (type
)
4818 && TREE_CODE (type
) != VECTOR_TYPE
4819 && TREE_CODE (type
) != COMPLEX_TYPE
)
4822 /* Any return value that is no larger than one word can be
4824 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
4827 /* Check any available co-processors to see if they accept the
4828 type as a register candidate (VFP, for example, can return
4829 some aggregates in consecutive registers). These aren't
4830 available if the call is variadic. */
4831 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
4834 /* Vector values should be returned using ARM registers, not
4835 memory (unless they're over 16 bytes, which will break since
4836 we only have four call-clobbered registers to play with). */
4837 if (TREE_CODE (type
) == VECTOR_TYPE
)
4838 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4840 /* The rest go in memory. */
4844 if (TREE_CODE (type
) == VECTOR_TYPE
)
4845 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4847 if (!AGGREGATE_TYPE_P (type
) &&
4848 (TREE_CODE (type
) != VECTOR_TYPE
))
4849 /* All simple types are returned in registers. */
4852 if (arm_abi
!= ARM_ABI_APCS
)
4854 /* ATPCS and later return aggregate types in memory only if they are
4855 larger than a word (or are variable size). */
4856 return (size
< 0 || size
> UNITS_PER_WORD
);
4859 /* For the arm-wince targets we choose to be compatible with Microsoft's
4860 ARM and Thumb compilers, which always return aggregates in memory. */
4862 /* All structures/unions bigger than one word are returned in memory.
4863 Also catch the case where int_size_in_bytes returns -1. In this case
4864 the aggregate is either huge or of variable size, and in either case
4865 we will want to return it via memory and not in a register. */
4866 if (size
< 0 || size
> UNITS_PER_WORD
)
4869 if (TREE_CODE (type
) == RECORD_TYPE
)
4873 /* For a struct the APCS says that we only return in a register
4874 if the type is 'integer like' and every addressable element
4875 has an offset of zero. For practical purposes this means
4876 that the structure can have at most one non bit-field element
4877 and that this element must be the first one in the structure. */
4879 /* Find the first field, ignoring non FIELD_DECL things which will
4880 have been created by C++. */
4881 for (field
= TYPE_FIELDS (type
);
4882 field
&& TREE_CODE (field
) != FIELD_DECL
;
4883 field
= DECL_CHAIN (field
))
4887 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4889 /* Check that the first field is valid for returning in a register. */
4891 /* ... Floats are not allowed */
4892 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4895 /* ... Aggregates that are not themselves valid for returning in
4896 a register are not allowed. */
4897 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4900 /* Now check the remaining fields, if any. Only bitfields are allowed,
4901 since they are not addressable. */
4902 for (field
= DECL_CHAIN (field
);
4904 field
= DECL_CHAIN (field
))
4906 if (TREE_CODE (field
) != FIELD_DECL
)
4909 if (!DECL_BIT_FIELD_TYPE (field
))
4916 if (TREE_CODE (type
) == UNION_TYPE
)
4920 /* Unions can be returned in registers if every element is
4921 integral, or can be returned in an integer register. */
4922 for (field
= TYPE_FIELDS (type
);
4924 field
= DECL_CHAIN (field
))
4926 if (TREE_CODE (field
) != FIELD_DECL
)
4929 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4932 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4938 #endif /* not ARM_WINCE */
4940 /* Return all other types in memory. */
4944 const struct pcs_attribute_arg
4948 } pcs_attribute_args
[] =
4950 {"aapcs", ARM_PCS_AAPCS
},
4951 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
4953 /* We could recognize these, but changes would be needed elsewhere
4954 * to implement them. */
4955 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
4956 {"atpcs", ARM_PCS_ATPCS
},
4957 {"apcs", ARM_PCS_APCS
},
4959 {NULL
, ARM_PCS_UNKNOWN
}
4963 arm_pcs_from_attribute (tree attr
)
4965 const struct pcs_attribute_arg
*ptr
;
4968 /* Get the value of the argument. */
4969 if (TREE_VALUE (attr
) == NULL_TREE
4970 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
4971 return ARM_PCS_UNKNOWN
;
4973 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
4975 /* Check it against the list of known arguments. */
4976 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4977 if (streq (arg
, ptr
->arg
))
4980 /* An unrecognized interrupt type. */
4981 return ARM_PCS_UNKNOWN
;
4984 /* Get the PCS variant to use for this call. TYPE is the function's type
4985 specification, DECL is the specific declartion. DECL may be null if
4986 the call could be indirect or if this is a library call. */
4988 arm_get_pcs_model (const_tree type
, const_tree decl
)
4990 bool user_convention
= false;
4991 enum arm_pcs user_pcs
= arm_pcs_default
;
4996 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
4999 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5000 user_convention
= true;
5003 if (TARGET_AAPCS_BASED
)
5005 /* Detect varargs functions. These always use the base rules
5006 (no argument is ever a candidate for a co-processor
5008 bool base_rules
= stdarg_p (type
);
5010 if (user_convention
)
5012 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5013 sorry ("non-AAPCS derived PCS variant");
5014 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5015 error ("variadic functions must use the base AAPCS variant");
5019 return ARM_PCS_AAPCS
;
5020 else if (user_convention
)
5022 else if (decl
&& flag_unit_at_a_time
)
5024 /* Local functions never leak outside this compilation unit,
5025 so we are free to use whatever conventions are
5027 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5028 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5030 return ARM_PCS_AAPCS_LOCAL
;
5033 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5034 sorry ("PCS variant");
5036 /* For everything else we use the target's default. */
5037 return arm_pcs_default
;
5042 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5043 const_tree fntype ATTRIBUTE_UNUSED
,
5044 rtx libcall ATTRIBUTE_UNUSED
,
5045 const_tree fndecl ATTRIBUTE_UNUSED
)
5047 /* Record the unallocated VFP registers. */
5048 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5049 pcum
->aapcs_vfp_reg_alloc
= 0;
5052 /* Walk down the type tree of TYPE counting consecutive base elements.
5053 If *MODEP is VOIDmode, then set it to the first valid floating point
5054 type. If a non-floating point type is found, or if a floating point
5055 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5056 otherwise return the count in the sub-tree. */
5058 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
5060 enum machine_mode mode
;
5063 switch (TREE_CODE (type
))
5066 mode
= TYPE_MODE (type
);
5067 if (mode
!= DFmode
&& mode
!= SFmode
)
5070 if (*modep
== VOIDmode
)
5079 mode
= TYPE_MODE (TREE_TYPE (type
));
5080 if (mode
!= DFmode
&& mode
!= SFmode
)
5083 if (*modep
== VOIDmode
)
5092 /* Use V2SImode and V4SImode as representatives of all 64-bit
5093 and 128-bit vector types, whether or not those modes are
5094 supported with the present options. */
5095 size
= int_size_in_bytes (type
);
5108 if (*modep
== VOIDmode
)
5111 /* Vector modes are considered to be opaque: two vectors are
5112 equivalent for the purposes of being homogeneous aggregates
5113 if they are the same size. */
5122 tree index
= TYPE_DOMAIN (type
);
5124 /* Can't handle incomplete types. */
5125 if (!COMPLETE_TYPE_P (type
))
5128 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5131 || !TYPE_MAX_VALUE (index
)
5132 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5133 || !TYPE_MIN_VALUE (index
)
5134 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5138 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5139 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5141 /* There must be no padding. */
5142 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
5143 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
5144 != count
* GET_MODE_BITSIZE (*modep
)))
5156 /* Can't handle incomplete types. */
5157 if (!COMPLETE_TYPE_P (type
))
5160 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5162 if (TREE_CODE (field
) != FIELD_DECL
)
5165 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5171 /* There must be no padding. */
5172 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
5173 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
5174 != count
* GET_MODE_BITSIZE (*modep
)))
5181 case QUAL_UNION_TYPE
:
5183 /* These aren't very interesting except in a degenerate case. */
5188 /* Can't handle incomplete types. */
5189 if (!COMPLETE_TYPE_P (type
))
5192 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5194 if (TREE_CODE (field
) != FIELD_DECL
)
5197 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5200 count
= count
> sub_count
? count
: sub_count
;
5203 /* There must be no padding. */
5204 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
5205 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
5206 != count
* GET_MODE_BITSIZE (*modep
)))
5219 /* Return true if PCS_VARIANT should use VFP registers. */
5221 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5223 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5225 static bool seen_thumb1_vfp
= false;
5227 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5229 sorry ("Thumb-1 hard-float VFP ABI");
5230 /* sorry() is not immediately fatal, so only display this once. */
5231 seen_thumb1_vfp
= true;
5237 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5240 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5241 (TARGET_VFP_DOUBLE
|| !is_double
));
5244 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5245 suitable for passing or returning in VFP registers for the PCS
5246 variant selected. If it is, then *BASE_MODE is updated to contain
5247 a machine mode describing each element of the argument's type and
5248 *COUNT to hold the number of such elements. */
5250 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5251 enum machine_mode mode
, const_tree type
,
5252 enum machine_mode
*base_mode
, int *count
)
5254 enum machine_mode new_mode
= VOIDmode
;
5256 /* If we have the type information, prefer that to working things
5257 out from the mode. */
5260 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5262 if (ag_count
> 0 && ag_count
<= 4)
5267 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5268 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5269 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5274 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5277 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5283 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5286 *base_mode
= new_mode
;
5291 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5292 enum machine_mode mode
, const_tree type
)
5294 int count ATTRIBUTE_UNUSED
;
5295 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
5297 if (!use_vfp_abi (pcs_variant
, false))
5299 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5304 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5307 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5310 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5311 &pcum
->aapcs_vfp_rmode
,
5312 &pcum
->aapcs_vfp_rcount
);
5316 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5317 const_tree type ATTRIBUTE_UNUSED
)
5319 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5320 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5323 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5324 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5326 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5328 || (mode
== TImode
&& ! TARGET_NEON
)
5329 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5332 int rcount
= pcum
->aapcs_vfp_rcount
;
5334 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5338 /* Avoid using unsupported vector modes. */
5339 if (rmode
== V2SImode
)
5341 else if (rmode
== V4SImode
)
5348 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5349 for (i
= 0; i
< rcount
; i
++)
5351 rtx tmp
= gen_rtx_REG (rmode
,
5352 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5353 tmp
= gen_rtx_EXPR_LIST
5355 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5356 XVECEXP (par
, 0, i
) = tmp
;
5359 pcum
->aapcs_reg
= par
;
5362 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5369 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5370 enum machine_mode mode
,
5371 const_tree type ATTRIBUTE_UNUSED
)
5373 if (!use_vfp_abi (pcs_variant
, false))
5376 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5379 enum machine_mode ag_mode
;
5384 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5389 if (ag_mode
== V2SImode
)
5391 else if (ag_mode
== V4SImode
)
5397 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5398 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5399 for (i
= 0; i
< count
; i
++)
5401 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5402 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5403 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5404 XVECEXP (par
, 0, i
) = tmp
;
5410 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5414 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5415 enum machine_mode mode ATTRIBUTE_UNUSED
,
5416 const_tree type ATTRIBUTE_UNUSED
)
5418 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5419 pcum
->aapcs_vfp_reg_alloc
= 0;
5423 #define AAPCS_CP(X) \
5425 aapcs_ ## X ## _cum_init, \
5426 aapcs_ ## X ## _is_call_candidate, \
5427 aapcs_ ## X ## _allocate, \
5428 aapcs_ ## X ## _is_return_candidate, \
5429 aapcs_ ## X ## _allocate_return_reg, \
5430 aapcs_ ## X ## _advance \
5433 /* Table of co-processors that can be used to pass arguments in
5434 registers. Idealy no arugment should be a candidate for more than
5435 one co-processor table entry, but the table is processed in order
5436 and stops after the first match. If that entry then fails to put
5437 the argument into a co-processor register, the argument will go on
5441 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5442 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5444 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5445 BLKmode) is a candidate for this co-processor's registers; this
5446 function should ignore any position-dependent state in
5447 CUMULATIVE_ARGS and only use call-type dependent information. */
5448 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5450 /* Return true if the argument does get a co-processor register; it
5451 should set aapcs_reg to an RTX of the register allocated as is
5452 required for a return from FUNCTION_ARG. */
5453 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5455 /* Return true if a result of mode MODE (or type TYPE if MODE is
5456 BLKmode) is can be returned in this co-processor's registers. */
5457 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5459 /* Allocate and return an RTX element to hold the return type of a
5460 call, this routine must not fail and will only be called if
5461 is_return_candidate returned true with the same parameters. */
5462 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5464 /* Finish processing this argument and prepare to start processing
5466 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5467 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5475 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5480 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5481 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5488 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5490 /* We aren't passed a decl, so we can't check that a call is local.
5491 However, it isn't clear that that would be a win anyway, since it
5492 might limit some tail-calling opportunities. */
5493 enum arm_pcs pcs_variant
;
5497 const_tree fndecl
= NULL_TREE
;
5499 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5502 fntype
= TREE_TYPE (fntype
);
5505 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5508 pcs_variant
= arm_pcs_default
;
5510 if (pcs_variant
!= ARM_PCS_AAPCS
)
5514 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5515 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
5524 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
5527 /* We aren't passed a decl, so we can't check that a call is local.
5528 However, it isn't clear that that would be a win anyway, since it
5529 might limit some tail-calling opportunities. */
5530 enum arm_pcs pcs_variant
;
5531 int unsignedp ATTRIBUTE_UNUSED
;
5535 const_tree fndecl
= NULL_TREE
;
5537 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5540 fntype
= TREE_TYPE (fntype
);
5543 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5546 pcs_variant
= arm_pcs_default
;
5548 /* Promote integer types. */
5549 if (type
&& INTEGRAL_TYPE_P (type
))
5550 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5552 if (pcs_variant
!= ARM_PCS_AAPCS
)
5556 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5557 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5559 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5563 /* Promotes small structs returned in a register to full-word size
5564 for big-endian AAPCS. */
5565 if (type
&& arm_return_in_msb (type
))
5567 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5568 if (size
% UNITS_PER_WORD
!= 0)
5570 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5571 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5575 return gen_rtx_REG (mode
, R0_REGNUM
);
5579 aapcs_libcall_value (enum machine_mode mode
)
5581 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5582 && GET_MODE_SIZE (mode
) <= 4)
5585 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
5588 /* Lay out a function argument using the AAPCS rules. The rule
5589 numbers referred to here are those in the AAPCS. */
5591 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5592 const_tree type
, bool named
)
5597 /* We only need to do this once per argument. */
5598 if (pcum
->aapcs_arg_processed
)
5601 pcum
->aapcs_arg_processed
= true;
5603 /* Special case: if named is false then we are handling an incoming
5604 anonymous argument which is on the stack. */
5608 /* Is this a potential co-processor register candidate? */
5609 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5611 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
5612 pcum
->aapcs_cprc_slot
= slot
;
5614 /* We don't have to apply any of the rules from part B of the
5615 preparation phase, these are handled elsewhere in the
5620 /* A Co-processor register candidate goes either in its own
5621 class of registers or on the stack. */
5622 if (!pcum
->aapcs_cprc_failed
[slot
])
5624 /* C1.cp - Try to allocate the argument to co-processor
5626 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
5629 /* C2.cp - Put the argument on the stack and note that we
5630 can't assign any more candidates in this slot. We also
5631 need to note that we have allocated stack space, so that
5632 we won't later try to split a non-cprc candidate between
5633 core registers and the stack. */
5634 pcum
->aapcs_cprc_failed
[slot
] = true;
5635 pcum
->can_split
= false;
5638 /* We didn't get a register, so this argument goes on the
5640 gcc_assert (pcum
->can_split
== false);
5645 /* C3 - For double-word aligned arguments, round the NCRN up to the
5646 next even number. */
5647 ncrn
= pcum
->aapcs_ncrn
;
5648 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
5651 nregs
= ARM_NUM_REGS2(mode
, type
);
5653 /* Sigh, this test should really assert that nregs > 0, but a GCC
5654 extension allows empty structs and then gives them empty size; it
5655 then allows such a structure to be passed by value. For some of
5656 the code below we have to pretend that such an argument has
5657 non-zero size so that we 'locate' it correctly either in
5658 registers or on the stack. */
5659 gcc_assert (nregs
>= 0);
5661 nregs2
= nregs
? nregs
: 1;
5663 /* C4 - Argument fits entirely in core registers. */
5664 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
5666 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5667 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
5671 /* C5 - Some core registers left and there are no arguments already
5672 on the stack: split this argument between the remaining core
5673 registers and the stack. */
5674 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
5676 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5677 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5678 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
5682 /* C6 - NCRN is set to 4. */
5683 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5685 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
5689 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5690 for a call to a function whose data type is FNTYPE.
5691 For a library call, FNTYPE is NULL. */
5693 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
5695 tree fndecl ATTRIBUTE_UNUSED
)
5697 /* Long call handling. */
5699 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5701 pcum
->pcs_variant
= arm_pcs_default
;
5703 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5705 if (arm_libcall_uses_aapcs_base (libname
))
5706 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
5708 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
5709 pcum
->aapcs_reg
= NULL_RTX
;
5710 pcum
->aapcs_partial
= 0;
5711 pcum
->aapcs_arg_processed
= false;
5712 pcum
->aapcs_cprc_slot
= -1;
5713 pcum
->can_split
= true;
5715 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5719 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5721 pcum
->aapcs_cprc_failed
[i
] = false;
5722 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
5730 /* On the ARM, the offset starts at 0. */
5732 pcum
->iwmmxt_nregs
= 0;
5733 pcum
->can_split
= true;
5735 /* Varargs vectors are treated the same as long long.
5736 named_count avoids having to change the way arm handles 'named' */
5737 pcum
->named_count
= 0;
5740 if (TARGET_REALLY_IWMMXT
&& fntype
)
5744 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
5746 fn_arg
= TREE_CHAIN (fn_arg
))
5747 pcum
->named_count
+= 1;
5749 if (! pcum
->named_count
)
5750 pcum
->named_count
= INT_MAX
;
5754 /* Return true if we use LRA instead of reload pass. */
5758 return arm_lra_flag
;
5761 /* Return true if mode/type need doubleword alignment. */
5763 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
5765 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
5766 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
5770 /* Determine where to put an argument to a function.
5771 Value is zero to push the argument on the stack,
5772 or a hard register in which to store the argument.
5774 MODE is the argument's machine mode.
5775 TYPE is the data type of the argument (as a tree).
5776 This is null for libcalls where that information may
5778 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5779 the preceding args and about the function being called.
5780 NAMED is nonzero if this argument is a named parameter
5781 (otherwise it is an extra parameter matching an ellipsis).
5783 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5784 other arguments are passed on the stack. If (NAMED == 0) (which happens
5785 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5786 defined), say it is passed in the stack (function_prologue will
5787 indeed make it pass in the stack if necessary). */
5790 arm_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
5791 const_tree type
, bool named
)
5793 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5796 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5797 a call insn (op3 of a call_value insn). */
5798 if (mode
== VOIDmode
)
5801 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5803 aapcs_layout_arg (pcum
, mode
, type
, named
);
5804 return pcum
->aapcs_reg
;
5807 /* Varargs vectors are treated the same as long long.
5808 named_count avoids having to change the way arm handles 'named' */
5809 if (TARGET_IWMMXT_ABI
5810 && arm_vector_mode_supported_p (mode
)
5811 && pcum
->named_count
> pcum
->nargs
+ 1)
5813 if (pcum
->iwmmxt_nregs
<= 9)
5814 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
5817 pcum
->can_split
= false;
5822 /* Put doubleword aligned quantities in even register pairs. */
5824 && ARM_DOUBLEWORD_ALIGN
5825 && arm_needs_doubleword_align (mode
, type
))
5828 /* Only allow splitting an arg between regs and memory if all preceding
5829 args were allocated to regs. For args passed by reference we only count
5830 the reference pointer. */
5831 if (pcum
->can_split
)
5834 nregs
= ARM_NUM_REGS2 (mode
, type
);
5836 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
5839 return gen_rtx_REG (mode
, pcum
->nregs
);
5843 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
5845 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
5846 ? DOUBLEWORD_ALIGNMENT
5851 arm_arg_partial_bytes (cumulative_args_t pcum_v
, enum machine_mode mode
,
5852 tree type
, bool named
)
5854 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5855 int nregs
= pcum
->nregs
;
5857 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5859 aapcs_layout_arg (pcum
, mode
, type
, named
);
5860 return pcum
->aapcs_partial
;
5863 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
5866 if (NUM_ARG_REGS
> nregs
5867 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
5869 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
5874 /* Update the data in PCUM to advance over an argument
5875 of mode MODE and data type TYPE.
5876 (TYPE is null for libcalls where that information may not be available.) */
5879 arm_function_arg_advance (cumulative_args_t pcum_v
, enum machine_mode mode
,
5880 const_tree type
, bool named
)
5882 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5884 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5886 aapcs_layout_arg (pcum
, mode
, type
, named
);
5888 if (pcum
->aapcs_cprc_slot
>= 0)
5890 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
5892 pcum
->aapcs_cprc_slot
= -1;
5895 /* Generic stuff. */
5896 pcum
->aapcs_arg_processed
= false;
5897 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
5898 pcum
->aapcs_reg
= NULL_RTX
;
5899 pcum
->aapcs_partial
= 0;
5904 if (arm_vector_mode_supported_p (mode
)
5905 && pcum
->named_count
> pcum
->nargs
5906 && TARGET_IWMMXT_ABI
)
5907 pcum
->iwmmxt_nregs
+= 1;
5909 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
5913 /* Variable sized types are passed by reference. This is a GCC
5914 extension to the ARM ABI. */
5917 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
5918 enum machine_mode mode ATTRIBUTE_UNUSED
,
5919 const_tree type
, bool named ATTRIBUTE_UNUSED
)
5921 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

/* Current pragma state; consulted by arm_set_default_type_attributes.  */
static arm_pragma_enum arm_pragma_long_calls = OFF;
5935 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5937 arm_pragma_long_calls
= LONG
;
5941 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5943 arm_pragma_long_calls
= SHORT
;
5947 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5949 arm_pragma_long_calls
= OFF
;
5952 /* Handle an attribute requiring a FUNCTION_DECL;
5953 arguments as in struct attribute_spec.handler. */
5955 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
5956 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5958 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5960 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5962 *no_add_attrs
= true;
5968 /* Handle an "interrupt" or "isr" attribute;
5969 arguments as in struct attribute_spec.handler. */
5971 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
5976 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5978 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5980 *no_add_attrs
= true;
5982 /* FIXME: the argument if any is checked for type attributes;
5983 should it be checked for decl ones? */
5987 if (TREE_CODE (*node
) == FUNCTION_TYPE
5988 || TREE_CODE (*node
) == METHOD_TYPE
)
5990 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
5992 warning (OPT_Wattributes
, "%qE attribute ignored",
5994 *no_add_attrs
= true;
5997 else if (TREE_CODE (*node
) == POINTER_TYPE
5998 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
5999 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6000 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6002 *node
= build_variant_type_copy (*node
);
6003 TREE_TYPE (*node
) = build_type_attribute_variant
6005 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6006 *no_add_attrs
= true;
6010 /* Possibly pass this attribute on from the type to a decl. */
6011 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6012 | (int) ATTR_FLAG_FUNCTION_NEXT
6013 | (int) ATTR_FLAG_ARRAY_NEXT
))
6015 *no_add_attrs
= true;
6016 return tree_cons (name
, args
, NULL_TREE
);
6020 warning (OPT_Wattributes
, "%qE attribute ignored",
6029 /* Handle a "pcs" attribute; arguments as in struct
6030 attribute_spec.handler. */
6032 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6033 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6035 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6037 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6038 *no_add_attrs
= true;
6043 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6044 /* Handle the "notshared" attribute. This attribute is another way of
6045 requesting hidden visibility. ARM's compiler supports
6046 "__declspec(notshared)"; we support the same thing via an
6050 arm_handle_notshared_attribute (tree
*node
,
6051 tree name ATTRIBUTE_UNUSED
,
6052 tree args ATTRIBUTE_UNUSED
,
6053 int flags ATTRIBUTE_UNUSED
,
6056 tree decl
= TYPE_NAME (*node
);
6060 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6061 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6062 *no_add_attrs
= false;
6068 /* Return 0 if the attributes for two types are incompatible, 1 if they
6069 are compatible, and 2 if they are nearly compatible (which causes a
6070 warning to be generated). */
6072 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6076 /* Check for mismatch of non-default calling convention. */
6077 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6080 /* Check for mismatched call attributes. */
6081 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6082 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6083 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6084 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6086 /* Only bother to check if an attribute is defined. */
6087 if (l1
| l2
| s1
| s2
)
6089 /* If one type has an attribute, the other must have the same attribute. */
6090 if ((l1
!= l2
) || (s1
!= s2
))
6093 /* Disallow mixed attributes. */
6094 if ((l1
& s2
) || (l2
& s1
))
6098 /* Check for mismatched ISR attribute. */
6099 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6101 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6102 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6104 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6111 /* Assigns default attributes to newly defined type. This is used to
6112 set short_call/long_call attributes for function types of
6113 functions defined inside corresponding #pragma scopes. */
6115 arm_set_default_type_attributes (tree type
)
6117 /* Add __attribute__ ((long_call)) to all functions, when
6118 inside #pragma long_calls or __attribute__ ((short_call)),
6119 when inside #pragma no_long_calls. */
6120 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6122 tree type_attr_list
, attr_name
;
6123 type_attr_list
= TYPE_ATTRIBUTES (type
);
6125 if (arm_pragma_long_calls
== LONG
)
6126 attr_name
= get_identifier ("long_call");
6127 else if (arm_pragma_long_calls
== SHORT
)
6128 attr_name
= get_identifier ("short_call");
6132 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6133 TYPE_ATTRIBUTES (type
) = type_attr_list
;
6137 /* Return true if DECL is known to be linked into section SECTION. */
6140 arm_function_in_section_p (tree decl
, section
*section
)
6142 /* We can only be certain about functions defined in the same
6143 compilation unit. */
6144 if (!TREE_STATIC (decl
))
6147 /* Make sure that SYMBOL always binds to the definition in this
6148 compilation unit. */
6149 if (!targetm
.binds_local_p (decl
))
6152 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6153 if (!DECL_SECTION_NAME (decl
))
6155 /* Make sure that we will not create a unique section for DECL. */
6156 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
6160 return function_section (decl
) == section
;
6163 /* Return nonzero if a 32-bit "long_call" should be generated for
6164 a call from the current function to DECL. We generate a long_call
6167 a. has an __attribute__((long call))
6168 or b. is within the scope of a #pragma long_calls
6169 or c. the -mlong-calls command line switch has been specified
6171 However we do not generate a long call if the function:
6173 d. has an __attribute__ ((short_call))
6174 or e. is inside the scope of a #pragma no_long_calls
6175 or f. is defined in the same section as the current function. */
6178 arm_is_long_call_p (tree decl
)
6183 return TARGET_LONG_CALLS
;
6185 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6186 if (lookup_attribute ("short_call", attrs
))
6189 /* For "f", be conservative, and only cater for cases in which the
6190 whole of the current function is placed in the same section. */
6191 if (!flag_reorder_blocks_and_partition
6192 && TREE_CODE (decl
) == FUNCTION_DECL
6193 && arm_function_in_section_p (decl
, current_function_section ()))
6196 if (lookup_attribute ("long_call", attrs
))
6199 return TARGET_LONG_CALLS
;
6202 /* Return nonzero if it is ok to make a tail-call to DECL. */
6204 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6206 unsigned long func_type
;
6208 if (cfun
->machine
->sibcall_blocked
)
6211 /* Never tailcall something if we are generating code for Thumb-1. */
6215 /* The PIC register is live on entry to VxWorks PLT entries, so we
6216 must make the call before restoring the PIC register. */
6217 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6220 /* Cannot tail-call to long calls, since these are out of range of
6221 a branch instruction. */
6222 if (decl
&& arm_is_long_call_p (decl
))
6225 /* If we are interworking and the function is not declared static
6226 then we can't tail-call it unless we know that it exists in this
6227 compilation unit (since it might be a Thumb routine). */
6228 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6229 && !TREE_ASM_WRITTEN (decl
))
6232 func_type
= arm_current_func_type ();
6233 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6234 if (IS_INTERRUPT (func_type
))
6237 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6239 /* Check that the return value locations are the same. For
6240 example that we aren't returning a value from the sibling in
6241 a VFP register but then need to transfer it to a core
6245 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
6246 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6248 if (!rtx_equal_p (a
, b
))
6252 /* Never tailcall if function may be called with a misaligned SP. */
6253 if (IS_STACKALIGN (func_type
))
6256 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6257 references should become a NOP. Don't convert such calls into
6259 if (TARGET_AAPCS_BASED
6260 && arm_abi
== ARM_ABI_AAPCS
6262 && DECL_WEAK (decl
))
6265 /* Everything else is ok. */
6270 /* Addressing mode support functions. */
6272 /* Return nonzero if X is a legitimate immediate operand when compiling
6273 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6275 legitimate_pic_operand_p (rtx x
)
6277 if (GET_CODE (x
) == SYMBOL_REF
6278 || (GET_CODE (x
) == CONST
6279 && GET_CODE (XEXP (x
, 0)) == PLUS
6280 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6286 /* Record that the current function needs a PIC register. Initialize
6287 cfun->machine->pic_reg if we have not already done so. */
6290 require_pic_register (void)
6292 /* A lot of the logic here is made obscure by the fact that this
6293 routine gets called as part of the rtx cost estimation process.
6294 We don't want those calls to affect any assumptions about the real
6295 function; and further, we can't call entry_of_function() until we
6296 start the real expansion process. */
6297 if (!crtl
->uses_pic_offset_table
)
6299 gcc_assert (can_create_pseudo_p ());
6300 if (arm_pic_register
!= INVALID_REGNUM
6301 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6303 if (!cfun
->machine
->pic_reg
)
6304 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6306 /* Play games to avoid marking the function as needing pic
6307 if we are being called as part of the cost-estimation
6309 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6310 crtl
->uses_pic_offset_table
= 1;
6316 if (!cfun
->machine
->pic_reg
)
6317 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6319 /* Play games to avoid marking the function as needing pic
6320 if we are being called as part of the cost-estimation
6322 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6324 crtl
->uses_pic_offset_table
= 1;
6327 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6328 && arm_pic_register
> LAST_LO_REGNUM
)
6329 emit_move_insn (cfun
->machine
->pic_reg
,
6330 gen_rtx_REG (Pmode
, arm_pic_register
));
6332 arm_load_pic_register (0UL);
6337 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6339 INSN_LOCATION (insn
) = prologue_location
;
6341 /* We can be called during expansion of PHI nodes, where
6342 we can't yet emit instructions directly in the final
6343 insn stream. Queue the insns on the entry edge, they will
6344 be committed after everything else is expanded. */
6345 insert_insn_on_edge (seq
,
6346 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6353 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
6355 if (GET_CODE (orig
) == SYMBOL_REF
6356 || GET_CODE (orig
) == LABEL_REF
)
6362 gcc_assert (can_create_pseudo_p ());
6363 reg
= gen_reg_rtx (Pmode
);
6366 /* VxWorks does not impose a fixed gap between segments; the run-time
6367 gap can be different from the object-file gap. We therefore can't
6368 use GOTOFF unless we are absolutely sure that the symbol is in the
6369 same segment as the GOT. Unfortunately, the flexibility of linker
6370 scripts means that we can't be sure of that in general, so assume
6371 that GOTOFF is never valid on VxWorks. */
6372 if ((GET_CODE (orig
) == LABEL_REF
6373 || (GET_CODE (orig
) == SYMBOL_REF
&&
6374 SYMBOL_REF_LOCAL_P (orig
)))
6376 && arm_pic_data_is_text_relative
)
6377 insn
= arm_pic_static_addr (orig
, reg
);
6383 /* If this function doesn't have a pic register, create one now. */
6384 require_pic_register ();
6386 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6388 /* Make the MEM as close to a constant as possible. */
6389 mem
= SET_SRC (pat
);
6390 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6391 MEM_READONLY_P (mem
) = 1;
6392 MEM_NOTRAP_P (mem
) = 1;
6394 insn
= emit_insn (pat
);
6397 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6399 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6403 else if (GET_CODE (orig
) == CONST
)
6407 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6408 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6411 /* Handle the case where we have: const (UNSPEC_TLS). */
6412 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6413 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6416 /* Handle the case where we have:
6417 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6419 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6420 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6421 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6423 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6429 gcc_assert (can_create_pseudo_p ());
6430 reg
= gen_reg_rtx (Pmode
);
6433 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6435 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6436 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6437 base
== reg
? 0 : reg
);
6439 if (CONST_INT_P (offset
))
6441 /* The base register doesn't really matter, we only want to
6442 test the index for the appropriate mode. */
6443 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6445 gcc_assert (can_create_pseudo_p ());
6446 offset
= force_reg (Pmode
, offset
);
6449 if (CONST_INT_P (offset
))
6450 return plus_constant (Pmode
, base
, INTVAL (offset
));
6453 if (GET_MODE_SIZE (mode
) > 4
6454 && (GET_MODE_CLASS (mode
) == MODE_INT
6455 || TARGET_SOFT_FLOAT
))
6457 emit_insn (gen_addsi3 (reg
, base
, offset
));
6461 return gen_rtx_PLUS (Pmode
, base
, offset
);
6468 /* Find a spare register to use during the prolog of a function. */
6471 thumb_find_work_register (unsigned long pushed_regs_mask
)
6475 /* Check the argument registers first as these are call-used. The
6476 register allocation order means that sometimes r3 might be used
6477 but earlier argument registers might not, so check them all. */
6478 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6479 if (!df_regs_ever_live_p (reg
))
6482 /* Before going on to check the call-saved registers we can try a couple
6483 more ways of deducing that r3 is available. The first is when we are
6484 pushing anonymous arguments onto the stack and we have less than 4
6485 registers worth of fixed arguments(*). In this case r3 will be part of
6486 the variable argument list and so we can be sure that it will be
6487 pushed right at the start of the function. Hence it will be available
6488 for the rest of the prologue.
6489 (*): ie crtl->args.pretend_args_size is greater than 0. */
6490 if (cfun
->machine
->uses_anonymous_args
6491 && crtl
->args
.pretend_args_size
> 0)
6492 return LAST_ARG_REGNUM
;
6494 /* The other case is when we have fixed arguments but less than 4 registers
6495 worth. In this case r3 might be used in the body of the function, but
6496 it is not being used to convey an argument into the function. In theory
6497 we could just check crtl->args.size to see how many bytes are
6498 being passed in argument registers, but it seems that it is unreliable.
6499 Sometimes it will have the value 0 when in fact arguments are being
6500 passed. (See testcase execute/20021111-1.c for an example). So we also
6501 check the args_info.nregs field as well. The problem with this field is
6502 that it makes no allowances for arguments that are passed to the
6503 function but which are not used. Hence we could miss an opportunity
6504 when a function has an unused argument in r3. But it is better to be
6505 safe than to be sorry. */
6506 if (! cfun
->machine
->uses_anonymous_args
6507 && crtl
->args
.size
>= 0
6508 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6509 && (TARGET_AAPCS_BASED
6510 ? crtl
->args
.info
.aapcs_ncrn
< 4
6511 : crtl
->args
.info
.nregs
< 4))
6512 return LAST_ARG_REGNUM
;
6514 /* Otherwise look for a call-saved register that is going to be pushed. */
6515 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6516 if (pushed_regs_mask
& (1 << reg
))
6521 /* Thumb-2 can use high regs. */
6522 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6523 if (pushed_regs_mask
& (1 << reg
))
6526 /* Something went wrong - thumb_compute_save_reg_mask()
6527 should have arranged for a suitable register to be pushed. */
6531 static GTY(()) int pic_labelno
;
6533 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6537 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6539 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6541 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6544 gcc_assert (flag_pic
);
6546 pic_reg
= cfun
->machine
->pic_reg
;
6547 if (TARGET_VXWORKS_RTP
)
6549 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6550 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6551 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6553 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6555 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6556 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6560 /* We use an UNSPEC rather than a LABEL_REF because this label
6561 never appears in the code stream. */
6563 labelno
= GEN_INT (pic_labelno
++);
6564 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6565 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6567 /* On the ARM the PC register contains 'dot + 8' at the time of the
6568 addition, on the Thumb it is 'dot + 4'. */
6569 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6570 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6572 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6576 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6578 else /* TARGET_THUMB1 */
6580 if (arm_pic_register
!= INVALID_REGNUM
6581 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6583 /* We will have pushed the pic register, so we should always be
6584 able to find a work register. */
6585 pic_tmp
= gen_rtx_REG (SImode
,
6586 thumb_find_work_register (saved_regs
));
6587 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6588 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6589 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6591 else if (arm_pic_register
!= INVALID_REGNUM
6592 && arm_pic_register
> LAST_LO_REGNUM
6593 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
6595 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6596 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
6597 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
6600 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6604 /* Need to emit this whether or not we obey regdecls,
6605 since setjmp/longjmp can cause life info to screw up. */
6609 /* Generate code to load the address of a static var when flag_pic is set. */
6611 arm_pic_static_addr (rtx orig
, rtx reg
)
6613 rtx l1
, labelno
, offset_rtx
, insn
;
6615 gcc_assert (flag_pic
);
6617 /* We use an UNSPEC rather than a LABEL_REF because this label
6618 never appears in the code stream. */
6619 labelno
= GEN_INT (pic_labelno
++);
6620 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6621 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6623 /* On the ARM the PC register contains 'dot + 8' at the time of the
6624 addition, on the Thumb it is 'dot + 4'. */
6625 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6626 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
6627 UNSPEC_SYMBOL_OFFSET
);
6628 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
6630 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
6634 /* Return nonzero if X is valid as an ARM state addressing register. */
6636 arm_address_register_rtx_p (rtx x
, int strict_p
)
6646 return ARM_REGNO_OK_FOR_BASE_P (regno
);
6648 return (regno
<= LAST_ARM_REGNUM
6649 || regno
>= FIRST_PSEUDO_REGISTER
6650 || regno
== FRAME_POINTER_REGNUM
6651 || regno
== ARG_POINTER_REGNUM
);
6654 /* Return TRUE if this rtx is the difference of a symbol and a label,
6655 and will reduce to a PC-relative relocation in the object file.
6656 Expressions like this can be left alone when generating PIC, rather
6657 than forced through the GOT. */
6659 pcrel_constant_p (rtx x
)
6661 if (GET_CODE (x
) == MINUS
)
6662 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
6667 /* Return true if X will surely end up in an index register after next
6670 will_be_in_index_register (const_rtx x
)
6672 /* arm.md: calculate_pic_address will split this into a register. */
6673 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
6676 /* Return nonzero if X is a valid ARM state address operand. */
6678 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
6682 enum rtx_code code
= GET_CODE (x
);
6684 if (arm_address_register_rtx_p (x
, strict_p
))
6687 use_ldrd
= (TARGET_LDRD
6689 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6691 if (code
== POST_INC
|| code
== PRE_DEC
6692 || ((code
== PRE_INC
|| code
== POST_DEC
)
6693 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6694 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6696 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6697 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6698 && GET_CODE (XEXP (x
, 1)) == PLUS
6699 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6701 rtx addend
= XEXP (XEXP (x
, 1), 1);
6703 /* Don't allow ldrd post increment by register because it's hard
6704 to fixup invalid register choices. */
6706 && GET_CODE (x
) == POST_MODIFY
6710 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
6711 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
6714 /* After reload constants split into minipools will have addresses
6715 from a LABEL_REF. */
6716 else if (reload_completed
6717 && (code
== LABEL_REF
6719 && GET_CODE (XEXP (x
, 0)) == PLUS
6720 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6721 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6724 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6727 else if (code
== PLUS
)
6729 rtx xop0
= XEXP (x
, 0);
6730 rtx xop1
= XEXP (x
, 1);
6732 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6733 && ((CONST_INT_P (xop1
)
6734 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
6735 || (!strict_p
&& will_be_in_index_register (xop1
))))
6736 || (arm_address_register_rtx_p (xop1
, strict_p
)
6737 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
6741 /* Reload currently can't handle MINUS, so disable this for now */
6742 else if (GET_CODE (x
) == MINUS
)
6744 rtx xop0
= XEXP (x
, 0);
6745 rtx xop1
= XEXP (x
, 1);
6747 return (arm_address_register_rtx_p (xop0
, strict_p
)
6748 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
6752 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6753 && code
== SYMBOL_REF
6754 && CONSTANT_POOL_ADDRESS_P (x
)
6756 && symbol_mentioned_p (get_pool_constant (x
))
6757 && ! pcrel_constant_p (get_pool_constant (x
))))
6763 /* Return nonzero if X is a valid Thumb-2 address operand. */
6765 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6768 enum rtx_code code
= GET_CODE (x
);
6770 if (arm_address_register_rtx_p (x
, strict_p
))
6773 use_ldrd
= (TARGET_LDRD
6775 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6777 if (code
== POST_INC
|| code
== PRE_DEC
6778 || ((code
== PRE_INC
|| code
== POST_DEC
)
6779 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6780 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6782 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6783 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6784 && GET_CODE (XEXP (x
, 1)) == PLUS
6785 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6787 /* Thumb-2 only has autoincrement by constant. */
6788 rtx addend
= XEXP (XEXP (x
, 1), 1);
6789 HOST_WIDE_INT offset
;
6791 if (!CONST_INT_P (addend
))
6794 offset
= INTVAL(addend
);
6795 if (GET_MODE_SIZE (mode
) <= 4)
6796 return (offset
> -256 && offset
< 256);
6798 return (use_ldrd
&& offset
> -1024 && offset
< 1024
6799 && (offset
& 3) == 0);
6802 /* After reload constants split into minipools will have addresses
6803 from a LABEL_REF. */
6804 else if (reload_completed
6805 && (code
== LABEL_REF
6807 && GET_CODE (XEXP (x
, 0)) == PLUS
6808 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6809 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6812 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6815 else if (code
== PLUS
)
6817 rtx xop0
= XEXP (x
, 0);
6818 rtx xop1
= XEXP (x
, 1);
6820 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6821 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
6822 || (!strict_p
&& will_be_in_index_register (xop1
))))
6823 || (arm_address_register_rtx_p (xop1
, strict_p
)
6824 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
6827 /* Normally we can assign constant values to target registers without
6828 the help of constant pool. But there are cases we have to use constant
6830 1) assign a label to register.
6831 2) sign-extend a 8bit value to 32bit and then assign to register.
6833 Constant pool access in format:
6834 (set (reg r0) (mem (symbol_ref (".LC0"))))
6835 will cause the use of literal pool (later in function arm_reorg).
6836 So here we mark such format as an invalid format, then the compiler
6837 will adjust it into:
6838 (set (reg r0) (symbol_ref (".LC0")))
6839 (set (reg r0) (mem (reg r0))).
6840 No extra register is required, and (mem (reg r0)) won't cause the use
6841 of literal pools. */
6842 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
6843 && CONSTANT_POOL_ADDRESS_P (x
))
6846 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6847 && code
== SYMBOL_REF
6848 && CONSTANT_POOL_ADDRESS_P (x
)
6850 && symbol_mentioned_p (get_pool_constant (x
))
6851 && ! pcrel_constant_p (get_pool_constant (x
))))
6857 /* Return nonzero if INDEX is valid for an address index operand in
6860 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
6863 HOST_WIDE_INT range
;
6864 enum rtx_code code
= GET_CODE (index
);
6866 /* Standard coprocessor addressing modes. */
6867 if (TARGET_HARD_FLOAT
6869 && (mode
== SFmode
|| mode
== DFmode
))
6870 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6871 && INTVAL (index
) > -1024
6872 && (INTVAL (index
) & 3) == 0);
6874 /* For quad modes, we restrict the constant offset to be slightly less
6875 than what the instruction format permits. We do this because for
6876 quad mode moves, we will actually decompose them into two separate
6877 double-mode reads or writes. INDEX must therefore be a valid
6878 (double-mode) offset and so should INDEX+8. */
6879 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6880 return (code
== CONST_INT
6881 && INTVAL (index
) < 1016
6882 && INTVAL (index
) > -1024
6883 && (INTVAL (index
) & 3) == 0);
6885 /* We have no such constraint on double mode offsets, so we permit the
6886 full range of the instruction format. */
6887 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6888 return (code
== CONST_INT
6889 && INTVAL (index
) < 1024
6890 && INTVAL (index
) > -1024
6891 && (INTVAL (index
) & 3) == 0);
6893 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6894 return (code
== CONST_INT
6895 && INTVAL (index
) < 1024
6896 && INTVAL (index
) > -1024
6897 && (INTVAL (index
) & 3) == 0);
6899 if (arm_address_register_rtx_p (index
, strict_p
)
6900 && (GET_MODE_SIZE (mode
) <= 4))
6903 if (mode
== DImode
|| mode
== DFmode
)
6905 if (code
== CONST_INT
)
6907 HOST_WIDE_INT val
= INTVAL (index
);
6910 return val
> -256 && val
< 256;
6912 return val
> -4096 && val
< 4092;
6915 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
6918 if (GET_MODE_SIZE (mode
) <= 4
6922 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
6926 rtx xiop0
= XEXP (index
, 0);
6927 rtx xiop1
= XEXP (index
, 1);
6929 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6930 && power_of_two_operand (xiop1
, SImode
))
6931 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6932 && power_of_two_operand (xiop0
, SImode
)));
6934 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
6935 || code
== ASHIFT
|| code
== ROTATERT
)
6937 rtx op
= XEXP (index
, 1);
6939 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6942 && INTVAL (op
) <= 31);
6946 /* For ARM v4 we may be doing a sign-extend operation during the
6952 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
6958 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
6960 return (code
== CONST_INT
6961 && INTVAL (index
) < range
6962 && INTVAL (index
) > -range
);
6965 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6966 index operand. i.e. 1, 2, 4 or 8. */
6968 thumb2_index_mul_operand (rtx op
)
6972 if (!CONST_INT_P (op
))
6976 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
6979 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6981 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
6983 enum rtx_code code
= GET_CODE (index
);
6985 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6986 /* Standard coprocessor addressing modes. */
6987 if (TARGET_HARD_FLOAT
6989 && (mode
== SFmode
|| mode
== DFmode
))
6990 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6991 /* Thumb-2 allows only > -256 index range for it's core register
6992 load/stores. Since we allow SF/DF in core registers, we have
6993 to use the intersection between -256~4096 (core) and -1024~1024
6995 && INTVAL (index
) > -256
6996 && (INTVAL (index
) & 3) == 0);
6998 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7000 /* For DImode assume values will usually live in core regs
7001 and only allow LDRD addressing modes. */
7002 if (!TARGET_LDRD
|| mode
!= DImode
)
7003 return (code
== CONST_INT
7004 && INTVAL (index
) < 1024
7005 && INTVAL (index
) > -1024
7006 && (INTVAL (index
) & 3) == 0);
7009 /* For quad modes, we restrict the constant offset to be slightly less
7010 than what the instruction format permits. We do this because for
7011 quad mode moves, we will actually decompose them into two separate
7012 double-mode reads or writes. INDEX must therefore be a valid
7013 (double-mode) offset and so should INDEX+8. */
7014 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7015 return (code
== CONST_INT
7016 && INTVAL (index
) < 1016
7017 && INTVAL (index
) > -1024
7018 && (INTVAL (index
) & 3) == 0);
7020 /* We have no such constraint on double mode offsets, so we permit the
7021 full range of the instruction format. */
7022 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7023 return (code
== CONST_INT
7024 && INTVAL (index
) < 1024
7025 && INTVAL (index
) > -1024
7026 && (INTVAL (index
) & 3) == 0);
7028 if (arm_address_register_rtx_p (index
, strict_p
)
7029 && (GET_MODE_SIZE (mode
) <= 4))
7032 if (mode
== DImode
|| mode
== DFmode
)
7034 if (code
== CONST_INT
)
7036 HOST_WIDE_INT val
= INTVAL (index
);
7037 /* ??? Can we assume ldrd for thumb2? */
7038 /* Thumb-2 ldrd only has reg+const addressing modes. */
7039 /* ldrd supports offsets of +-1020.
7040 However the ldr fallback does not. */
7041 return val
> -256 && val
< 256 && (val
& 3) == 0;
7049 rtx xiop0
= XEXP (index
, 0);
7050 rtx xiop1
= XEXP (index
, 1);
7052 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7053 && thumb2_index_mul_operand (xiop1
))
7054 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7055 && thumb2_index_mul_operand (xiop0
)));
7057 else if (code
== ASHIFT
)
7059 rtx op
= XEXP (index
, 1);
7061 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7064 && INTVAL (op
) <= 3);
7067 return (code
== CONST_INT
7068 && INTVAL (index
) < 4096
7069 && INTVAL (index
) > -256);
7072 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7074 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
7084 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7086 return (regno
<= LAST_LO_REGNUM
7087 || regno
> LAST_VIRTUAL_REGISTER
7088 || regno
== FRAME_POINTER_REGNUM
7089 || (GET_MODE_SIZE (mode
) >= 4
7090 && (regno
== STACK_POINTER_REGNUM
7091 || regno
>= FIRST_PSEUDO_REGISTER
7092 || x
== hard_frame_pointer_rtx
7093 || x
== arg_pointer_rtx
)));
7096 /* Return nonzero if x is a legitimate index register. This is the case
7097 for any base register that can access a QImode object. */
7099 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
7101 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
7104 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7106 The AP may be eliminated to either the SP or the FP, so we use the
7107 least common denominator, e.g. SImode, and offsets from 0 to 64.
7109 ??? Verify whether the above is the right approach.
7111 ??? Also, the FP may be eliminated to the SP, so perhaps that
7112 needs special handling also.
7114 ??? Look at how the mips16 port solves this problem. It probably uses
7115 better ways to solve some of these problems.
7117 Although it is not incorrect, we don't accept QImode and HImode
7118 addresses based on the frame pointer or arg pointer until the
7119 reload pass starts. This is so that eliminating such addresses
7120 into stack based ones won't produce impossible code. */
7122 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
7124 /* ??? Not clear if this is right. Experiment. */
7125 if (GET_MODE_SIZE (mode
) < 4
7126 && !(reload_in_progress
|| reload_completed
)
7127 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7128 || reg_mentioned_p (arg_pointer_rtx
, x
)
7129 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7130 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7131 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7132 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7135 /* Accept any base register. SP only in SImode or larger. */
7136 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7139 /* This is PC relative data before arm_reorg runs. */
7140 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7141 && GET_CODE (x
) == SYMBOL_REF
7142 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7145 /* This is PC relative data after arm_reorg runs. */
7146 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7148 && (GET_CODE (x
) == LABEL_REF
7149 || (GET_CODE (x
) == CONST
7150 && GET_CODE (XEXP (x
, 0)) == PLUS
7151 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7152 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7155 /* Post-inc indexing only supported for SImode and larger. */
7156 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7157 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7160 else if (GET_CODE (x
) == PLUS
)
7162 /* REG+REG address can be any two index registers. */
7163 /* We disallow FRAME+REG addressing since we know that FRAME
7164 will be replaced with STACK, and SP relative addressing only
7165 permits SP+OFFSET. */
7166 if (GET_MODE_SIZE (mode
) <= 4
7167 && XEXP (x
, 0) != frame_pointer_rtx
7168 && XEXP (x
, 1) != frame_pointer_rtx
7169 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7170 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7171 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7174 /* REG+const has 5-7 bit offset for non-SP registers. */
7175 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7176 || XEXP (x
, 0) == arg_pointer_rtx
)
7177 && CONST_INT_P (XEXP (x
, 1))
7178 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7181 /* REG+const has 10-bit offset for SP, but only SImode and
7182 larger is supported. */
7183 /* ??? Should probably check for DI/DFmode overflow here
7184 just like GO_IF_LEGITIMATE_OFFSET does. */
7185 else if (REG_P (XEXP (x
, 0))
7186 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7187 && GET_MODE_SIZE (mode
) >= 4
7188 && CONST_INT_P (XEXP (x
, 1))
7189 && INTVAL (XEXP (x
, 1)) >= 0
7190 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7191 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7194 else if (REG_P (XEXP (x
, 0))
7195 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7196 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7197 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7198 && REGNO (XEXP (x
, 0))
7199 <= LAST_VIRTUAL_POINTER_REGISTER
))
7200 && GET_MODE_SIZE (mode
) >= 4
7201 && CONST_INT_P (XEXP (x
, 1))
7202 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7206 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7207 && GET_MODE_SIZE (mode
) == 4
7208 && GET_CODE (x
) == SYMBOL_REF
7209 && CONSTANT_POOL_ADDRESS_P (x
)
7211 && symbol_mentioned_p (get_pool_constant (x
))
7212 && ! pcrel_constant_p (get_pool_constant (x
))))
7218 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7219 instruction of mode MODE. */
7221 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
7223 switch (GET_MODE_SIZE (mode
))
7226 return val
>= 0 && val
< 32;
7229 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7233 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7239 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
7242 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7243 else if (TARGET_THUMB2
)
7244 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7245 else /* if (TARGET_THUMB1) */
7246 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7249 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7251 Given an rtx X being reloaded into a reg required to be
7252 in class CLASS, return the class of reg to actually use.
7253 In general this is just CLASS, but for the Thumb core registers and
7254 immediate constants we prefer a LO_REGS class or a subset. */
7257 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7263 if (rclass
== GENERAL_REGS
)
7270 /* Build the SYMBOL_REF for __tls_get_addr. */
7272 static GTY(()) rtx tls_get_addr_libfunc
;
7275 get_tls_get_addr (void)
7277 if (!tls_get_addr_libfunc
)
7278 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7279 return tls_get_addr_libfunc
;
7283 arm_load_tp (rtx target
)
7286 target
= gen_reg_rtx (SImode
);
7290 /* Can return in any reg. */
7291 emit_insn (gen_load_tp_hard (target
));
7295 /* Always returned in r0. Immediately copy the result into a pseudo,
7296 otherwise other uses of r0 (e.g. setting up function arguments) may
7297 clobber the value. */
7301 emit_insn (gen_load_tp_soft ());
7303 tmp
= gen_rtx_REG (SImode
, 0);
7304 emit_move_insn (target
, tmp
);
7310 load_tls_operand (rtx x
, rtx reg
)
7314 if (reg
== NULL_RTX
)
7315 reg
= gen_reg_rtx (SImode
);
7317 tmp
= gen_rtx_CONST (SImode
, x
);
7319 emit_move_insn (reg
, tmp
);
7325 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7327 rtx insns
, label
, labelno
, sum
;
7329 gcc_assert (reloc
!= TLS_DESCSEQ
);
7332 labelno
= GEN_INT (pic_labelno
++);
7333 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7334 label
= gen_rtx_CONST (VOIDmode
, label
);
7336 sum
= gen_rtx_UNSPEC (Pmode
,
7337 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7338 GEN_INT (TARGET_ARM
? 8 : 4)),
7340 reg
= load_tls_operand (sum
, reg
);
7343 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7345 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7347 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7348 LCT_PURE
, /* LCT_CONST? */
7349 Pmode
, 1, reg
, Pmode
);
7351 insns
= get_insns ();
7358 arm_tls_descseq_addr (rtx x
, rtx reg
)
7360 rtx labelno
= GEN_INT (pic_labelno
++);
7361 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7362 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7363 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7364 gen_rtx_CONST (VOIDmode
, label
),
7365 GEN_INT (!TARGET_ARM
)),
7367 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
7369 emit_insn (gen_tlscall (x
, labelno
));
7371 reg
= gen_reg_rtx (SImode
);
7373 gcc_assert (REGNO (reg
) != 0);
7375 emit_move_insn (reg
, reg0
);
7381 legitimize_tls_address (rtx x
, rtx reg
)
7383 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7384 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7388 case TLS_MODEL_GLOBAL_DYNAMIC
:
7389 if (TARGET_GNU2_TLS
)
7391 reg
= arm_tls_descseq_addr (x
, reg
);
7393 tp
= arm_load_tp (NULL_RTX
);
7395 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7399 /* Original scheme */
7400 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7401 dest
= gen_reg_rtx (Pmode
);
7402 emit_libcall_block (insns
, dest
, ret
, x
);
7406 case TLS_MODEL_LOCAL_DYNAMIC
:
7407 if (TARGET_GNU2_TLS
)
7409 reg
= arm_tls_descseq_addr (x
, reg
);
7411 tp
= arm_load_tp (NULL_RTX
);
7413 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7417 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7419 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7420 share the LDM result with other LD model accesses. */
7421 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7423 dest
= gen_reg_rtx (Pmode
);
7424 emit_libcall_block (insns
, dest
, ret
, eqv
);
7426 /* Load the addend. */
7427 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7428 GEN_INT (TLS_LDO32
)),
7430 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7431 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7435 case TLS_MODEL_INITIAL_EXEC
:
7436 labelno
= GEN_INT (pic_labelno
++);
7437 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7438 label
= gen_rtx_CONST (VOIDmode
, label
);
7439 sum
= gen_rtx_UNSPEC (Pmode
,
7440 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7441 GEN_INT (TARGET_ARM
? 8 : 4)),
7443 reg
= load_tls_operand (sum
, reg
);
7446 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7447 else if (TARGET_THUMB2
)
7448 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7451 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7452 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7455 tp
= arm_load_tp (NULL_RTX
);
7457 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7459 case TLS_MODEL_LOCAL_EXEC
:
7460 tp
= arm_load_tp (NULL_RTX
);
7462 reg
= gen_rtx_UNSPEC (Pmode
,
7463 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7465 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7467 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7474 /* Try machine-dependent ways of modifying an illegitimate address
7475 to be legitimate. If we find one, return the new, valid address. */
7477 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7479 if (arm_tls_referenced_p (x
))
7483 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
7485 addend
= XEXP (XEXP (x
, 0), 1);
7486 x
= XEXP (XEXP (x
, 0), 0);
7489 if (GET_CODE (x
) != SYMBOL_REF
)
7492 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
7494 x
= legitimize_tls_address (x
, NULL_RTX
);
7498 x
= gen_rtx_PLUS (SImode
, x
, addend
);
7507 /* TODO: legitimize_address for Thumb2. */
7510 return thumb_legitimize_address (x
, orig_x
, mode
);
7513 if (GET_CODE (x
) == PLUS
)
7515 rtx xop0
= XEXP (x
, 0);
7516 rtx xop1
= XEXP (x
, 1);
7518 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7519 xop0
= force_reg (SImode
, xop0
);
7521 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7522 && !symbol_mentioned_p (xop1
))
7523 xop1
= force_reg (SImode
, xop1
);
7525 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7526 && CONST_INT_P (xop1
))
7528 HOST_WIDE_INT n
, low_n
;
7532 /* VFP addressing modes actually allow greater offsets, but for
7533 now we just stick with the lowest common denominator. */
7535 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7547 low_n
= ((mode
) == TImode
? 0
7548 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7552 base_reg
= gen_reg_rtx (SImode
);
7553 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7554 emit_move_insn (base_reg
, val
);
7555 x
= plus_constant (Pmode
, base_reg
, low_n
);
7557 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7558 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7561 /* XXX We don't allow MINUS any more -- see comment in
7562 arm_legitimate_address_outer_p (). */
7563 else if (GET_CODE (x
) == MINUS
)
7565 rtx xop0
= XEXP (x
, 0);
7566 rtx xop1
= XEXP (x
, 1);
7568 if (CONSTANT_P (xop0
))
7569 xop0
= force_reg (SImode
, xop0
);
7571 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
7572 xop1
= force_reg (SImode
, xop1
);
7574 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7575 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
7578 /* Make sure to take full advantage of the pre-indexed addressing mode
7579 with absolute addresses which often allows for the base register to
7580 be factorized for multiple adjacent memory references, and it might
7581 even allows for the mini pool to be avoided entirely. */
7582 else if (CONST_INT_P (x
) && optimize
> 0)
7585 HOST_WIDE_INT mask
, base
, index
;
7588 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7589 use a 8-bit index. So let's use a 12-bit index for SImode only and
7590 hope that arm_gen_constant will enable ldrb to use more bits. */
7591 bits
= (mode
== SImode
) ? 12 : 8;
7592 mask
= (1 << bits
) - 1;
7593 base
= INTVAL (x
) & ~mask
;
7594 index
= INTVAL (x
) & mask
;
7595 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
7597 /* It'll most probably be more efficient to generate the base
7598 with more bits set and use a negative index instead. */
7602 base_reg
= force_reg (SImode
, GEN_INT (base
));
7603 x
= plus_constant (Pmode
, base_reg
, index
);
7608 /* We need to find and carefully transform any SYMBOL and LABEL
7609 references; so go back to the original address expression. */
7610 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7612 if (new_x
!= orig_x
)
7620 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7621 to be legitimate. If we find one, return the new, valid address. */
7623 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7625 if (GET_CODE (x
) == PLUS
7626 && CONST_INT_P (XEXP (x
, 1))
7627 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
7628 || INTVAL (XEXP (x
, 1)) < 0))
7630 rtx xop0
= XEXP (x
, 0);
7631 rtx xop1
= XEXP (x
, 1);
7632 HOST_WIDE_INT offset
= INTVAL (xop1
);
7634 /* Try and fold the offset into a biasing of the base register and
7635 then offsetting that. Don't do this when optimizing for space
7636 since it can cause too many CSEs. */
7637 if (optimize_size
&& offset
>= 0
7638 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
7640 HOST_WIDE_INT delta
;
7643 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
7644 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
7645 delta
= 31 * GET_MODE_SIZE (mode
);
7647 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
7649 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
7651 x
= plus_constant (Pmode
, xop0
, delta
);
7653 else if (offset
< 0 && offset
> -256)
7654 /* Small negative offsets are best done with a subtract before the
7655 dereference, forcing these into a register normally takes two
7657 x
= force_operand (x
, NULL_RTX
);
7660 /* For the remaining cases, force the constant into a register. */
7661 xop1
= force_reg (SImode
, xop1
);
7662 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7665 else if (GET_CODE (x
) == PLUS
7666 && s_register_operand (XEXP (x
, 1), SImode
)
7667 && !s_register_operand (XEXP (x
, 0), SImode
))
7669 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
7671 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
7676 /* We need to find and carefully transform any SYMBOL and LABEL
7677 references; so go back to the original address expression. */
7678 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7680 if (new_x
!= orig_x
)
7688 arm_legitimize_reload_address (rtx
*p
,
7689 enum machine_mode mode
,
7690 int opnum
, int type
,
7691 int ind_levels ATTRIBUTE_UNUSED
)
7693 /* We must recognize output that we have already generated ourselves. */
7694 if (GET_CODE (*p
) == PLUS
7695 && GET_CODE (XEXP (*p
, 0)) == PLUS
7696 && REG_P (XEXP (XEXP (*p
, 0), 0))
7697 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
7698 && CONST_INT_P (XEXP (*p
, 1)))
7700 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7701 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7702 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7706 if (GET_CODE (*p
) == PLUS
7707 && REG_P (XEXP (*p
, 0))
7708 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
7709 /* If the base register is equivalent to a constant, let the generic
7710 code handle it. Otherwise we will run into problems if a future
7711 reload pass decides to rematerialize the constant. */
7712 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
7713 && CONST_INT_P (XEXP (*p
, 1)))
7715 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
7716 HOST_WIDE_INT low
, high
;
7718 /* Detect coprocessor load/stores. */
7719 bool coproc_p
= ((TARGET_HARD_FLOAT
7721 && (mode
== SFmode
|| mode
== DFmode
))
7722 || (TARGET_REALLY_IWMMXT
7723 && VALID_IWMMXT_REG_MODE (mode
))
7725 && (VALID_NEON_DREG_MODE (mode
)
7726 || VALID_NEON_QREG_MODE (mode
))));
7728 /* For some conditions, bail out when lower two bits are unaligned. */
7729 if ((val
& 0x3) != 0
7730 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7732 /* For DI, and DF under soft-float: */
7733 || ((mode
== DImode
|| mode
== DFmode
)
7734 /* Without ldrd, we use stm/ldm, which does not
7735 fair well with unaligned bits. */
7737 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7738 || TARGET_THUMB2
))))
7741 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7742 of which the (reg+high) gets turned into a reload add insn,
7743 we try to decompose the index into high/low values that can often
7744 also lead to better reload CSE.
7746 ldr r0, [r2, #4100] // Offset too large
7747 ldr r1, [r2, #4104] // Offset too large
7749 is best reloaded as:
7755 which post-reload CSE can simplify in most cases to eliminate the
7756 second add instruction:
7761 The idea here is that we want to split out the bits of the constant
7762 as a mask, rather than as subtracting the maximum offset that the
7763 respective type of load/store used can handle.
7765 When encountering negative offsets, we can still utilize it even if
7766 the overall offset is positive; sometimes this may lead to an immediate
7767 that can be constructed with fewer instructions.
7769 ldr r0, [r2, #0x3FFFFC]
7771 This is best reloaded as:
7772 add t1, r2, #0x400000
7775 The trick for spotting this for a load insn with N bits of offset
7776 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
7777 negative offset that is going to make bit N and all the bits below
7778 it become zero in the remainder part.
7780 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7781 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7782 used in most cases of ARM load/store instructions. */
7784 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7785 (((VAL) & ((1 << (N)) - 1)) \
7786 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7791 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
7793 /* NEON quad-word load/stores are made of two double-word accesses,
7794 so the valid index range is reduced by 8. Treat as 9-bit range if
7796 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
7797 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
7799 else if (GET_MODE_SIZE (mode
) == 8)
7802 low
= (TARGET_THUMB2
7803 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
7804 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
7806 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7807 to access doublewords. The supported load/store offsets are
7808 -8, -4, and 4, which we try to produce here. */
7809 low
= ((val
& 0xf) ^ 0x8) - 0x8;
7811 else if (GET_MODE_SIZE (mode
) < 8)
7813 /* NEON element load/stores do not have an offset. */
7814 if (TARGET_NEON_FP16
&& mode
== HFmode
)
7819 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7820 Try the wider 12-bit range first, and re-try if the result
7822 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7824 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7828 if (mode
== HImode
|| mode
== HFmode
)
7831 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7834 /* The storehi/movhi_bytes fallbacks can use only
7835 [-4094,+4094] of the full ldrb/strb index range. */
7836 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7837 if (low
== 4095 || low
== -4095)
7842 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7848 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
7849 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
7850 - (unsigned HOST_WIDE_INT
) 0x80000000);
7851 /* Check for overflow or zero */
7852 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
7855 /* Reload the high part into a base reg; leave the low part
7857 Note that replacing this gen_rtx_PLUS with plus_constant is
7858 wrong in this case because we rely on the
7859 (plus (plus reg c1) c2) structure being preserved so that
7860 XEXP (*p, 0) in push_reload below uses the correct term. */
7861 *p
= gen_rtx_PLUS (GET_MODE (*p
),
7862 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
7865 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7866 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7867 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7875 thumb_legitimize_reload_address (rtx
*x_p
,
7876 enum machine_mode mode
,
7877 int opnum
, int type
,
7878 int ind_levels ATTRIBUTE_UNUSED
)
7882 if (GET_CODE (x
) == PLUS
7883 && GET_MODE_SIZE (mode
) < 4
7884 && REG_P (XEXP (x
, 0))
7885 && XEXP (x
, 0) == stack_pointer_rtx
7886 && CONST_INT_P (XEXP (x
, 1))
7887 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7892 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7893 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7897 /* If both registers are hi-regs, then it's better to reload the
7898 entire expression rather than each register individually. That
7899 only requires one reload register rather than two. */
7900 if (GET_CODE (x
) == PLUS
7901 && REG_P (XEXP (x
, 0))
7902 && REG_P (XEXP (x
, 1))
7903 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
7904 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
7909 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7910 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7917 /* Test for various thread-local symbols. */
7919 /* Helper for arm_tls_referenced_p. */
7922 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
7924 if (GET_CODE (*x
) == SYMBOL_REF
)
7925 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
7927 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7928 TLS offsets, not real symbol references. */
7929 if (GET_CODE (*x
) == UNSPEC
7930 && XINT (*x
, 1) == UNSPEC_TLS
)
7936 /* Return TRUE if X contains any TLS symbol references. */
7939 arm_tls_referenced_p (rtx x
)
7941 if (! TARGET_HAVE_TLS
)
7944 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
7947 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7949 On the ARM, allow any integer (invalid ones are removed later by insn
7950 patterns), nice doubles and symbol_refs which refer to the function's
7953 When generating pic allow anything. */
7956 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
7958 /* At present, we have no support for Neon structure constants, so forbid
7959 them here. It might be possible to handle simple cases like 0 and -1
7961 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
7964 return flag_pic
|| !label_mentioned_p (x
);
7968 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7970 return (CONST_INT_P (x
)
7971 || CONST_DOUBLE_P (x
)
7972 || CONSTANT_ADDRESS_P (x
)
7977 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
7979 return (!arm_cannot_force_const_mem (mode
, x
)
7981 ? arm_legitimate_constant_p_1 (mode
, x
)
7982 : thumb_legitimate_constant_p (mode
, x
)));
7985 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7988 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7992 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
7994 split_const (x
, &base
, &offset
);
7995 if (GET_CODE (base
) == SYMBOL_REF
7996 && !offset_within_block_p (base
, INTVAL (offset
)))
7999 return arm_tls_referenced_p (x
);
8002 #define REG_OR_SUBREG_REG(X) \
8004 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8006 #define REG_OR_SUBREG_RTX(X) \
8007 (REG_P (X) ? (X) : SUBREG_REG (X))
/* NOTE(review): the text below is a lossily-extracted copy of
   thumb1_rtx_costs -- the embedded numbers are original file line numbers,
   and the gaps between them show that braces, case labels and several
   statements were dropped, so this region is not compilable as-is.
   Restore it from the upstream source rather than editing in place.
   Upstream contract: return the estimated cost (COSTS_N_INSNS units) of
   rtx X, whose code is CODE, appearing inside an OUTER rtx, when
   compiling for Thumb-1 and optimizing for speed.  */
8010 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8012 enum machine_mode mode
= GET_MODE (x
);
8021 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8028 return COSTS_N_INSNS (1);
8031 if (CONST_INT_P (XEXP (x
, 1)))
8034 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8041 return COSTS_N_INSNS (2) + cycles
;
8043 return COSTS_N_INSNS (1) + 16;
8046 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8048 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8049 return (COSTS_N_INSNS (words
)
8050 + 4 * ((MEM_P (SET_SRC (x
)))
8051 + MEM_P (SET_DEST (x
))));
/* NOTE(review): the CONST_INT handling below charges more for values
   needing shifted/negated materialization -- presumably mirroring the
   Thumb-1 movsi/andsi expanders; confirm against upstream.  */
8056 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8058 if (thumb_shiftable_const (INTVAL (x
)))
8059 return COSTS_N_INSNS (2);
8060 return COSTS_N_INSNS (3);
8062 else if ((outer
== PLUS
|| outer
== COMPARE
)
8063 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8065 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8066 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8067 return COSTS_N_INSNS (1);
8068 else if (outer
== AND
)
8071 /* This duplicates the tests in the andsi3 expander. */
8072 for (i
= 9; i
<= 31; i
++)
8073 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8074 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8075 return COSTS_N_INSNS (2);
8077 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8078 || outer
== LSHIFTRT
)
8080 return COSTS_N_INSNS (2);
8086 return COSTS_N_INSNS (3);
8104 /* XXX another guess. */
8105 /* Memory costs quite a lot for the first word, but subsequent words
8106 load at the equivalent of a single insn each. */
8107 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8108 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8113 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8119 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8120 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8126 return total
+ COSTS_N_INSNS (1);
8128 /* Assume a two-shift sequence. Increase the cost slightly so
8129 we prefer actual shifts over an extend operation. */
8130 return total
+ 1 + COSTS_N_INSNS (2);
/* NOTE(review): lossily-extracted copy of arm_rtx_costs_1 -- the big
   switch over GET_CODE (x), its braces, case labels and many statements
   were dropped by the extraction (see the gaps in the embedded original
   line numbers), so this region is not compilable and must be restored
   from upstream, not edited in place.
   Upstream contract: compute into *TOTAL the generic ARM/Thumb-2 cost of
   rtx X appearing inside an OUTER rtx; SPEED selects speed vs. size
   costing.  Returns true when *TOTAL is final, false when the caller
   should recurse into X's operands -- TODO confirm return protocol
   against upstream, as the return statements were dropped here.  */
8138 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8140 enum machine_mode mode
= GET_MODE (x
);
8141 enum rtx_code subcode
;
8143 enum rtx_code code
= GET_CODE (x
);
8149 /* Memory costs quite a lot for the first word, but subsequent words
8150 load at the equivalent of a single insn each. */
8151 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8158 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8159 *total
= COSTS_N_INSNS (2);
8160 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8161 *total
= COSTS_N_INSNS (4);
8163 *total
= COSTS_N_INSNS (20);
8167 if (REG_P (XEXP (x
, 1)))
8168 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8169 else if (!CONST_INT_P (XEXP (x
, 1)))
8170 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8176 *total
+= COSTS_N_INSNS (4);
8181 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8182 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8185 *total
+= COSTS_N_INSNS (3);
8189 *total
+= COSTS_N_INSNS (1);
8190 /* Increase the cost of complex shifts because they aren't any faster,
8191 and reduce dual issue opportunities. */
8192 if (arm_tune_cortex_a9
8193 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8201 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8202 if (CONST_INT_P (XEXP (x
, 0))
8203 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8205 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8209 if (CONST_INT_P (XEXP (x
, 1))
8210 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8212 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8219 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8221 if (TARGET_HARD_FLOAT
8223 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8225 *total
= COSTS_N_INSNS (1);
8226 if (CONST_DOUBLE_P (XEXP (x
, 0))
8227 && arm_const_double_rtx (XEXP (x
, 0)))
8229 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8233 if (CONST_DOUBLE_P (XEXP (x
, 1))
8234 && arm_const_double_rtx (XEXP (x
, 1)))
8236 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8242 *total
= COSTS_N_INSNS (20);
8246 *total
= COSTS_N_INSNS (1);
8247 if (CONST_INT_P (XEXP (x
, 0))
8248 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8250 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8254 subcode
= GET_CODE (XEXP (x
, 1));
8255 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8256 || subcode
== LSHIFTRT
8257 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8259 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8260 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8264 /* A shift as a part of RSB costs no more than RSB itself. */
8265 if (GET_CODE (XEXP (x
, 0)) == MULT
8266 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8268 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8269 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8274 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8276 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8277 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8281 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8282 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8284 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8285 if (REG_P (XEXP (XEXP (x
, 1), 0))
8286 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8287 *total
+= COSTS_N_INSNS (1);
8295 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8296 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8297 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8299 *total
= COSTS_N_INSNS (1);
8300 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8302 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8306 /* MLA: All arguments must be registers. We filter out
8307 multiplication by a power of two, so that we fall down into
8309 if (GET_CODE (XEXP (x
, 0)) == MULT
8310 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8312 /* The cost comes from the cost of the multiply. */
8316 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8318 if (TARGET_HARD_FLOAT
8320 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8322 *total
= COSTS_N_INSNS (1);
8323 if (CONST_DOUBLE_P (XEXP (x
, 1))
8324 && arm_const_double_rtx (XEXP (x
, 1)))
8326 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8333 *total
= COSTS_N_INSNS (20);
8337 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8338 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8340 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8341 if (REG_P (XEXP (XEXP (x
, 0), 0))
8342 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8343 *total
+= COSTS_N_INSNS (1);
8349 case AND
: case XOR
: case IOR
:
8351 /* Normally the frame registers will be spilt into reg+const during
8352 reload, so it is a bad idea to combine them with other instructions,
8353 since then they might not be moved outside of loops. As a compromise
8354 we allow integration with ops that have a constant as their second
8356 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8357 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8358 && !CONST_INT_P (XEXP (x
, 1)))
8359 *total
= COSTS_N_INSNS (1);
8363 *total
+= COSTS_N_INSNS (2);
8364 if (CONST_INT_P (XEXP (x
, 1))
8365 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8367 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8374 *total
+= COSTS_N_INSNS (1);
8375 if (CONST_INT_P (XEXP (x
, 1))
8376 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8378 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8381 subcode
= GET_CODE (XEXP (x
, 0));
8382 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8383 || subcode
== LSHIFTRT
8384 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8386 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8387 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8392 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8394 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8395 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8399 if (subcode
== UMIN
|| subcode
== UMAX
8400 || subcode
== SMIN
|| subcode
== SMAX
)
8402 *total
= COSTS_N_INSNS (3);
8409 /* This should have been handled by the CPU specific routines. */
8413 if (arm_arch3m
&& mode
== SImode
8414 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8415 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8416 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8417 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8418 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8419 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8421 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8424 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8428 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8430 if (TARGET_HARD_FLOAT
8432 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8434 *total
= COSTS_N_INSNS (1);
8437 *total
= COSTS_N_INSNS (2);
8443 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8444 if (mode
== SImode
&& code
== NOT
)
8446 subcode
= GET_CODE (XEXP (x
, 0));
8447 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8448 || subcode
== LSHIFTRT
8449 || subcode
== ROTATE
|| subcode
== ROTATERT
8451 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8453 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8454 /* Register shifts cost an extra cycle. */
8455 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8456 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8465 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8467 *total
= COSTS_N_INSNS (4);
8471 operand
= XEXP (x
, 0);
8473 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8474 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8475 && REG_P (XEXP (operand
, 0))
8476 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8477 *total
+= COSTS_N_INSNS (1);
8478 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8479 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8483 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8485 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8491 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8492 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8494 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8500 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8501 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8503 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8523 /* SCC insns. In the case where the comparison has already been
8524 performed, then they cost 2 instructions. Otherwise they need
8525 an additional comparison before them. */
8526 *total
= COSTS_N_INSNS (2);
8527 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8534 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8540 *total
+= COSTS_N_INSNS (1);
8541 if (CONST_INT_P (XEXP (x
, 1))
8542 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8544 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8548 subcode
= GET_CODE (XEXP (x
, 0));
8549 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8550 || subcode
== LSHIFTRT
8551 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8553 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8554 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8559 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8561 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8562 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8572 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8573 if (!CONST_INT_P (XEXP (x
, 1))
8574 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8575 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8579 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8581 if (TARGET_HARD_FLOAT
8583 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8585 *total
= COSTS_N_INSNS (1);
8588 *total
= COSTS_N_INSNS (20);
8591 *total
= COSTS_N_INSNS (1);
8593 *total
+= COSTS_N_INSNS (3);
8599 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8601 rtx op
= XEXP (x
, 0);
8602 enum machine_mode opmode
= GET_MODE (op
);
8605 *total
+= COSTS_N_INSNS (1);
8607 if (opmode
!= SImode
)
8611 /* If !arm_arch4, we use one of the extendhisi2_mem
8612 or movhi_bytes patterns for HImode. For a QImode
8613 sign extension, we first zero-extend from memory
8614 and then perform a shift sequence. */
8615 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8616 *total
+= COSTS_N_INSNS (2);
8619 *total
+= COSTS_N_INSNS (1);
8621 /* We don't have the necessary insn, so we need to perform some
8623 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8624 /* An and with constant 255. */
8625 *total
+= COSTS_N_INSNS (1);
8627 /* A shift sequence. Increase costs slightly to avoid
8628 combining two shifts into an extend operation. */
8629 *total
+= COSTS_N_INSNS (2) + 1;
8635 switch (GET_MODE (XEXP (x
, 0)))
8642 *total
= COSTS_N_INSNS (1);
8652 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8656 if (const_ok_for_arm (INTVAL (x
))
8657 || const_ok_for_arm (~INTVAL (x
)))
8658 *total
= COSTS_N_INSNS (1);
8660 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8661 INTVAL (x
), NULL_RTX
,
8668 *total
= COSTS_N_INSNS (3);
8672 *total
= COSTS_N_INSNS (1);
8676 *total
= COSTS_N_INSNS (1);
8677 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8681 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8682 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8683 *total
= COSTS_N_INSNS (1);
8685 *total
= COSTS_N_INSNS (4);
8689 /* The vec_extract patterns accept memory operands that require an
8690 address reload. Account for the cost of that reload to give the
8691 auto-inc-dec pass an incentive to try to replace them. */
8692 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8693 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8695 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8696 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8697 *total
+= COSTS_N_INSNS (1);
8700 /* Likewise for the vec_set patterns. */
8701 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8702 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8703 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8705 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8706 *total
= rtx_cost (mem
, code
, 0, speed
);
8707 if (!neon_vector_mem_operand (mem
, 2, true))
8708 *total
+= COSTS_N_INSNS (1);
8714 /* We cost this as high as our memory costs to allow this to
8715 be hoisted from loops. */
8716 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8718 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8724 && TARGET_HARD_FLOAT
8726 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8727 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8728 *total
= COSTS_N_INSNS (1);
8730 *total
= COSTS_N_INSNS (4);
8734 *total
= COSTS_N_INSNS (4);
/* NOTE(review): lossily-extracted copy of thumb1_size_rtx_costs; switch
   skeleton, braces and case labels were dropped by the extraction (see
   the gaps in the embedded original line numbers).  Not compilable
   as-is; restore from upstream rather than editing in place.
   Upstream contract: like thumb1_rtx_costs, but estimates *size* cost of
   rtx X with code CODE inside an OUTER rtx for Thumb-1.  */
8739 /* Estimates the size cost of thumb1 instructions.
8740 For now most of the code is copied from thumb1_rtx_costs. We need more
8741 fine grain tuning when we have more related test cases. */
8743 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8745 enum machine_mode mode
= GET_MODE (x
);
8754 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8758 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8759 defined by RTL expansion, especially for the expansion of
8761 if ((GET_CODE (XEXP (x
, 0)) == MULT
8762 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8763 || (GET_CODE (XEXP (x
, 1)) == MULT
8764 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8765 return COSTS_N_INSNS (2);
8766 /* On purpose fall through for normal RTX. */
8770 return COSTS_N_INSNS (1);
8773 if (CONST_INT_P (XEXP (x
, 1)))
8775 /* Thumb1 mul instruction can't operate on const. We must Load it
8776 into a register first. */
8777 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8778 return COSTS_N_INSNS (1) + const_size
;
8780 return COSTS_N_INSNS (1);
8783 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8785 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8786 return (COSTS_N_INSNS (words
)
8787 + 4 * ((MEM_P (SET_SRC (x
)))
8788 + MEM_P (SET_DEST (x
))));
8793 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8794 return COSTS_N_INSNS (1);
8795 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8796 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8797 return COSTS_N_INSNS (2);
8798 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8799 if (thumb_shiftable_const (INTVAL (x
)))
8800 return COSTS_N_INSNS (2);
8801 return COSTS_N_INSNS (3);
8803 else if ((outer
== PLUS
|| outer
== COMPARE
)
8804 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8806 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8807 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8808 return COSTS_N_INSNS (1);
8809 else if (outer
== AND
)
8812 /* This duplicates the tests in the andsi3 expander. */
8813 for (i
= 9; i
<= 31; i
++)
8814 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8815 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8816 return COSTS_N_INSNS (2);
8818 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8819 || outer
== LSHIFTRT
)
8821 return COSTS_N_INSNS (2);
8827 return COSTS_N_INSNS (3);
8845 /* XXX another guess. */
8846 /* Memory costs quite a lot for the first word, but subsequent words
8847 load at the equivalent of a single insn each. */
8848 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8849 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8854 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8859 /* XXX still guessing. */
8860 switch (GET_MODE (XEXP (x
, 0)))
8863 return (1 + (mode
== DImode
? 4 : 0)
8864 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8867 return (4 + (mode
== DImode
? 4 : 0)
8868 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8871 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
/* NOTE(review): lossily-extracted copy of arm_size_rtx_costs; the switch
   over the rtx code, braces and several case labels were dropped by the
   extraction.  Not compilable as-is; restore from upstream rather than
   editing in place.
   Upstream contract: size-oriented cost hook -- computes *TOTAL for rtx
   X (code CODE, inside OUTER_CODE); delegates to thumb1_size_rtx_costs
   for Thumb-1 and to arm_rtx_costs_1 for compares.  */
8882 /* RTX costs when optimizing for size. */
8884 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8887 enum machine_mode mode
= GET_MODE (x
);
8890 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
8894 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8898 /* A memory access costs 1 insn if the mode is small, or the address is
8899 a single register, otherwise it costs one insn per word. */
8900 if (REG_P (XEXP (x
, 0)))
8901 *total
= COSTS_N_INSNS (1);
8903 && GET_CODE (XEXP (x
, 0)) == PLUS
8904 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
8905 /* This will be split into two instructions.
8906 See arm.md:calculate_pic_address. */
8907 *total
= COSTS_N_INSNS (2);
8909 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8916 /* Needs a libcall, so it costs about this. */
8917 *total
= COSTS_N_INSNS (2);
8921 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
8923 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8931 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
8933 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8936 else if (mode
== SImode
)
8938 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8939 /* Slightly disparage register shifts, but not by much. */
8940 if (!CONST_INT_P (XEXP (x
, 1)))
8941 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
8945 /* Needs a libcall. */
8946 *total
= COSTS_N_INSNS (2);
8950 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8951 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8953 *total
= COSTS_N_INSNS (1);
8959 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
8960 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
8962 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
8963 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
8964 || subcode1
== ROTATE
|| subcode1
== ROTATERT
8965 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
8966 || subcode1
== ASHIFTRT
)
8968 /* It's just the cost of the two operands. */
8973 *total
= COSTS_N_INSNS (1);
8977 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8981 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8982 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8984 *total
= COSTS_N_INSNS (1);
8988 /* A shift as a part of ADD costs nothing. */
8989 if (GET_CODE (XEXP (x
, 0)) == MULT
8990 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8992 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
8993 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
8994 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
8999 case AND
: case XOR
: case IOR
:
9002 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9004 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9005 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9006 || (code
== AND
&& subcode
== NOT
))
9008 /* It's just the cost of the two operands. */
9014 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9018 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9022 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9023 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9025 *total
= COSTS_N_INSNS (1);
9031 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9040 if (cc_register (XEXP (x
, 0), VOIDmode
))
9043 *total
= COSTS_N_INSNS (1);
9047 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9048 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9049 *total
= COSTS_N_INSNS (1);
9051 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9056 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9059 if (const_ok_for_arm (INTVAL (x
)))
9060 /* A multiplication by a constant requires another instruction
9061 to load the constant to a register. */
9062 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9064 else if (const_ok_for_arm (~INTVAL (x
)))
9065 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9066 else if (const_ok_for_arm (-INTVAL (x
)))
9068 if (outer_code
== COMPARE
|| outer_code
== PLUS
9069 || outer_code
== MINUS
)
9072 *total
= COSTS_N_INSNS (1);
9075 *total
= COSTS_N_INSNS (2);
9081 *total
= COSTS_N_INSNS (2);
9085 *total
= COSTS_N_INSNS (4);
9090 && TARGET_HARD_FLOAT
9091 && outer_code
== SET
9092 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9093 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9094 *total
= COSTS_N_INSNS (1);
9096 *total
= COSTS_N_INSNS (4);
9101 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9102 cost of these slightly. */
9103 *total
= COSTS_N_INSNS (1) + 1;
9110 if (mode
!= VOIDmode
)
9111 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9113 *total
= COSTS_N_INSNS (4); /* How knows? */
9118 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9119 operand, then return the operand that is being shifted. If the shift
9120 is not by a constant, then set SHIFT_REG to point to the operand.
9121 Return NULL if OP is not a shifter operand. */
9123 shifter_op_p (rtx op
, rtx
*shift_reg
)
9125 enum rtx_code code
= GET_CODE (op
);
9127 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9128 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9129 return XEXP (op
, 0);
9130 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9131 return XEXP (op
, 0);
9132 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9133 || code
== ASHIFTRT
)
9135 if (!CONST_INT_P (XEXP (op
, 1)))
9136 *shift_reg
= XEXP (op
, 1);
9137 return XEXP (op
, 0);
/* NOTE(review): lossily-extracted copy of arm_unspec_cost; the `static
   bool` line, braces, several case labels (original lines 9178-9183,
   presumably the UNSPEC_VRINT* rounding cases given the roundint table
   lookup below -- confirm against upstream) and the returns were dropped
   by the extraction.  Not compilable as-is; restore from upstream.
   Upstream contract: cost UNSPEC rtx X into *COST; SPEED_P selects
   speed-based extra costs from the per-CPU cost table.  */
9144 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9146 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9147 gcc_assert (GET_CODE (x
) == UNSPEC
);
9149 switch (XINT (x
, 1))
9151 case UNSPEC_UNALIGNED_LOAD
:
9152 /* We can only do unaligned loads into the integer unit, and we can't
9154 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9156 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9157 + extra_cost
->ldst
.load_unaligned
);
9160 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9161 ADDR_SPACE_GENERIC
, speed_p
);
9165 case UNSPEC_UNALIGNED_STORE
:
9166 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9168 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9169 + extra_cost
->ldst
.store_unaligned
);
9171 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
9173 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9174 ADDR_SPACE_GENERIC
, speed_p
);
9184 *cost
= COSTS_N_INSNS (1);
9186 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9190 *cost
= COSTS_N_INSNS (2);
/* NOTE(review): the HANDLE_NARROW_SHIFT_ARITH macro below lost its
   do/while scaffolding, braces and several continuation lines to the
   extraction (original lines 9201-9202, 9206-9209, 9212-9213, 9215 and
   9218+ are missing); it is not usable as-is and must be restored from
   upstream.  Upstream intent: when operand IDX of a narrow-mode
   arithmetic rtx X is a left-shift-like shifter operand, charge the
   arith_shift(/reg) costs plus the operand costs and return early.  */
9196 /* Cost of a libcall. We assume one insn per argument, an amount for the
9197 call (one insn for -Os) and then one for processing the result. */
9198 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9200 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9203 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9204 if (shift_op != NULL \
9205 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9210 *cost += extra_cost->alu.arith_shift_reg; \
9211 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9214 *cost += extra_cost->alu.arith_shift; \
9216 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9217 + rtx_cost (XEXP (x, 1 - IDX), \
9224 /* RTX costs. Make an estimate of the cost of executing the operation
9225 X, which is contained with an operation with code OUTER_CODE.
9226 SPEED_P indicates whether the cost desired is the performance cost,
9227 or the size cost. The estimate is stored in COST and the return
9228 value is TRUE if the cost calculation is final, or FALSE if the
9229 caller should recurse through the operands of X to add additional
9232 We currently make no attempt to model the size savings of Thumb-2
9233 16-bit instructions. At the normal points in compilation where
9234 this code is called we have no measure of whether the condition
9235 flags are live or not, and thus no realistic way to determine what
9236 the size will eventually be. */
9238 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9239 const struct cpu_cost_table
*extra_cost
,
9240 int *cost
, bool speed_p
)
9242 enum machine_mode mode
= GET_MODE (x
);
9247 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9249 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9257 /* SET RTXs don't have a mode so we get it from the destination. */
9258 mode
= GET_MODE (SET_DEST (x
));
9260 if (REG_P (SET_SRC (x
))
9261 && REG_P (SET_DEST (x
)))
9263 /* Assume that most copies can be done with a single insn,
9264 unless we don't have HW FP, in which case everything
9265 larger than word mode will require two insns. */
9266 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9267 && GET_MODE_SIZE (mode
) > 4)
9270 /* Conditional register moves can be encoded
9271 in 16 bits in Thumb mode. */
9272 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9278 if (CONST_INT_P (SET_SRC (x
)))
9280 /* Handle CONST_INT here, since the value doesn't have a mode
9281 and we would otherwise be unable to work out the true cost. */
9282 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9284 /* Slightly lower the cost of setting a core reg to a constant.
9285 This helps break up chains and allows for better scheduling. */
9286 if (REG_P (SET_DEST (x
))
9287 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9290 /* Immediate moves with an immediate in the range [0, 255] can be
9291 encoded in 16 bits in Thumb mode. */
9292 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9293 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9295 goto const_int_cost
;
9301 /* A memory access costs 1 insn if the mode is small, or the address is
9302 a single register, otherwise it costs one insn per word. */
9303 if (REG_P (XEXP (x
, 0)))
9304 *cost
= COSTS_N_INSNS (1);
9306 && GET_CODE (XEXP (x
, 0)) == PLUS
9307 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9308 /* This will be split into two instructions.
9309 See arm.md:calculate_pic_address. */
9310 *cost
= COSTS_N_INSNS (2);
9312 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9314 /* For speed optimizations, add the costs of the address and
9315 accessing memory. */
9318 *cost
+= (extra_cost
->ldst
.load
9319 + arm_address_cost (XEXP (x
, 0), mode
,
9320 ADDR_SPACE_GENERIC
, speed_p
));
9322 *cost
+= extra_cost
->ldst
.load
;
9328 /* Calculations of LDM costs are complex. We assume an initial cost
9329 (ldm_1st) which will load the number of registers mentioned in
9330 ldm_regs_per_insn_1st registers; then each additional
9331 ldm_regs_per_insn_subsequent registers cost one more insn. The
9332 formula for N regs is thus:
9334 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9335 + ldm_regs_per_insn_subsequent - 1)
9336 / ldm_regs_per_insn_subsequent).
9338 Additional costs may also be added for addressing. A similar
9339 formula is used for STM. */
9341 bool is_ldm
= load_multiple_operation (x
, SImode
);
9342 bool is_stm
= store_multiple_operation (x
, SImode
);
9344 *cost
= COSTS_N_INSNS (1);
9346 if (is_ldm
|| is_stm
)
9350 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9351 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9352 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9353 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9354 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9355 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9356 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9358 *cost
+= regs_per_insn_1st
9359 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9360 + regs_per_insn_sub
- 1)
9361 / regs_per_insn_sub
);
9370 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9371 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9372 *cost
= COSTS_N_INSNS (speed_p
9373 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9374 else if (mode
== SImode
&& TARGET_IDIV
)
9375 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9377 *cost
= LIBCALL_COST (2);
9378 return false; /* All arguments must be in registers. */
9382 *cost
= LIBCALL_COST (2);
9383 return false; /* All arguments must be in registers. */
9386 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9388 *cost
= (COSTS_N_INSNS (2)
9389 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9391 *cost
+= extra_cost
->alu
.shift_reg
;
9399 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9401 *cost
= (COSTS_N_INSNS (3)
9402 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9404 *cost
+= 2 * extra_cost
->alu
.shift
;
9407 else if (mode
== SImode
)
9409 *cost
= (COSTS_N_INSNS (1)
9410 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9411 /* Slightly disparage register shifts at -Os, but not by much. */
9412 if (!CONST_INT_P (XEXP (x
, 1)))
9413 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9414 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9417 else if (GET_MODE_CLASS (mode
) == MODE_INT
9418 && GET_MODE_SIZE (mode
) < 4)
9422 *cost
= (COSTS_N_INSNS (1)
9423 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9424 /* Slightly disparage register shifts at -Os, but not by
9426 if (!CONST_INT_P (XEXP (x
, 1)))
9427 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9428 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9430 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9432 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9434 /* Can use SBFX/UBFX. */
9435 *cost
= COSTS_N_INSNS (1);
9437 *cost
+= extra_cost
->alu
.bfx
;
9438 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9442 *cost
= COSTS_N_INSNS (2);
9443 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9446 if (CONST_INT_P (XEXP (x
, 1)))
9447 *cost
+= 2 * extra_cost
->alu
.shift
;
9449 *cost
+= (extra_cost
->alu
.shift
9450 + extra_cost
->alu
.shift_reg
);
9453 /* Slightly disparage register shifts. */
9454 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9459 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9460 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9463 if (CONST_INT_P (XEXP (x
, 1)))
9464 *cost
+= (2 * extra_cost
->alu
.shift
9465 + extra_cost
->alu
.log_shift
);
9467 *cost
+= (extra_cost
->alu
.shift
9468 + extra_cost
->alu
.shift_reg
9469 + extra_cost
->alu
.log_shift_reg
);
9475 *cost
= LIBCALL_COST (2);
9483 *cost
= COSTS_N_INSNS (1);
9485 *cost
+= extra_cost
->alu
.rev
;
9492 /* No rev instruction available. Look at arm_legacy_rev
9493 and thumb_legacy_rev for the form of RTL used then. */
9496 *cost
= COSTS_N_INSNS (10);
9500 *cost
+= 6 * extra_cost
->alu
.shift
;
9501 *cost
+= 3 * extra_cost
->alu
.logical
;
9506 *cost
= COSTS_N_INSNS (5);
9510 *cost
+= 2 * extra_cost
->alu
.shift
;
9511 *cost
+= extra_cost
->alu
.arith_shift
;
9512 *cost
+= 2 * extra_cost
->alu
.logical
;
9520 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9521 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9523 *cost
= COSTS_N_INSNS (1);
9524 if (GET_CODE (XEXP (x
, 0)) == MULT
9525 || GET_CODE (XEXP (x
, 1)) == MULT
)
9527 rtx mul_op0
, mul_op1
, sub_op
;
9530 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9532 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9534 mul_op0
= XEXP (XEXP (x
, 0), 0);
9535 mul_op1
= XEXP (XEXP (x
, 0), 1);
9536 sub_op
= XEXP (x
, 1);
9540 mul_op0
= XEXP (XEXP (x
, 1), 0);
9541 mul_op1
= XEXP (XEXP (x
, 1), 1);
9542 sub_op
= XEXP (x
, 0);
9545 /* The first operand of the multiply may be optionally
9547 if (GET_CODE (mul_op0
) == NEG
)
9548 mul_op0
= XEXP (mul_op0
, 0);
9550 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9551 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9552 + rtx_cost (sub_op
, code
, 0, speed_p
));
9558 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9564 rtx shift_by_reg
= NULL
;
9568 *cost
= COSTS_N_INSNS (1);
9570 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9571 if (shift_op
== NULL
)
9573 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9574 non_shift_op
= XEXP (x
, 0);
9577 non_shift_op
= XEXP (x
, 1);
9579 if (shift_op
!= NULL
)
9581 if (shift_by_reg
!= NULL
)
9584 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9585 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9588 *cost
+= extra_cost
->alu
.arith_shift
;
9590 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9591 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9596 && GET_CODE (XEXP (x
, 1)) == MULT
)
9600 *cost
+= extra_cost
->mult
[0].add
;
9601 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9602 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9603 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9607 if (CONST_INT_P (XEXP (x
, 0)))
9609 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9610 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9612 *cost
= COSTS_N_INSNS (insns
);
9614 *cost
+= insns
* extra_cost
->alu
.arith
;
9615 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9622 if (GET_MODE_CLASS (mode
) == MODE_INT
9623 && GET_MODE_SIZE (mode
) < 4)
9625 rtx shift_op
, shift_reg
;
9628 /* We check both sides of the MINUS for shifter operands since,
9629 unlike PLUS, it's not commutative. */
9631 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9632 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9634 /* Slightly disparage, as we might need to widen the result. */
9635 *cost
= 1 + COSTS_N_INSNS (1);
9637 *cost
+= extra_cost
->alu
.arith
;
9639 if (CONST_INT_P (XEXP (x
, 0)))
9641 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9650 *cost
= COSTS_N_INSNS (2);
9652 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9654 rtx op1
= XEXP (x
, 1);
9657 *cost
+= 2 * extra_cost
->alu
.arith
;
9659 if (GET_CODE (op1
) == ZERO_EXTEND
)
9660 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9662 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9663 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9667 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9670 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9671 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9673 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9676 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9677 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9680 *cost
+= (extra_cost
->alu
.arith
9681 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9682 ? extra_cost
->alu
.arith
9683 : extra_cost
->alu
.arith_shift
));
9684 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9685 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9686 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9691 *cost
+= 2 * extra_cost
->alu
.arith
;
9697 *cost
= LIBCALL_COST (2);
9701 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9702 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9704 *cost
= COSTS_N_INSNS (1);
9705 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9707 rtx mul_op0
, mul_op1
, add_op
;
9710 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9712 mul_op0
= XEXP (XEXP (x
, 0), 0);
9713 mul_op1
= XEXP (XEXP (x
, 0), 1);
9714 add_op
= XEXP (x
, 1);
9716 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9717 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9718 + rtx_cost (add_op
, code
, 0, speed_p
));
9724 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9727 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9729 *cost
= LIBCALL_COST (2);
9733 /* Narrow modes can be synthesized in SImode, but the range
9734 of useful sub-operations is limited. Check for shift operations
9735 on one of the operands. Only left shifts can be used in the
9737 if (GET_MODE_CLASS (mode
) == MODE_INT
9738 && GET_MODE_SIZE (mode
) < 4)
9740 rtx shift_op
, shift_reg
;
9743 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9745 if (CONST_INT_P (XEXP (x
, 1)))
9747 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9748 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9750 *cost
= COSTS_N_INSNS (insns
);
9752 *cost
+= insns
* extra_cost
->alu
.arith
;
9753 /* Slightly penalize a narrow operation as the result may
9755 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9759 /* Slightly penalize a narrow operation as the result may
9761 *cost
= 1 + COSTS_N_INSNS (1);
9763 *cost
+= extra_cost
->alu
.arith
;
9770 rtx shift_op
, shift_reg
;
9772 *cost
= COSTS_N_INSNS (1);
9774 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9775 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9777 /* UXTA[BH] or SXTA[BH]. */
9779 *cost
+= extra_cost
->alu
.extend_arith
;
9780 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9782 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9787 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9788 if (shift_op
!= NULL
)
9793 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9794 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9797 *cost
+= extra_cost
->alu
.arith_shift
;
9799 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9800 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9803 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9805 rtx mul_op
= XEXP (x
, 0);
9807 *cost
= COSTS_N_INSNS (1);
9809 if (TARGET_DSP_MULTIPLY
9810 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9811 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9812 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9814 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9815 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9816 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9817 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9818 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9819 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9820 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9821 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9826 *cost
+= extra_cost
->mult
[0].extend_add
;
9827 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9828 SIGN_EXTEND
, 0, speed_p
)
9829 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9830 SIGN_EXTEND
, 0, speed_p
)
9831 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9836 *cost
+= extra_cost
->mult
[0].add
;
9837 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
9838 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
9839 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9842 if (CONST_INT_P (XEXP (x
, 1)))
9844 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9845 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9847 *cost
= COSTS_N_INSNS (insns
);
9849 *cost
+= insns
* extra_cost
->alu
.arith
;
9850 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9859 && GET_CODE (XEXP (x
, 0)) == MULT
9860 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9861 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9862 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9863 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9865 *cost
= COSTS_N_INSNS (1);
9867 *cost
+= extra_cost
->mult
[1].extend_add
;
9868 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
9869 ZERO_EXTEND
, 0, speed_p
)
9870 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
9871 ZERO_EXTEND
, 0, speed_p
)
9872 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9876 *cost
= COSTS_N_INSNS (2);
9878 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9879 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9882 *cost
+= (extra_cost
->alu
.arith
9883 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9884 ? extra_cost
->alu
.arith
9885 : extra_cost
->alu
.arith_shift
));
9887 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9889 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9894 *cost
+= 2 * extra_cost
->alu
.arith
;
9899 *cost
= LIBCALL_COST (2);
9902 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9904 *cost
= COSTS_N_INSNS (1);
9906 *cost
+= extra_cost
->alu
.rev
;
9914 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9915 rtx op0
= XEXP (x
, 0);
9916 rtx shift_op
, shift_reg
;
9918 *cost
= COSTS_N_INSNS (1);
9922 || (code
== IOR
&& TARGET_THUMB2
)))
9923 op0
= XEXP (op0
, 0);
9926 shift_op
= shifter_op_p (op0
, &shift_reg
);
9927 if (shift_op
!= NULL
)
9932 *cost
+= extra_cost
->alu
.log_shift_reg
;
9933 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9936 *cost
+= extra_cost
->alu
.log_shift
;
9938 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9939 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9943 if (CONST_INT_P (XEXP (x
, 1)))
9945 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9946 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9949 *cost
= COSTS_N_INSNS (insns
);
9951 *cost
+= insns
* extra_cost
->alu
.logical
;
9952 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
9957 *cost
+= extra_cost
->alu
.logical
;
9958 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
9959 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9965 rtx op0
= XEXP (x
, 0);
9966 enum rtx_code subcode
= GET_CODE (op0
);
9968 *cost
= COSTS_N_INSNS (2);
9972 || (code
== IOR
&& TARGET_THUMB2
)))
9973 op0
= XEXP (op0
, 0);
9975 if (GET_CODE (op0
) == ZERO_EXTEND
)
9978 *cost
+= 2 * extra_cost
->alu
.logical
;
9980 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
9981 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9984 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9987 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9989 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
9990 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9995 *cost
+= 2 * extra_cost
->alu
.logical
;
10001 *cost
= LIBCALL_COST (2);
10005 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10006 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10008 rtx op0
= XEXP (x
, 0);
10010 *cost
= COSTS_N_INSNS (1);
10012 if (GET_CODE (op0
) == NEG
)
10013 op0
= XEXP (op0
, 0);
10016 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10018 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
10019 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
10022 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10024 *cost
= LIBCALL_COST (2);
10028 if (mode
== SImode
)
10030 *cost
= COSTS_N_INSNS (1);
10031 if (TARGET_DSP_MULTIPLY
10032 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10033 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10034 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10035 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10036 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10037 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10038 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10039 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10040 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10041 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10042 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10043 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10046 /* SMUL[TB][TB]. */
10048 *cost
+= extra_cost
->mult
[0].extend
;
10049 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
10050 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
10054 *cost
+= extra_cost
->mult
[0].simple
;
10058 if (mode
== DImode
)
10061 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10062 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10063 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10064 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10066 *cost
= COSTS_N_INSNS (1);
10068 *cost
+= extra_cost
->mult
[1].extend
;
10069 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
10070 ZERO_EXTEND
, 0, speed_p
)
10071 + rtx_cost (XEXP (XEXP (x
, 1), 0),
10072 ZERO_EXTEND
, 0, speed_p
));
10076 *cost
= LIBCALL_COST (2);
10081 *cost
= LIBCALL_COST (2);
10085 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10086 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10088 *cost
= COSTS_N_INSNS (1);
10090 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10094 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10096 *cost
= LIBCALL_COST (1);
10100 if (mode
== SImode
)
10102 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10104 *cost
= COSTS_N_INSNS (2);
10105 /* Assume the non-flag-changing variant. */
10107 *cost
+= (extra_cost
->alu
.log_shift
10108 + extra_cost
->alu
.arith_shift
);
10109 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
10113 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10114 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10116 *cost
= COSTS_N_INSNS (2);
10117 /* No extra cost for MOV imm and MVN imm. */
10118 /* If the comparison op is using the flags, there's no further
10119 cost, otherwise we need to add the cost of the comparison. */
10120 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10121 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10122 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10124 *cost
+= (COSTS_N_INSNS (1)
10125 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
10127 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
10130 *cost
+= extra_cost
->alu
.arith
;
10134 *cost
= COSTS_N_INSNS (1);
10136 *cost
+= extra_cost
->alu
.arith
;
10140 if (GET_MODE_CLASS (mode
) == MODE_INT
10141 && GET_MODE_SIZE (mode
) < 4)
10143 /* Slightly disparage, as we might need an extend operation. */
10144 *cost
= 1 + COSTS_N_INSNS (1);
10146 *cost
+= extra_cost
->alu
.arith
;
10150 if (mode
== DImode
)
10152 *cost
= COSTS_N_INSNS (2);
10154 *cost
+= 2 * extra_cost
->alu
.arith
;
10159 *cost
= LIBCALL_COST (1);
10163 if (mode
== SImode
)
10166 rtx shift_reg
= NULL
;
10168 *cost
= COSTS_N_INSNS (1);
10169 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10173 if (shift_reg
!= NULL
)
10176 *cost
+= extra_cost
->alu
.log_shift_reg
;
10177 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10180 *cost
+= extra_cost
->alu
.log_shift
;
10181 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
10186 *cost
+= extra_cost
->alu
.logical
;
10189 if (mode
== DImode
)
10191 *cost
= COSTS_N_INSNS (2);
10197 *cost
+= LIBCALL_COST (1);
10202 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10204 *cost
= COSTS_N_INSNS (4);
10207 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
10208 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
10210 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
10211 /* Assume that if one arm of the if_then_else is a register,
10212 that it will be tied with the result and eliminate the
10213 conditional insn. */
10214 if (REG_P (XEXP (x
, 1)))
10216 else if (REG_P (XEXP (x
, 2)))
10222 if (extra_cost
->alu
.non_exec_costs_exec
)
10223 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10225 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10228 *cost
+= op1cost
+ op2cost
;
10234 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10238 enum machine_mode op0mode
;
10239 /* We'll mostly assume that the cost of a compare is the cost of the
10240 LHS. However, there are some notable exceptions. */
10242 /* Floating point compares are never done as side-effects. */
10243 op0mode
= GET_MODE (XEXP (x
, 0));
10244 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10245 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10247 *cost
= COSTS_N_INSNS (1);
10249 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10251 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10253 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10259 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10261 *cost
= LIBCALL_COST (2);
10265 /* DImode compares normally take two insns. */
10266 if (op0mode
== DImode
)
10268 *cost
= COSTS_N_INSNS (2);
10270 *cost
+= 2 * extra_cost
->alu
.arith
;
10274 if (op0mode
== SImode
)
10279 if (XEXP (x
, 1) == const0_rtx
10280 && !(REG_P (XEXP (x
, 0))
10281 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10282 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10284 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10286 /* Multiply operations that set the flags are often
10287 significantly more expensive. */
10289 && GET_CODE (XEXP (x
, 0)) == MULT
10290 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10291 *cost
+= extra_cost
->mult
[0].flag_setting
;
10294 && GET_CODE (XEXP (x
, 0)) == PLUS
10295 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10296 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10298 *cost
+= extra_cost
->mult
[0].flag_setting
;
10303 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10304 if (shift_op
!= NULL
)
10306 *cost
= COSTS_N_INSNS (1);
10307 if (shift_reg
!= NULL
)
10309 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10311 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10314 *cost
+= extra_cost
->alu
.arith_shift
;
10315 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10316 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10320 *cost
= COSTS_N_INSNS (1);
10322 *cost
+= extra_cost
->alu
.arith
;
10323 if (CONST_INT_P (XEXP (x
, 1))
10324 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10326 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10334 *cost
= LIBCALL_COST (2);
10357 if (outer_code
== SET
)
10359 /* Is it a store-flag operation? */
10360 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10361 && XEXP (x
, 1) == const0_rtx
)
10363 /* Thumb also needs an IT insn. */
10364 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10367 if (XEXP (x
, 1) == const0_rtx
)
10372 /* LSR Rd, Rn, #31. */
10373 *cost
= COSTS_N_INSNS (1);
10375 *cost
+= extra_cost
->alu
.shift
;
10385 *cost
= COSTS_N_INSNS (2);
10389 /* RSBS T1, Rn, Rn, LSR #31
10391 *cost
= COSTS_N_INSNS (2);
10393 *cost
+= extra_cost
->alu
.arith_shift
;
10397 /* RSB Rd, Rn, Rn, ASR #1
10398 LSR Rd, Rd, #31. */
10399 *cost
= COSTS_N_INSNS (2);
10401 *cost
+= (extra_cost
->alu
.arith_shift
10402 + extra_cost
->alu
.shift
);
10408 *cost
= COSTS_N_INSNS (2);
10410 *cost
+= extra_cost
->alu
.shift
;
10414 /* Remaining cases are either meaningless or would take
10415 three insns anyway. */
10416 *cost
= COSTS_N_INSNS (3);
10419 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10424 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10425 if (CONST_INT_P (XEXP (x
, 1))
10426 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10428 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10435 /* Not directly inside a set. If it involves the condition code
10436 register it must be the condition for a branch, cond_exec or
10437 I_T_E operation. Since the comparison is performed elsewhere
10438 this is just the control part which has no additional
10440 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10441 && XEXP (x
, 1) == const0_rtx
)
10449 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10450 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10452 *cost
= COSTS_N_INSNS (1);
10454 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10458 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10460 *cost
= LIBCALL_COST (1);
10464 if (mode
== SImode
)
10466 *cost
= COSTS_N_INSNS (1);
10468 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10472 *cost
= LIBCALL_COST (1);
10476 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10477 && MEM_P (XEXP (x
, 0)))
10479 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10481 if (mode
== DImode
)
10482 *cost
+= COSTS_N_INSNS (1);
10487 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10488 *cost
+= extra_cost
->ldst
.load
;
10490 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10492 if (mode
== DImode
)
10493 *cost
+= extra_cost
->alu
.shift
;
10498 /* Widening from less than 32-bits requires an extend operation. */
10499 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10501 /* We have SXTB/SXTH. */
10502 *cost
= COSTS_N_INSNS (1);
10503 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10505 *cost
+= extra_cost
->alu
.extend
;
10507 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10509 /* Needs two shifts. */
10510 *cost
= COSTS_N_INSNS (2);
10511 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10513 *cost
+= 2 * extra_cost
->alu
.shift
;
10516 /* Widening beyond 32-bits requires one more insn. */
10517 if (mode
== DImode
)
10519 *cost
+= COSTS_N_INSNS (1);
10521 *cost
+= extra_cost
->alu
.shift
;
10528 || GET_MODE (XEXP (x
, 0)) == SImode
10529 || GET_MODE (XEXP (x
, 0)) == QImode
)
10530 && MEM_P (XEXP (x
, 0)))
10532 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10534 if (mode
== DImode
)
10535 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10540 /* Widening from less than 32-bits requires an extend operation. */
10541 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10543 /* UXTB can be a shorter instruction in Thumb2, but it might
10544 be slower than the AND Rd, Rn, #255 alternative. When
10545 optimizing for speed it should never be slower to use
10546 AND, and we don't really model 16-bit vs 32-bit insns
10548 *cost
= COSTS_N_INSNS (1);
10550 *cost
+= extra_cost
->alu
.logical
;
10552 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10554 /* We have UXTB/UXTH. */
10555 *cost
= COSTS_N_INSNS (1);
10556 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10558 *cost
+= extra_cost
->alu
.extend
;
10560 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10562 /* Needs two shifts. It's marginally preferable to use
10563 shifts rather than two BIC instructions as the second
10564 shift may merge with a subsequent insn as a shifter
10566 *cost
= COSTS_N_INSNS (2);
10567 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10569 *cost
+= 2 * extra_cost
->alu
.shift
;
10571 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10572 *cost
= COSTS_N_INSNS (1);
10574 /* Widening beyond 32-bits requires one more insn. */
10575 if (mode
== DImode
)
10577 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10584 /* CONST_INT has no mode, so we cannot tell for sure how many
10585 insns are really going to be needed. The best we can do is
10586 look at the value passed. If it fits in SImode, then assume
10587 that's the mode it will be used for. Otherwise assume it
10588 will be used in DImode. */
10589 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10594 /* Avoid blowing up in arm_gen_constant (). */
10595 if (!(outer_code
== PLUS
10596 || outer_code
== AND
10597 || outer_code
== IOR
10598 || outer_code
== XOR
10599 || outer_code
== MINUS
))
10603 if (mode
== SImode
)
10605 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10606 INTVAL (x
), NULL
, NULL
,
10612 *cost
+= COSTS_N_INSNS (arm_gen_constant
10613 (outer_code
, SImode
, NULL
,
10614 trunc_int_for_mode (INTVAL (x
), SImode
),
10616 + arm_gen_constant (outer_code
, SImode
, NULL
,
10617 INTVAL (x
) >> 32, NULL
,
10629 if (arm_arch_thumb2
&& !flag_pic
)
10630 *cost
= COSTS_N_INSNS (2);
10632 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10635 *cost
= COSTS_N_INSNS (2);
10639 *cost
+= COSTS_N_INSNS (1);
10641 *cost
+= extra_cost
->alu
.arith
;
10647 *cost
= COSTS_N_INSNS (4);
10652 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10653 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10655 if (vfp3_const_double_rtx (x
))
10657 *cost
= COSTS_N_INSNS (1);
10659 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10665 *cost
= COSTS_N_INSNS (1);
10666 if (mode
== DFmode
)
10667 *cost
+= extra_cost
->ldst
.loadd
;
10669 *cost
+= extra_cost
->ldst
.loadf
;
10672 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10676 *cost
= COSTS_N_INSNS (4);
10682 && TARGET_HARD_FLOAT
10683 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10684 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10685 *cost
= COSTS_N_INSNS (1);
10687 *cost
= COSTS_N_INSNS (4);
10692 *cost
= COSTS_N_INSNS (1);
10693 /* When optimizing for size, we prefer constant pool entries to
10694 MOVW/MOVT pairs, so bump the cost of these slightly. */
10700 *cost
= COSTS_N_INSNS (1);
10702 *cost
+= extra_cost
->alu
.clz
;
10706 if (XEXP (x
, 1) == const0_rtx
)
10708 *cost
= COSTS_N_INSNS (1);
10710 *cost
+= extra_cost
->alu
.log_shift
;
10711 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10714 /* Fall through. */
10718 *cost
= COSTS_N_INSNS (2);
10722 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10723 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10724 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10725 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10726 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10727 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10728 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10729 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10732 *cost
= COSTS_N_INSNS (1);
10734 *cost
+= extra_cost
->mult
[1].extend
;
10735 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10737 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10741 *cost
= LIBCALL_COST (1);
10745 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10748 /* Reading the PC is like reading any other register. Writing it
10749 is more expensive, but we take that into account elsewhere. */
10754 /* TODO: Simple zero_extract of bottom bits using AND. */
10755 /* Fall through. */
10759 && CONST_INT_P (XEXP (x
, 1))
10760 && CONST_INT_P (XEXP (x
, 2)))
10762 *cost
= COSTS_N_INSNS (1);
10764 *cost
+= extra_cost
->alu
.bfx
;
10765 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10768 /* Without UBFX/SBFX, need to resort to shift operations. */
10769 *cost
= COSTS_N_INSNS (2);
10771 *cost
+= 2 * extra_cost
->alu
.shift
;
10772 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10776 if (TARGET_HARD_FLOAT
)
10778 *cost
= COSTS_N_INSNS (1);
10780 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10781 if (!TARGET_FPU_ARMV8
10782 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10784 /* Pre v8, widening HF->DF is a two-step process, first
10785 widening to SFmode. */
10786 *cost
+= COSTS_N_INSNS (1);
10788 *cost
+= extra_cost
->fp
[0].widen
;
10790 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10794 *cost
= LIBCALL_COST (1);
10797 case FLOAT_TRUNCATE
:
10798 if (TARGET_HARD_FLOAT
)
10800 *cost
= COSTS_N_INSNS (1);
10802 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10803 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10805 /* Vector modes? */
10807 *cost
= LIBCALL_COST (1);
10811 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10813 rtx op0
= XEXP (x
, 0);
10814 rtx op1
= XEXP (x
, 1);
10815 rtx op2
= XEXP (x
, 2);
10817 *cost
= COSTS_N_INSNS (1);
10819 /* vfms or vfnma. */
10820 if (GET_CODE (op0
) == NEG
)
10821 op0
= XEXP (op0
, 0);
10823 /* vfnms or vfnma. */
10824 if (GET_CODE (op2
) == NEG
)
10825 op2
= XEXP (op2
, 0);
10827 *cost
+= rtx_cost (op0
, FMA
, 0, speed_p
);
10828 *cost
+= rtx_cost (op1
, FMA
, 1, speed_p
);
10829 *cost
+= rtx_cost (op2
, FMA
, 2, speed_p
);
10832 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10837 *cost
= LIBCALL_COST (3);
10842 if (TARGET_HARD_FLOAT
)
10844 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10846 *cost
= COSTS_N_INSNS (1);
10848 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
10849 /* Strip of the 'cost' of rounding towards zero. */
10850 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10851 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
10853 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10854 /* ??? Increase the cost to deal with transferring from
10855 FP -> CORE registers? */
10858 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10859 && TARGET_FPU_ARMV8
)
10861 *cost
= COSTS_N_INSNS (1);
10863 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10866 /* Vector costs? */
10868 *cost
= LIBCALL_COST (1);
10872 case UNSIGNED_FLOAT
:
10873 if (TARGET_HARD_FLOAT
)
10875 /* ??? Increase the cost to deal with transferring from CORE
10876 -> FP registers? */
10877 *cost
= COSTS_N_INSNS (1);
10879 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10882 *cost
= LIBCALL_COST (1);
10886 *cost
= COSTS_N_INSNS (1);
10891 /* Just a guess. Guess number of instructions in the asm
10892 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10893 though (see PR60663). */
10894 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10895 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10897 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10901 if (mode
!= VOIDmode
)
10902 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10904 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10909 #undef HANDLE_NARROW_SHIFT_ARITH
10911 /* RTX costs when optimizing for size. */
10913 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
10914 int *total
, bool speed
)
10918 if (TARGET_OLD_RTX_COSTS
10919 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
10921 /* Old way. (Deprecated.) */
10923 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
10924 (enum rtx_code
) outer_code
, total
);
10926 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
10927 (enum rtx_code
) outer_code
, total
,
10933 if (current_tune
->insn_extra_cost
)
10934 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10935 (enum rtx_code
) outer_code
,
10936 current_tune
->insn_extra_cost
,
10938 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10939 && current_tune->insn_extra_cost != NULL */
10941 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10942 (enum rtx_code
) outer_code
,
10943 &generic_extra_costs
, total
, speed
);
10946 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10948 print_rtl_single (dump_file
, x
);
10949 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10950 *total
, result
? "final" : "partial");
10955 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10956 supported on any "slowmul" cores, so it can be ignored. */
10959 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10960 int *total
, bool speed
)
10962 enum machine_mode mode
= GET_MODE (x
);
10966 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10973 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10976 *total
= COSTS_N_INSNS (20);
10980 if (CONST_INT_P (XEXP (x
, 1)))
10982 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
10983 & (unsigned HOST_WIDE_INT
) 0xffffffff);
10984 int cost
, const_ok
= const_ok_for_arm (i
);
10985 int j
, booth_unit_size
;
10987 /* Tune as appropriate. */
10988 cost
= const_ok
? 4 : 8;
10989 booth_unit_size
= 2;
10990 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
10992 i
>>= booth_unit_size
;
10996 *total
= COSTS_N_INSNS (cost
);
10997 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
11001 *total
= COSTS_N_INSNS (20);
11005 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
11010 /* RTX cost for cores with a fast multiply unit (M variants). */
11013 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11014 int *total
, bool speed
)
11016 enum machine_mode mode
= GET_MODE (x
);
11020 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11024 /* ??? should thumb2 use different costs? */
11028 /* There is no point basing this on the tuning, since it is always the
11029 fast variant if it exists at all. */
11031 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11032 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11033 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11035 *total
= COSTS_N_INSNS(2);
11040 if (mode
== DImode
)
11042 *total
= COSTS_N_INSNS (5);
11046 if (CONST_INT_P (XEXP (x
, 1)))
11048 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11049 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11050 int cost
, const_ok
= const_ok_for_arm (i
);
11051 int j
, booth_unit_size
;
11053 /* Tune as appropriate. */
11054 cost
= const_ok
? 4 : 8;
11055 booth_unit_size
= 8;
11056 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11058 i
>>= booth_unit_size
;
11062 *total
= COSTS_N_INSNS(cost
);
11066 if (mode
== SImode
)
11068 *total
= COSTS_N_INSNS (4);
11072 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11074 if (TARGET_HARD_FLOAT
11076 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11078 *total
= COSTS_N_INSNS (1);
11083 /* Requires a lib call */
11084 *total
= COSTS_N_INSNS (20);
11088 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11093 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11094 so it can be ignored. */
11097 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11098 int *total
, bool speed
)
11100 enum machine_mode mode
= GET_MODE (x
);
11104 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11111 if (GET_CODE (XEXP (x
, 0)) != MULT
)
11112 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11114 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11115 will stall until the multiplication is complete. */
11116 *total
= COSTS_N_INSNS (3);
11120 /* There is no point basing this on the tuning, since it is always the
11121 fast variant if it exists at all. */
11123 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11124 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11125 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11127 *total
= COSTS_N_INSNS (2);
11132 if (mode
== DImode
)
11134 *total
= COSTS_N_INSNS (5);
11138 if (CONST_INT_P (XEXP (x
, 1)))
11140 /* If operand 1 is a constant we can more accurately
11141 calculate the cost of the multiply. The multiplier can
11142 retire 15 bits on the first cycle and a further 12 on the
11143 second. We do, of course, have to load the constant into
11144 a register first. */
11145 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
11146 /* There's a general overhead of one cycle. */
11148 unsigned HOST_WIDE_INT masked_const
;
11150 if (i
& 0x80000000)
11153 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
11155 masked_const
= i
& 0xffff8000;
11156 if (masked_const
!= 0)
11159 masked_const
= i
& 0xf8000000;
11160 if (masked_const
!= 0)
11163 *total
= COSTS_N_INSNS (cost
);
11167 if (mode
== SImode
)
11169 *total
= COSTS_N_INSNS (3);
11173 /* Requires a lib call */
11174 *total
= COSTS_N_INSNS (20);
11178 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11183 /* RTX costs for 9e (and later) cores. */
11186 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11187 int *total
, bool speed
)
11189 enum machine_mode mode
= GET_MODE (x
);
11196 *total
= COSTS_N_INSNS (3);
11200 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11208 /* There is no point basing this on the tuning, since it is always the
11209 fast variant if it exists at all. */
11211 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11212 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11213 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11215 *total
= COSTS_N_INSNS (2);
11220 if (mode
== DImode
)
11222 *total
= COSTS_N_INSNS (5);
11226 if (mode
== SImode
)
11228 *total
= COSTS_N_INSNS (2);
11232 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11234 if (TARGET_HARD_FLOAT
11236 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11238 *total
= COSTS_N_INSNS (1);
11243 *total
= COSTS_N_INSNS (20);
11247 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11250 /* All address computations that can be done are free, but rtx cost returns
11251 the same for practically all of them. So we weight the different types
11252 of address here in the order (most pref first):
11253 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11255 arm_arm_address_cost (rtx x
)
11257 enum rtx_code c
= GET_CODE (x
);
11259 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11261 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11266 if (CONST_INT_P (XEXP (x
, 1)))
11269 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11279 arm_thumb_address_cost (rtx x
)
11281 enum rtx_code c
= GET_CODE (x
);
11286 && REG_P (XEXP (x
, 0))
11287 && CONST_INT_P (XEXP (x
, 1)))
11294 arm_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11295 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11297 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11300 /* Adjust cost hook for XScale. */
11302 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11304 /* Some true dependencies can have a higher cost depending
11305 on precisely how certain input operands are used. */
11306 if (REG_NOTE_KIND(link
) == 0
11307 && recog_memoized (insn
) >= 0
11308 && recog_memoized (dep
) >= 0)
11310 int shift_opnum
= get_attr_shift (insn
);
11311 enum attr_type attr_type
= get_attr_type (dep
);
11313 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11314 operand for INSN. If we have a shifted input operand and the
11315 instruction we depend on is another ALU instruction, then we may
11316 have to account for an additional stall. */
11317 if (shift_opnum
!= 0
11318 && (attr_type
== TYPE_ALU_SHIFT_IMM
11319 || attr_type
== TYPE_ALUS_SHIFT_IMM
11320 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11321 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11322 || attr_type
== TYPE_ALU_SHIFT_REG
11323 || attr_type
== TYPE_ALUS_SHIFT_REG
11324 || attr_type
== TYPE_LOGIC_SHIFT_REG
11325 || attr_type
== TYPE_LOGICS_SHIFT_REG
11326 || attr_type
== TYPE_MOV_SHIFT
11327 || attr_type
== TYPE_MVN_SHIFT
11328 || attr_type
== TYPE_MOV_SHIFT_REG
11329 || attr_type
== TYPE_MVN_SHIFT_REG
))
11331 rtx shifted_operand
;
11334 /* Get the shifted operand. */
11335 extract_insn (insn
);
11336 shifted_operand
= recog_data
.operand
[shift_opnum
];
11338 /* Iterate over all the operands in DEP. If we write an operand
11339 that overlaps with SHIFTED_OPERAND, then we have increase the
11340 cost of this dependency. */
11341 extract_insn (dep
);
11342 preprocess_constraints ();
11343 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11345 /* We can ignore strict inputs. */
11346 if (recog_data
.operand_type
[opno
] == OP_IN
)
11349 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11361 /* Adjust cost hook for Cortex A9. */
11363 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11365 switch (REG_NOTE_KIND (link
))
11372 case REG_DEP_OUTPUT
:
11373 if (recog_memoized (insn
) >= 0
11374 && recog_memoized (dep
) >= 0)
11376 if (GET_CODE (PATTERN (insn
)) == SET
)
11379 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11381 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11383 enum attr_type attr_type_insn
= get_attr_type (insn
);
11384 enum attr_type attr_type_dep
= get_attr_type (dep
);
11386 /* By default all dependencies of the form
11389 have an extra latency of 1 cycle because
11390 of the input and output dependency in this
11391 case. However this gets modeled as an true
11392 dependency and hence all these checks. */
11393 if (REG_P (SET_DEST (PATTERN (insn
)))
11394 && REG_P (SET_DEST (PATTERN (dep
)))
11395 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11396 SET_DEST (PATTERN (dep
))))
11398 /* FMACS is a special case where the dependent
11399 instruction can be issued 3 cycles before
11400 the normal latency in case of an output
11402 if ((attr_type_insn
== TYPE_FMACS
11403 || attr_type_insn
== TYPE_FMACD
)
11404 && (attr_type_dep
== TYPE_FMACS
11405 || attr_type_dep
== TYPE_FMACD
))
11407 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11408 *cost
= insn_default_latency (dep
) - 3;
11410 *cost
= insn_default_latency (dep
);
11415 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11416 *cost
= insn_default_latency (dep
) + 1;
11418 *cost
= insn_default_latency (dep
);
11428 gcc_unreachable ();
11434 /* Adjust cost hook for FA726TE. */
11436 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11438 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11439 have penalty of 3. */
11440 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11441 && recog_memoized (insn
) >= 0
11442 && recog_memoized (dep
) >= 0
11443 && get_attr_conds (dep
) == CONDS_SET
)
11445 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11446 if (get_attr_conds (insn
) == CONDS_USE
11447 && get_attr_type (insn
) != TYPE_BRANCH
)
11453 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11454 || get_attr_conds (insn
) == CONDS_USE
)
11464 /* Implement TARGET_REGISTER_MOVE_COST.
11466 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11467 it is typically more expensive than a single memory access. We set
11468 the cost to less than two memory accesses so that floating
11469 point to integer conversion does not go through memory. */
11472 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
11473 reg_class_t from
, reg_class_t to
)
11477 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11478 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11480 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11481 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11483 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11490 if (from
== HI_REGS
|| to
== HI_REGS
)
11497 /* Implement TARGET_MEMORY_MOVE_COST. */
11500 arm_memory_move_cost (enum machine_mode mode
, reg_class_t rclass
,
11501 bool in ATTRIBUTE_UNUSED
)
11507 if (GET_MODE_SIZE (mode
) < 4)
11510 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11514 /* Vectorizer cost model implementation. */
11516 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11518 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11520 int misalign ATTRIBUTE_UNUSED
)
11524 switch (type_of_cost
)
11527 return current_tune
->vec_costs
->scalar_stmt_cost
;
11530 return current_tune
->vec_costs
->scalar_load_cost
;
11533 return current_tune
->vec_costs
->scalar_store_cost
;
11536 return current_tune
->vec_costs
->vec_stmt_cost
;
11539 return current_tune
->vec_costs
->vec_align_load_cost
;
11542 return current_tune
->vec_costs
->vec_store_cost
;
11544 case vec_to_scalar
:
11545 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11547 case scalar_to_vec
:
11548 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11550 case unaligned_load
:
11551 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11553 case unaligned_store
:
11554 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11556 case cond_branch_taken
:
11557 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11559 case cond_branch_not_taken
:
11560 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11563 case vec_promote_demote
:
11564 return current_tune
->vec_costs
->vec_stmt_cost
;
11566 case vec_construct
:
11567 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11568 return elements
/ 2 + 1;
11571 gcc_unreachable ();
11575 /* Implement targetm.vectorize.add_stmt_cost. */
11578 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11579 struct _stmt_vec_info
*stmt_info
, int misalign
,
11580 enum vect_cost_model_location where
)
11582 unsigned *cost
= (unsigned *) data
;
11583 unsigned retval
= 0;
11585 if (flag_vect_cost_model
)
11587 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11588 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11590 /* Statements in an inner loop relative to the loop being
11591 vectorized are weighted more heavily. The value here is
11592 arbitrary and could potentially be improved with analysis. */
11593 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11594 count
*= 50; /* FIXME. */
11596 retval
= (unsigned) (count
* stmt_cost
);
11597 cost
[where
] += retval
;
11603 /* Return true if and only if this insn can dual-issue only as older. */
11605 cortexa7_older_only (rtx insn
)
11607 if (recog_memoized (insn
) < 0)
11610 switch (get_attr_type (insn
))
11613 case TYPE_ALUS_REG
:
11614 case TYPE_LOGIC_REG
:
11615 case TYPE_LOGICS_REG
:
11617 case TYPE_ADCS_REG
:
11622 case TYPE_SHIFT_IMM
:
11623 case TYPE_SHIFT_REG
:
11624 case TYPE_LOAD_BYTE
:
11627 case TYPE_FFARITHS
:
11629 case TYPE_FFARITHD
:
11647 case TYPE_F_STORES
:
11654 /* Return true if and only if this insn can dual-issue as younger. */
11656 cortexa7_younger (FILE *file
, int verbose
, rtx insn
)
11658 if (recog_memoized (insn
) < 0)
11661 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11665 switch (get_attr_type (insn
))
11668 case TYPE_ALUS_IMM
:
11669 case TYPE_LOGIC_IMM
:
11670 case TYPE_LOGICS_IMM
:
11675 case TYPE_MOV_SHIFT
:
11676 case TYPE_MOV_SHIFT_REG
:
11686 /* Look for an instruction that can dual issue only as an older
11687 instruction, and move it in front of any instructions that can
11688 dual-issue as younger, while preserving the relative order of all
11689 other instructions in the ready list. This is a hueuristic to help
11690 dual-issue in later cycles, by postponing issue of more flexible
11691 instructions. This heuristic may affect dual issue opportunities
11692 in the current cycle. */
11694 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11698 int first_older_only
= -1, first_younger
= -1;
11702 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11706 /* Traverse the ready list from the head (the instruction to issue
11707 first), and looking for the first instruction that can issue as
11708 younger and the first instruction that can dual-issue only as
11710 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11712 rtx insn
= ready
[i
];
11713 if (cortexa7_older_only (insn
))
11715 first_older_only
= i
;
11717 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11720 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11724 /* Nothing to reorder because either no younger insn found or insn
11725 that can dual-issue only as older appears before any insn that
11726 can dual-issue as younger. */
11727 if (first_younger
== -1)
11730 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11734 /* Nothing to reorder because no older-only insn in the ready list. */
11735 if (first_older_only
== -1)
11738 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11742 /* Move first_older_only insn before first_younger. */
11744 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11745 INSN_UID(ready
[first_older_only
]),
11746 INSN_UID(ready
[first_younger
]));
11747 rtx first_older_only_insn
= ready
[first_older_only
];
11748 for (i
= first_older_only
; i
< first_younger
; i
++)
11750 ready
[i
] = ready
[i
+1];
11753 ready
[i
] = first_older_only_insn
;
11757 /* Implement TARGET_SCHED_REORDER. */
11759 arm_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11765 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11768 /* Do nothing for other cores. */
11772 return arm_issue_rate ();
11775 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11776 It corrects the value of COST based on the relationship between
11777 INSN and DEP through the dependence LINK. It returns the new
11778 value. There is a per-core adjust_cost hook to adjust scheduler costs
11779 and the per-core hook can choose to completely override the generic
11780 adjust_cost function. Only put bits of code into arm_adjust_cost that
11781 are common across all cores. */
11783 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
11787 /* When generating Thumb-1 code, we want to place flag-setting operations
11788 close to a conditional branch which depends on them, so that we can
11789 omit the comparison. */
11791 && REG_NOTE_KIND (link
) == 0
11792 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11793 && recog_memoized (dep
) >= 0
11794 && get_attr_conds (dep
) == CONDS_SET
)
11797 if (current_tune
->sched_adjust_cost
!= NULL
)
11799 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
11803 /* XXX Is this strictly true? */
11804 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
11805 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11808 /* Call insns don't incur a stall, even if they follow a load. */
11809 if (REG_NOTE_KIND (link
) == 0
11813 if ((i_pat
= single_set (insn
)) != NULL
11814 && MEM_P (SET_SRC (i_pat
))
11815 && (d_pat
= single_set (dep
)) != NULL
11816 && MEM_P (SET_DEST (d_pat
)))
11818 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11819 /* This is a load after a store, there is no conflict if the load reads
11820 from a cached area. Assume that loads from the stack, and from the
11821 constant pool are cached, and that others will miss. This is a
11824 if ((GET_CODE (src_mem
) == SYMBOL_REF
11825 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11826 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11827 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11828 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11836 arm_max_conditional_execute (void)
11838 return max_insns_skipped
;
11842 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11845 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11847 return (optimize
> 0) ? 2 : 0;
11851 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11853 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11856 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11857 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11858 sequences of non-executed instructions in IT blocks probably take the same
11859 amount of time as executed instructions (and the IT instruction itself takes
11860 space in icache). This function was experimentally determined to give good
11861 results on a popular embedded benchmark. */
11864 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11866 return (TARGET_32BIT
&& speed_p
) ? 1
11867 : arm_default_branch_cost (speed_p
, predictable_p
);
11870 static bool fp_consts_inited
= false;
11872 static REAL_VALUE_TYPE value_fp0
;
11875 init_fp_table (void)
11879 r
= REAL_VALUE_ATOF ("0", DFmode
);
11881 fp_consts_inited
= true;
11884 /* Return TRUE if rtx X is a valid immediate FP constant. */
11886 arm_const_double_rtx (rtx x
)
11890 if (!fp_consts_inited
)
11893 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11894 if (REAL_VALUE_MINUS_ZERO (r
))
11897 if (REAL_VALUES_EQUAL (r
, value_fp0
))
11903 /* VFPv3 has a fairly wide range of representable immediates, formed from
11904 "quarter-precision" floating-point values. These can be evaluated using this
11905 formula (with ^ for exponentiation):
11909 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11910 16 <= n <= 31 and 0 <= r <= 7.
11912 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11914 - A (most-significant) is the sign bit.
11915 - BCD are the exponent (encoded as r XOR 3).
11916 - EFGH are the mantissa (encoded as n - 16).
11919 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11920 fconst[sd] instruction, or -1 if X isn't suitable. */
11922 vfp3_const_double_index (rtx x
)
11924 REAL_VALUE_TYPE r
, m
;
11925 int sign
, exponent
;
11926 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11927 unsigned HOST_WIDE_INT mask
;
11928 HOST_WIDE_INT m1
, m2
;
11929 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11931 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11934 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11936 /* We can't represent these things, so detect them first. */
11937 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11940 /* Extract sign, exponent and mantissa. */
11941 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11942 r
= real_value_abs (&r
);
11943 exponent
= REAL_EXP (&r
);
11944 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11945 highest (sign) bit, with a fixed binary point at bit point_pos.
11946 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11947 bits for the mantissa, this may fail (low bits would be lost). */
11948 real_ldexp (&m
, &r
, point_pos
- exponent
);
11949 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
11953 /* If there are bits set in the low part of the mantissa, we can't
11954 represent this value. */
11958 /* Now make it so that mantissa contains the most-significant bits, and move
11959 the point_pos to indicate that the least-significant bits have been
11961 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11962 mantissa
= mant_hi
;
11964 /* We can permit four significant bits of mantissa only, plus a high bit
11965 which is always 1. */
11966 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
11967 if ((mantissa
& mask
) != 0)
11970 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11971 mantissa
>>= point_pos
- 5;
11973 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11974 floating-point immediate zero with Neon using an integer-zero load, but
11975 that case is handled elsewhere.) */
11979 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11981 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11982 normalized significands are in the range [1, 2). (Our mantissa is shifted
11983 left 4 places at this point relative to normalized IEEE754 values). GCC
11984 internally uses [0.5, 1) (see real.c), so the exponent returned from
11985 REAL_EXP must be altered. */
11986 exponent
= 5 - exponent
;
11988 if (exponent
< 0 || exponent
> 7)
11991 /* Sign, mantissa and exponent are now in the correct form to plug into the
11992 formula described in the comment above. */
11993 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11996 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11998 vfp3_const_double_rtx (rtx x
)
12003 return vfp3_const_double_index (x
) != -1;
12006 /* Recognize immediates which can be used in various Neon instructions. Legal
12007 immediates are described by the following table (for VMVN variants, the
12008 bitwise inverse of the constant shown is recognized. In either case, VMOV
12009 is output and the correct instruction to use for a given constant is chosen
12010 by the assembler). The constant shown is replicated across all elements of
12011 the destination vector.
12013 insn elems variant constant (binary)
12014 ---- ----- ------- -----------------
12015 vmov i32 0 00000000 00000000 00000000 abcdefgh
12016 vmov i32 1 00000000 00000000 abcdefgh 00000000
12017 vmov i32 2 00000000 abcdefgh 00000000 00000000
12018 vmov i32 3 abcdefgh 00000000 00000000 00000000
12019 vmov i16 4 00000000 abcdefgh
12020 vmov i16 5 abcdefgh 00000000
12021 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12022 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12023 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12024 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12025 vmvn i16 10 00000000 abcdefgh
12026 vmvn i16 11 abcdefgh 00000000
12027 vmov i32 12 00000000 00000000 abcdefgh 11111111
12028 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12029 vmov i32 14 00000000 abcdefgh 11111111 11111111
12030 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12031 vmov i8 16 abcdefgh
12032 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12033 eeeeeeee ffffffff gggggggg hhhhhhhh
12034 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12035 vmov f32 19 00000000 00000000 00000000 00000000
12037 For case 18, B = !b. Representable values are exactly those accepted by
12038 vfp3_const_double_index, but are output as floating-point numbers rather
12041 For case 19, we will change it to vmov.i32 when assembling.
12043 Variants 0-5 (inclusive) may also be used as immediates for the second
12044 operand of VORR/VBIC instructions.
12046 The INVERSE argument causes the bitwise inverse of the given operand to be
12047 recognized instead (used for recognizing legal immediates for the VAND/VORN
12048 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12049 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12050 output, rather than the real insns vbic/vorr).
12052 INVERSE makes no difference to the recognition of float vectors.
12054 The return value is the variant of immediate as shown in the above table, or
12055 -1 if the given value doesn't match any of the listed patterns.
12058 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
12059 rtx
*modconst
, int *elementwidth
)
12061 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12063 for (i = 0; i < idx; i += (STRIDE)) \
12068 immtype = (CLASS); \
12069 elsize = (ELSIZE); \
12073 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12074 unsigned int innersize
;
12075 unsigned char bytes
[16];
12076 int immtype
= -1, matches
;
12077 unsigned int invmask
= inverse
? 0xff : 0;
12078 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12082 n_elts
= CONST_VECTOR_NUNITS (op
);
12083 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12088 if (mode
== VOIDmode
)
12090 innersize
= GET_MODE_SIZE (mode
);
12093 /* Vectors of float constants. */
12094 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12096 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12097 REAL_VALUE_TYPE r0
;
12099 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12102 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
12104 for (i
= 1; i
< n_elts
; i
++)
12106 rtx elt
= CONST_VECTOR_ELT (op
, i
);
12107 REAL_VALUE_TYPE re
;
12109 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
12111 if (!REAL_VALUES_EQUAL (r0
, re
))
12116 *modconst
= CONST_VECTOR_ELT (op
, 0);
12121 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12127 /* Splat vector constant out into a byte vector. */
12128 for (i
= 0; i
< n_elts
; i
++)
12130 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12131 unsigned HOST_WIDE_INT elpart
;
12132 unsigned int part
, parts
;
12134 if (CONST_INT_P (el
))
12136 elpart
= INTVAL (el
);
12139 else if (CONST_DOUBLE_P (el
))
12141 elpart
= CONST_DOUBLE_LOW (el
);
12145 gcc_unreachable ();
12147 for (part
= 0; part
< parts
; part
++)
12150 for (byte
= 0; byte
< innersize
; byte
++)
12152 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12153 elpart
>>= BITS_PER_UNIT
;
12155 if (CONST_DOUBLE_P (el
))
12156 elpart
= CONST_DOUBLE_HIGH (el
);
12160 /* Sanity check. */
12161 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12165 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12166 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12168 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12169 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12171 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12172 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12174 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12175 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12177 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12179 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12181 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12182 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12184 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12185 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12187 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12188 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12190 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12191 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12193 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12195 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12197 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12198 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12200 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12201 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12203 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12204 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12206 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12207 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12209 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12211 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12212 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12220 *elementwidth
= elsize
;
12224 unsigned HOST_WIDE_INT imm
= 0;
12226 /* Un-invert bytes of recognized vector, if necessary. */
12228 for (i
= 0; i
< idx
; i
++)
12229 bytes
[i
] ^= invmask
;
12233 /* FIXME: Broken on 32-bit H_W_I hosts. */
12234 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12236 for (i
= 0; i
< 8; i
++)
12237 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12238 << (i
* BITS_PER_UNIT
);
12240 *modconst
= GEN_INT (imm
);
12244 unsigned HOST_WIDE_INT imm
= 0;
12246 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12247 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12249 *modconst
= GEN_INT (imm
);
12257 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12258 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12259 float elements), and a modified constant (whatever should be output for a
12260 VMOV) in *MODCONST. */
12263 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
12264 rtx
*modconst
, int *elementwidth
)
12268 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12274 *modconst
= tmpconst
;
12277 *elementwidth
= tmpwidth
;
12282 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12283 the immediate is valid, write a constant suitable for using as an operand
12284 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12285 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12288 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
12289 rtx
*modconst
, int *elementwidth
)
12293 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12295 if (retval
< 0 || retval
> 5)
12299 *modconst
= tmpconst
;
12302 *elementwidth
= tmpwidth
;
12307 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12308 the immediate is valid, write a constant suitable for using as an operand
12309 to VSHR/VSHL to *MODCONST and the corresponding element width to
12310 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12311 because they have different limitations. */
12314 neon_immediate_valid_for_shift (rtx op
, enum machine_mode mode
,
12315 rtx
*modconst
, int *elementwidth
,
12318 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12319 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12320 unsigned HOST_WIDE_INT last_elt
= 0;
12321 unsigned HOST_WIDE_INT maxshift
;
12323 /* Split vector constant out into a byte vector. */
12324 for (i
= 0; i
< n_elts
; i
++)
12326 rtx el
= CONST_VECTOR_ELT (op
, i
);
12327 unsigned HOST_WIDE_INT elpart
;
12329 if (CONST_INT_P (el
))
12330 elpart
= INTVAL (el
);
12331 else if (CONST_DOUBLE_P (el
))
12334 gcc_unreachable ();
12336 if (i
!= 0 && elpart
!= last_elt
)
12342 /* Shift less than element size. */
12343 maxshift
= innersize
* 8;
12347 /* Left shift immediate value can be from 0 to <size>-1. */
12348 if (last_elt
>= maxshift
)
12353 /* Right shift immediate value can be from 1 to <size>. */
12354 if (last_elt
== 0 || last_elt
> maxshift
)
12359 *elementwidth
= innersize
* 8;
12362 *modconst
= CONST_VECTOR_ELT (op
, 0);
12367 /* Return a string suitable for output of Neon immediate logic operation
12371 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
12372 int inverse
, int quad
)
12374 int width
, is_valid
;
12375 static char templ
[40];
12377 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12379 gcc_assert (is_valid
!= 0);
12382 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12384 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12389 /* Return a string suitable for output of Neon immediate shift operation
12390 (VSHR or VSHL) MNEM. */
12393 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12394 enum machine_mode mode
, int quad
,
12397 int width
, is_valid
;
12398 static char templ
[40];
12400 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12401 gcc_assert (is_valid
!= 0);
12404 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12406 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12411 /* Output a sequence of pairwise operations to implement a reduction.
12412 NOTE: We do "too much work" here, because pairwise operations work on two
12413 registers-worth of operands in one go. Unfortunately we can't exploit those
12414 extra calculations to do the full operation in fewer steps, I don't think.
12415 Although all vector elements of the result but the first are ignored, we
12416 actually calculate the same result in each of the elements. An alternative
12417 such as initially loading a vector with zero to use as each of the second
12418 operands would use up an additional register and take an extra instruction,
12419 for no particular gain. */
12422 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
12423 rtx (*reduc
) (rtx
, rtx
, rtx
))
12425 enum machine_mode inner
= GET_MODE_INNER (mode
);
12426 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
12429 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12431 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12432 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12437 /* If VALS is a vector constant that can be loaded into a register
12438 using VDUP, generate instructions to do so and return an RTX to
12439 assign to the register. Otherwise return NULL_RTX. */
12442 neon_vdup_constant (rtx vals
)
12444 enum machine_mode mode
= GET_MODE (vals
);
12445 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12446 int n_elts
= GET_MODE_NUNITS (mode
);
12447 bool all_same
= true;
12451 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12454 for (i
= 0; i
< n_elts
; ++i
)
12456 x
= XVECEXP (vals
, 0, i
);
12457 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12462 /* The elements are not all the same. We could handle repeating
12463 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12464 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12468 /* We can load this constant by using VDUP and a constant in a
12469 single ARM register. This will be cheaper than a vector
12472 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12473 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12476 /* Generate code to load VALS, which is a PARALLEL containing only
12477 constants (for vec_init) or CONST_VECTOR, efficiently into a
12478 register. Returns an RTX to copy into the register, or NULL_RTX
12479 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12482 neon_make_constant (rtx vals
)
12484 enum machine_mode mode
= GET_MODE (vals
);
12486 rtx const_vec
= NULL_RTX
;
12487 int n_elts
= GET_MODE_NUNITS (mode
);
12491 if (GET_CODE (vals
) == CONST_VECTOR
)
12493 else if (GET_CODE (vals
) == PARALLEL
)
12495 /* A CONST_VECTOR must contain only CONST_INTs and
12496 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12497 Only store valid constants in a CONST_VECTOR. */
12498 for (i
= 0; i
< n_elts
; ++i
)
12500 rtx x
= XVECEXP (vals
, 0, i
);
12501 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12504 if (n_const
== n_elts
)
12505 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12508 gcc_unreachable ();
12510 if (const_vec
!= NULL
12511 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12512 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12514 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12515 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12516 pipeline cycle; creating the constant takes one or two ARM
12517 pipeline cycles. */
12519 else if (const_vec
!= NULL_RTX
)
12520 /* Load from constant pool. On Cortex-A8 this takes two cycles
12521 (for either double or quad vectors). We can not take advantage
12522 of single-cycle VLD1 because we need a PC-relative addressing
12526 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12527 We can not construct an initializer. */
12531 /* Initialize vector TARGET to VALS. */
12534 neon_expand_vector_init (rtx target
, rtx vals
)
12536 enum machine_mode mode
= GET_MODE (target
);
12537 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12538 int n_elts
= GET_MODE_NUNITS (mode
);
12539 int n_var
= 0, one_var
= -1;
12540 bool all_same
= true;
12544 for (i
= 0; i
< n_elts
; ++i
)
12546 x
= XVECEXP (vals
, 0, i
);
12547 if (!CONSTANT_P (x
))
12548 ++n_var
, one_var
= i
;
12550 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12556 rtx constant
= neon_make_constant (vals
);
12557 if (constant
!= NULL_RTX
)
12559 emit_move_insn (target
, constant
);
12564 /* Splat a single non-constant element if we can. */
12565 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12567 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12568 emit_insn (gen_rtx_SET (VOIDmode
, target
,
12569 gen_rtx_VEC_DUPLICATE (mode
, x
)));
12573 /* One field is non-constant. Load constant then overwrite varying
12574 field. This is more efficient than using the stack. */
12577 rtx copy
= copy_rtx (vals
);
12578 rtx index
= GEN_INT (one_var
);
12580 /* Load constant part of vector, substitute neighboring value for
12581 varying element. */
12582 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12583 neon_expand_vector_init (target
, copy
);
12585 /* Insert variable. */
12586 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12590 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12593 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12596 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12599 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12602 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12605 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12608 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12611 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12614 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12617 gcc_unreachable ();
12622 /* Construct the vector in memory one field at a time
12623 and load the whole vector. */
12624 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12625 for (i
= 0; i
< n_elts
; i
++)
12626 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12627 i
* GET_MODE_SIZE (inner_mode
)),
12628 XVECEXP (vals
, 0, i
));
12629 emit_move_insn (target
, mem
);
12632 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12633 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12634 reported source locations are bogus. */
12637 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12640 HOST_WIDE_INT lane
;
12642 gcc_assert (CONST_INT_P (operand
));
12644 lane
= INTVAL (operand
);
12646 if (lane
< low
|| lane
>= high
)
12650 /* Bounds-check lanes. */
12653 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12655 bounds_check (operand
, low
, high
, "lane out of range");
12658 /* Bounds-check constants. */
12661 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12663 bounds_check (operand
, low
, high
, "constant out of range");
12667 neon_element_bits (enum machine_mode mode
)
12669 if (mode
== DImode
)
12670 return GET_MODE_BITSIZE (mode
);
12672 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
12676 /* Predicates for `match_operand' and `match_operator'. */
12678 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12679 WB is true if full writeback address modes are allowed and is false
12680 if limited writeback address modes (POST_INC and PRE_DEC) are
12684 arm_coproc_mem_operand (rtx op
, bool wb
)
12688 /* Reject eliminable registers. */
12689 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12690 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12691 || reg_mentioned_p (arg_pointer_rtx
, op
)
12692 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12693 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12694 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12695 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12698 /* Constants are converted into offsets from labels. */
12702 ind
= XEXP (op
, 0);
12704 if (reload_completed
12705 && (GET_CODE (ind
) == LABEL_REF
12706 || (GET_CODE (ind
) == CONST
12707 && GET_CODE (XEXP (ind
, 0)) == PLUS
12708 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12709 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12712 /* Match: (mem (reg)). */
12714 return arm_address_register_rtx_p (ind
, 0);
12716 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12717 acceptable in any case (subject to verification by
12718 arm_address_register_rtx_p). We need WB to be true to accept
12719 PRE_INC and POST_DEC. */
12720 if (GET_CODE (ind
) == POST_INC
12721 || GET_CODE (ind
) == PRE_DEC
12723 && (GET_CODE (ind
) == PRE_INC
12724 || GET_CODE (ind
) == POST_DEC
)))
12725 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12728 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12729 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12730 && GET_CODE (XEXP (ind
, 1)) == PLUS
12731 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12732 ind
= XEXP (ind
, 1);
12737 if (GET_CODE (ind
) == PLUS
12738 && REG_P (XEXP (ind
, 0))
12739 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12740 && CONST_INT_P (XEXP (ind
, 1))
12741 && INTVAL (XEXP (ind
, 1)) > -1024
12742 && INTVAL (XEXP (ind
, 1)) < 1024
12743 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12749 /* Return TRUE if OP is a memory operand which we can load or store a vector
12750 to/from. TYPE is one of the following values:
12751 0 - Vector load/stor (vldr)
12752 1 - Core registers (ldm)
12753 2 - Element/structure loads (vld1)
12756 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12760 /* Reject eliminable registers. */
12761 if (! (reload_in_progress
|| reload_completed
)
12762 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12763 || reg_mentioned_p (arg_pointer_rtx
, op
)
12764 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12765 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12766 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12767 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12770 /* Constants are converted into offsets from labels. */
12774 ind
= XEXP (op
, 0);
12776 if (reload_completed
12777 && (GET_CODE (ind
) == LABEL_REF
12778 || (GET_CODE (ind
) == CONST
12779 && GET_CODE (XEXP (ind
, 0)) == PLUS
12780 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12781 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12784 /* Match: (mem (reg)). */
12786 return arm_address_register_rtx_p (ind
, 0);
12788 /* Allow post-increment with Neon registers. */
12789 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12790 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12791 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12793 /* FIXME: vld1 allows register post-modify. */
12799 && GET_CODE (ind
) == PLUS
12800 && REG_P (XEXP (ind
, 0))
12801 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12802 && CONST_INT_P (XEXP (ind
, 1))
12803 && INTVAL (XEXP (ind
, 1)) > -1024
12804 /* For quad modes, we restrict the constant offset to be slightly less
12805 than what the instruction format permits. We have no such constraint
12806 on double mode offsets. (This must match arm_legitimate_index_p.) */
12807 && (INTVAL (XEXP (ind
, 1))
12808 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12809 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12815 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12818 neon_struct_mem_operand (rtx op
)
12822 /* Reject eliminable registers. */
12823 if (! (reload_in_progress
|| reload_completed
)
12824 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12825 || reg_mentioned_p (arg_pointer_rtx
, op
)
12826 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12827 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12828 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12829 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12832 /* Constants are converted into offsets from labels. */
12836 ind
= XEXP (op
, 0);
12838 if (reload_completed
12839 && (GET_CODE (ind
) == LABEL_REF
12840 || (GET_CODE (ind
) == CONST
12841 && GET_CODE (XEXP (ind
, 0)) == PLUS
12842 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12843 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12846 /* Match: (mem (reg)). */
12848 return arm_address_register_rtx_p (ind
, 0);
12850 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12851 if (GET_CODE (ind
) == POST_INC
12852 || GET_CODE (ind
) == PRE_DEC
)
12853 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12858 /* Return true if X is a register that will be eliminated later on. */
12860 arm_eliminable_register (rtx x
)
12862 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12863 || REGNO (x
) == ARG_POINTER_REGNUM
12864 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12865 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12868 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12869 coprocessor registers. Otherwise return NO_REGS. */
12872 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
12874 if (mode
== HFmode
)
12876 if (!TARGET_NEON_FP16
)
12877 return GENERAL_REGS
;
12878 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12880 return GENERAL_REGS
;
12883 /* The neon move patterns handle all legitimate vector and struct
12886 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12887 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12888 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12889 || VALID_NEON_STRUCT_MODE (mode
)))
12892 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12895 return GENERAL_REGS
;
12898 /* Values which must be returned in the most-significant end of the return
12902 arm_return_in_msb (const_tree valtype
)
12904 return (TARGET_AAPCS_BASED
12905 && BYTES_BIG_ENDIAN
12906 && (AGGREGATE_TYPE_P (valtype
)
12907 || TREE_CODE (valtype
) == COMPLEX_TYPE
12908 || FIXED_POINT_TYPE_P (valtype
)));
12911 /* Return TRUE if X references a SYMBOL_REF. */
12913 symbol_mentioned_p (rtx x
)
12918 if (GET_CODE (x
) == SYMBOL_REF
)
12921 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12922 are constant offsets, not symbols. */
12923 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12926 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12928 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12934 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12935 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12938 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12945 /* Return TRUE if X references a LABEL_REF. */
12947 label_mentioned_p (rtx x
)
12952 if (GET_CODE (x
) == LABEL_REF
)
12955 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12956 instruction, but they are constant offsets, not symbols. */
12957 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12960 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12961 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12967 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12968 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12971 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12979 tls_mentioned_p (rtx x
)
12981 switch (GET_CODE (x
))
12984 return tls_mentioned_p (XEXP (x
, 0));
12987 if (XINT (x
, 1) == UNSPEC_TLS
)
12995 /* Must not copy any rtx that uses a pc-relative address. */
12998 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
13000 if (GET_CODE (*x
) == UNSPEC
13001 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
13002 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
13008 arm_cannot_copy_insn_p (rtx insn
)
13010 /* The tls call insn cannot be copied, as it is paired with a data
13012 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13015 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
13019 minmax_code (rtx x
)
13021 enum rtx_code code
= GET_CODE (x
);
13034 gcc_unreachable ();
13038 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13041 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13042 int *mask
, bool *signed_sat
)
13044 /* The high bound must be a power of two minus one. */
13045 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13049 /* The low bound is either zero (for usat) or one less than the
13050 negation of the high bound (for ssat). */
13051 if (INTVAL (lo_bound
) == 0)
13056 *signed_sat
= false;
13061 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13066 *signed_sat
= true;
13074 /* Return 1 if memory locations are adjacent. */
13076 adjacent_mem_locations (rtx a
, rtx b
)
13078 /* We don't guarantee to preserve the order of these memory refs. */
13079 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13082 if ((REG_P (XEXP (a
, 0))
13083 || (GET_CODE (XEXP (a
, 0)) == PLUS
13084 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13085 && (REG_P (XEXP (b
, 0))
13086 || (GET_CODE (XEXP (b
, 0)) == PLUS
13087 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13089 HOST_WIDE_INT val0
= 0, val1
= 0;
13093 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13095 reg0
= XEXP (XEXP (a
, 0), 0);
13096 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13099 reg0
= XEXP (a
, 0);
13101 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13103 reg1
= XEXP (XEXP (b
, 0), 0);
13104 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13107 reg1
= XEXP (b
, 0);
13109 /* Don't accept any offset that will require multiple
13110 instructions to handle, since this would cause the
13111 arith_adjacentmem pattern to output an overlong sequence. */
13112 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13115 /* Don't allow an eliminable register: register elimination can make
13116 the offset too large. */
13117 if (arm_eliminable_register (reg0
))
13120 val_diff
= val1
- val0
;
13124 /* If the target has load delay slots, then there's no benefit
13125 to using an ldm instruction unless the offset is zero and
13126 we are optimizing for size. */
13127 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13128 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13129 && (val_diff
== 4 || val_diff
== -4));
13132 return ((REGNO (reg0
) == REGNO (reg1
))
13133 && (val_diff
== 4 || val_diff
== -4));
13139 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13140 for load operations, false for store operations. CONSECUTIVE is true
13141 if the register numbers in the operation must be consecutive in the register
13142 bank. RETURN_PC is true if value is to be loaded in PC.
13143 The pattern we are trying to match for load is:
13144 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13145 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13148 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13151 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13152 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13153 3. If consecutive is TRUE, then for kth register being loaded,
13154 REGNO (R_dk) = REGNO (R_d0) + k.
13155 The pattern for store is similar. */
13157 ldm_stm_operation_p (rtx op
, bool load
, enum machine_mode mode
,
13158 bool consecutive
, bool return_pc
)
13160 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13161 rtx reg
, mem
, addr
;
13163 unsigned first_regno
;
13164 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13166 bool addr_reg_in_reglist
= false;
13167 bool update
= false;
13172 /* If not in SImode, then registers must be consecutive
13173 (e.g., VLDM instructions for DFmode). */
13174 gcc_assert ((mode
== SImode
) || consecutive
);
13175 /* Setting return_pc for stores is illegal. */
13176 gcc_assert (!return_pc
|| load
);
13178 /* Set up the increments and the regs per val based on the mode. */
13179 reg_increment
= GET_MODE_SIZE (mode
);
13180 regs_per_val
= reg_increment
/ 4;
13181 offset_adj
= return_pc
? 1 : 0;
13184 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13185 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13188 /* Check if this is a write-back. */
13189 elt
= XVECEXP (op
, 0, offset_adj
);
13190 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13196 /* The offset adjustment must be the number of registers being
13197 popped times the size of a single register. */
13198 if (!REG_P (SET_DEST (elt
))
13199 || !REG_P (XEXP (SET_SRC (elt
), 0))
13200 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13201 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13202 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13203 ((count
- 1 - offset_adj
) * reg_increment
))
13207 i
= i
+ offset_adj
;
13208 base
= base
+ offset_adj
;
13209 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13210 success depends on the type: VLDM can do just one reg,
13211 LDM must do at least two. */
13212 if ((count
<= i
) && (mode
== SImode
))
13215 elt
= XVECEXP (op
, 0, i
- 1);
13216 if (GET_CODE (elt
) != SET
)
13221 reg
= SET_DEST (elt
);
13222 mem
= SET_SRC (elt
);
13226 reg
= SET_SRC (elt
);
13227 mem
= SET_DEST (elt
);
13230 if (!REG_P (reg
) || !MEM_P (mem
))
13233 regno
= REGNO (reg
);
13234 first_regno
= regno
;
13235 addr
= XEXP (mem
, 0);
13236 if (GET_CODE (addr
) == PLUS
)
13238 if (!CONST_INT_P (XEXP (addr
, 1)))
13241 offset
= INTVAL (XEXP (addr
, 1));
13242 addr
= XEXP (addr
, 0);
13248 /* Don't allow SP to be loaded unless it is also the base register. It
13249 guarantees that SP is reset correctly when an LDM instruction
13250 is interrupted. Otherwise, we might end up with a corrupt stack. */
13251 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13254 for (; i
< count
; i
++)
13256 elt
= XVECEXP (op
, 0, i
);
13257 if (GET_CODE (elt
) != SET
)
13262 reg
= SET_DEST (elt
);
13263 mem
= SET_SRC (elt
);
13267 reg
= SET_SRC (elt
);
13268 mem
= SET_DEST (elt
);
13272 || GET_MODE (reg
) != mode
13273 || REGNO (reg
) <= regno
13276 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13277 /* Don't allow SP to be loaded unless it is also the base register. It
13278 guarantees that SP is reset correctly when an LDM instruction
13279 is interrupted. Otherwise, we might end up with a corrupt stack. */
13280 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13282 || GET_MODE (mem
) != mode
13283 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13284 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13285 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13286 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13287 offset
+ (i
- base
) * reg_increment
))
13288 && (!REG_P (XEXP (mem
, 0))
13289 || offset
+ (i
- base
) * reg_increment
!= 0)))
13292 regno
= REGNO (reg
);
13293 if (regno
== REGNO (addr
))
13294 addr_reg_in_reglist
= true;
13299 if (update
&& addr_reg_in_reglist
)
13302 /* For Thumb-1, address register is always modified - either by write-back
13303 or by explicit load. If the pattern does not describe an update,
13304 then the address register must be in the list of loaded registers. */
13306 return update
|| addr_reg_in_reglist
;
13312 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13313 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13314 instruction. ADD_OFFSET is nonzero if the base address register needs
13315 to be modified with an add instruction before we can use it. */
13318 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13319 int nops
, HOST_WIDE_INT add_offset
)
13321 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13322 if the offset isn't small enough. The reason 2 ldrs are faster
13323 is because these ARMs are able to do more than one cache access
13324 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13325 whilst the ARM8 has a double bandwidth cache. This means that
13326 these cores can do both an instruction fetch and a data fetch in
13327 a single cycle, so the trick of calculating the address into a
13328 scratch register (one of the result regs) and then doing a load
13329 multiple actually becomes slower (and no smaller in code size).
13330 That is the transformation
13332 ldr rd1, [rbase + offset]
13333 ldr rd2, [rbase + offset + 4]
13337 add rd1, rbase, offset
13338 ldmia rd1, {rd1, rd2}
13340 produces worse code -- '3 cycles + any stalls on rd2' instead of
13341 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13342 access per cycle, the first sequence could never complete in less
13343 than 6 cycles, whereas the ldm sequence would only take 5 and
13344 would make better use of sequential accesses if not hitting the
13347 We cheat here and test 'arm_ld_sched' which we currently know to
13348 only be true for the ARM8, ARM9 and StrongARM. If this ever
13349 changes, then the test below needs to be reworked. */
13350 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13353 /* XScale has load-store double instructions, but they have stricter
13354 alignment requirements than load-store multiple, so we cannot
13357 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13358 the pipeline until completion.
13366 An ldr instruction takes 1-3 cycles, but does not block the
13375 Best case ldr will always win. However, the more ldr instructions
13376 we issue, the less likely we are to be able to schedule them well.
13377 Using ldr instructions also increases code size.
13379 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13380 for counts of 3 or 4 regs. */
13381 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13386 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13387 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13388 an array ORDER which describes the sequence to use when accessing the
13389 offsets that produces an ascending order. In this sequence, each
13390 offset must be larger by exactly 4 than the previous one. ORDER[0]
13391 must have been filled in with the lowest offset by the caller.
13392 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13393 we use to verify that ORDER produces an ascending order of registers.
13394 Return true if it was possible to construct such an order, false if
13398 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13399 int *unsorted_regs
)
13402 for (i
= 1; i
< nops
; i
++)
13406 order
[i
] = order
[i
- 1];
13407 for (j
= 0; j
< nops
; j
++)
13408 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13410 /* We must find exactly one offset that is higher than the
13411 previous one by 4. */
13412 if (order
[i
] != order
[i
- 1])
13416 if (order
[i
] == order
[i
- 1])
13418 /* The register numbers must be ascending. */
13419 if (unsorted_regs
!= NULL
13420 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13426 /* Used to determine in a peephole whether a sequence of load
13427 instructions can be changed into a load-multiple instruction.
13428 NOPS is the number of separate load instructions we are examining. The
13429 first NOPS entries in OPERANDS are the destination registers, the
13430 next NOPS entries are memory operands. If this function is
13431 successful, *BASE is set to the common base register of the memory
13432 accesses; *LOAD_OFFSET is set to the first memory location's offset
13433 from that base register.
13434 REGS is an array filled in with the destination register numbers.
13435 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13436 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13437 the sequence of registers in REGS matches the loads from ascending memory
13438 locations, and the function verifies that the register numbers are
13439 themselves ascending. If CHECK_REGS is false, the register numbers
13440 are stored in the order they are found in the operands. */
13442 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13443 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13445 int unsorted_regs
[MAX_LDM_STM_OPS
];
13446 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13447 int order
[MAX_LDM_STM_OPS
];
13448 rtx base_reg_rtx
= NULL
;
13452 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13453 easily extended if required. */
13454 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13456 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13458 /* Loop over the operands and check that the memory references are
13459 suitable (i.e. immediate offsets from the same base register). At
13460 the same time, extract the target register, and the memory
13462 for (i
= 0; i
< nops
; i
++)
13467 /* Convert a subreg of a mem into the mem itself. */
13468 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13469 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13471 gcc_assert (MEM_P (operands
[nops
+ i
]));
13473 /* Don't reorder volatile memory references; it doesn't seem worth
13474 looking for the case where the order is ok anyway. */
13475 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13478 offset
= const0_rtx
;
13480 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13481 || (GET_CODE (reg
) == SUBREG
13482 && REG_P (reg
= SUBREG_REG (reg
))))
13483 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13484 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13485 || (GET_CODE (reg
) == SUBREG
13486 && REG_P (reg
= SUBREG_REG (reg
))))
13487 && (CONST_INT_P (offset
13488 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13492 base_reg
= REGNO (reg
);
13493 base_reg_rtx
= reg
;
13494 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13497 else if (base_reg
!= (int) REGNO (reg
))
13498 /* Not addressed from the same base register. */
13501 unsorted_regs
[i
] = (REG_P (operands
[i
])
13502 ? REGNO (operands
[i
])
13503 : REGNO (SUBREG_REG (operands
[i
])));
13505 /* If it isn't an integer register, or if it overwrites the
13506 base register but isn't the last insn in the list, then
13507 we can't do this. */
13508 if (unsorted_regs
[i
] < 0
13509 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13510 || unsorted_regs
[i
] > 14
13511 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13514 /* Don't allow SP to be loaded unless it is also the base
13515 register. It guarantees that SP is reset correctly when
13516 an LDM instruction is interrupted. Otherwise, we might
13517 end up with a corrupt stack. */
13518 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13521 unsorted_offsets
[i
] = INTVAL (offset
);
13522 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13526 /* Not a suitable memory address. */
13530 /* All the useful information has now been extracted from the
13531 operands into unsorted_regs and unsorted_offsets; additionally,
13532 order[0] has been set to the lowest offset in the list. Sort
13533 the offsets into order, verifying that they are adjacent, and
13534 check that the register numbers are ascending. */
13535 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13536 check_regs
? unsorted_regs
: NULL
))
13540 memcpy (saved_order
, order
, sizeof order
);
13546 for (i
= 0; i
< nops
; i
++)
13547 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13549 *load_offset
= unsorted_offsets
[order
[0]];
13553 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13556 if (unsorted_offsets
[order
[0]] == 0)
13557 ldm_case
= 1; /* ldmia */
13558 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13559 ldm_case
= 2; /* ldmib */
13560 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13561 ldm_case
= 3; /* ldmda */
13562 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13563 ldm_case
= 4; /* ldmdb */
13564 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13565 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13570 if (!multiple_operation_profitable_p (false, nops
,
13572 ? unsorted_offsets
[order
[0]] : 0))
13578 /* Used to determine in a peephole whether a sequence of store instructions can
13579 be changed into a store-multiple instruction.
13580 NOPS is the number of separate store instructions we are examining.
13581 NOPS_TOTAL is the total number of instructions recognized by the peephole
13583 The first NOPS entries in OPERANDS are the source registers, the next
13584 NOPS entries are memory operands. If this function is successful, *BASE is
13585 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13586 to the first memory location's offset from that base register. REGS is an
13587 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13588 likewise filled with the corresponding rtx's.
13589 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13590 numbers to an ascending order of stores.
13591 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13592 from ascending memory locations, and the function verifies that the register
13593 numbers are themselves ascending. If CHECK_REGS is false, the register
13594 numbers are stored in the order they are found in the operands. */
/* NOTE(review): this listing is an incomplete extraction — braces, some
   declarations and the return statements were dropped; all tokens below are
   preserved exactly as found.  Analyzes NOPS stores to decide whether they
   can become one STM; fills REGS/REG_RTXS/SAVED_ORDER/BASE/LOAD_OFFSET as
   documented in the comment above.  Returns an stm_case code (presumably
   0 on failure — TODO confirm against upstream arm.c).  */
13596 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13597 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13598 HOST_WIDE_INT
*load_offset
, bool check_regs
)
/* Per-operand scratch arrays, indexed by insn position (max MAX_LDM_STM_OPS).  */
13600 int unsorted_regs
[MAX_LDM_STM_OPS
];
13601 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13602 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13603 int order
[MAX_LDM_STM_OPS
];
13605 rtx base_reg_rtx
= NULL
;
13608 /* Write back of base register is currently only supported for Thumb 1. */
13609 int base_writeback
= TARGET_THUMB1
;
13611 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13612 easily extended if required. */
13613 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13615 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13617 /* Loop over the operands and check that the memory references are
13618 suitable (i.e. immediate offsets from the same base register). At
13619 the same time, extract the target register, and the memory
13621 for (i
= 0; i
< nops
; i
++)
13626 /* Convert a subreg of a mem into the mem itself. */
13627 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13628 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13630 gcc_assert (MEM_P (operands
[nops
+ i
]));
13632 /* Don't reorder volatile memory references; it doesn't seem worth
13633 looking for the case where the order is ok anyway. */
13634 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13637 offset
= const0_rtx
;
/* Accept either a plain (possibly subreg'd) base register, or
   (plus base const_int); OFFSET captures the immediate part.  */
13639 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13640 || (GET_CODE (reg
) == SUBREG
13641 && REG_P (reg
= SUBREG_REG (reg
))))
13642 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13643 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13644 || (GET_CODE (reg
) == SUBREG
13645 && REG_P (reg
= SUBREG_REG (reg
))))
13646 && (CONST_INT_P (offset
13647 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13649 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13650 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13651 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
/* First iteration establishes the common base register; later
   iterations must match it.  */
13655 base_reg
= REGNO (reg
);
13656 base_reg_rtx
= reg
;
13657 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13660 else if (base_reg
!= (int) REGNO (reg
))
13661 /* Not addressed from the same base register. */
13664 /* If it isn't an integer register, then we can't do this. */
13665 if (unsorted_regs
[i
] < 0
13666 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13667 /* The effects are unpredictable if the base register is
13668 both updated and stored. */
13669 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13670 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13671 || unsorted_regs
[i
] > 14)
13674 unsorted_offsets
[i
] = INTVAL (offset
);
/* Track which operand has the lowest offset in order[0].  */
13675 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13679 /* Not a suitable memory address. */
13683 /* All the useful information has now been extracted from the
13684 operands into unsorted_regs and unsorted_offsets; additionally,
13685 order[0] has been set to the lowest offset in the list. Sort
13686 the offsets into order, verifying that they are adjacent, and
13687 check that the register numbers are ascending. */
13688 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13689 check_regs
? unsorted_regs
: NULL
))
13693 memcpy (saved_order
, order
, sizeof order
);
13699 for (i
= 0; i
< nops
; i
++)
13701 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13703 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13706 *load_offset
= unsorted_offsets
[order
[0]];
13710 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
/* Classify the STM addressing variant from the lowest/highest offsets.  */
13713 if (unsorted_offsets
[order
[0]] == 0)
13714 stm_case
= 1; /* stmia */
13715 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13716 stm_case
= 2; /* stmib */
13717 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13718 stm_case
= 3; /* stmda */
13719 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13720 stm_case
= 4; /* stmdb */
/* Final profitability gate before committing to a multiple-store.  */
13724 if (!multiple_operation_profitable_p (false, nops
, 0))
13730 /* Routines for use in generating RTL. */
13732 /* Generate a load-multiple instruction. COUNT is the number of loads in
13733 the instruction; REGS and MEMS are arrays containing the operands.
13734 BASEREG is the base register to be used in addressing the memory operands.
13735 WBACK_OFFSET is nonzero if the instruction should update the base
/* NOTE(review): incomplete extraction — the sequence start/end calls, braces
   and returns are missing; tokens preserved verbatim.  Emits either a
   discrete sequence of SImode loads (when a multiple op is not profitable)
   or builds one PARALLEL of SETs representing an LDM, optionally with a
   base-register write-back SET in slot 0.  */
13739 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13740 HOST_WIDE_INT wback_offset
)
/* Fallback path: emit COUNT individual register loads plus an explicit
   base-register update, and return the captured insn sequence.  */
13745 if (!multiple_operation_profitable_p (false, count
, 0))
13751 for (i
= 0; i
< count
; i
++)
13752 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13754 if (wback_offset
!= 0)
13755 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13757 seq
= get_insns ();
/* Profitable path: one PARALLEL, sized for the loads plus an optional
   write-back element.  */
13763 result
= gen_rtx_PARALLEL (VOIDmode
,
13764 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13765 if (wback_offset
!= 0)
13767 XVECEXP (result
, 0, 0)
13768 = gen_rtx_SET (VOIDmode
, basereg
,
13769 plus_constant (Pmode
, basereg
, wback_offset
));
/* i continues past the (optional) write-back slot; j indexes regs/mems.  */
13774 for (j
= 0; i
< count
; i
++, j
++)
13775 XVECEXP (result
, 0, i
)
13776 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13781 /* Generate a store-multiple instruction. COUNT is the number of stores in
13782 the instruction; REGS and MEMS are arrays containing the operands.
13783 BASEREG is the base register to be used in addressing the memory operands.
13784 WBACK_OFFSET is nonzero if the instruction should update the base
/* NOTE(review): incomplete extraction — braces/returns missing; tokens kept
   verbatim.  Mirror of arm_gen_load_multiple_1 for stores: either emits
   COUNT discrete SImode stores, or builds a PARALLEL representing an STM
   with an optional base write-back SET in slot 0.  */
13788 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13789 HOST_WIDE_INT wback_offset
)
/* Strip a (plus base offset) form down to the bare base register.  */
13794 if (GET_CODE (basereg
) == PLUS
)
13795 basereg
= XEXP (basereg
, 0);
/* Fallback path: individual stores plus explicit base update.  */
13797 if (!multiple_operation_profitable_p (false, count
, 0))
13803 for (i
= 0; i
< count
; i
++)
13804 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13806 if (wback_offset
!= 0)
13807 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13809 seq
= get_insns ();
13815 result
= gen_rtx_PARALLEL (VOIDmode
,
13816 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13817 if (wback_offset
!= 0)
13819 XVECEXP (result
, 0, 0)
13820 = gen_rtx_SET (VOIDmode
, basereg
,
13821 plus_constant (Pmode
, basereg
, wback_offset
));
/* Fill remaining PARALLEL slots with mem <- reg SETs.  */
13826 for (j
= 0; i
< count
; i
++, j
++)
13827 XVECEXP (result
, 0, i
)
13828 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13833 /* Generate either a load-multiple or a store-multiple instruction. This
13834 function can be used in situations where we can start with a single MEM
13835 rtx and adjust its address upwards.
13836 COUNT is the number of operations in the instruction, not counting a
13837 possible update of the base register. REGS is an array containing the
13839 BASEREG is the base register to be used in addressing the memory operands,
13840 which are constructed from BASEMEM.
13841 WRITE_BACK specifies whether the generated instruction should include an
13842 update of the base register.
13843 OFFSETP is used to pass an offset to and from this function; this offset
13844 is not used when constructing the address (instead BASEMEM should have an
13845 appropriate offset in its address), it is used only for setting
13846 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
/* NOTE(review): incomplete extraction; tokens preserved verbatim.  Builds the
   MEMS array from BASEMEM at ascending word offsets, then dispatches to the
   load or store worker depending on IS_LOAD; write-back offset is 4*COUNT
   when WRITE_BACK is requested.  */
13849 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13850 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13852 rtx mems
[MAX_LDM_STM_OPS
];
13853 HOST_WIDE_INT offset
= *offsetp
;
13856 gcc_assert (count
<= MAX_LDM_STM_OPS
);
/* Reduce (plus base off) addressing to the base register itself.  */
13858 if (GET_CODE (basereg
) == PLUS
)
13859 basereg
= XEXP (basereg
, 0);
/* One SImode mem per word, each 4 bytes beyond the last.  */
13861 for (i
= 0; i
< count
; i
++)
13863 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13864 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13872 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13873 write_back
? 4 * count
: 0);
13875 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13876 write_back
? 4 * count
: 0);
/* Thin wrapper: generate a load-multiple via arm_gen_multiple_op with
   is_load == TRUE.  (Extraction note: the trailing argument/brace lines of
   the call are missing from this listing.)  */
13880 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13881 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13883 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
/* Thin wrapper: generate a store-multiple via arm_gen_multiple_op with
   is_load == FALSE.  (Extraction note: the trailing argument/brace lines of
   the call are missing from this listing.)  */
13888 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13889 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13891 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13895 /* Called from a peephole2 expander to turn a sequence of loads into an
13896 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13897 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13898 is true if we can reorder the registers because they are used commutatively
13900 Returns true iff we could generate a new instruction. */
/* NOTE(review): incomplete extraction — braces, some conditions and returns
   are missing; tokens preserved verbatim.  Peephole2 helper: validates the
   load group via load_multiple_sequence, optionally sorts the destination
   registers, rewrites the base when the offset is nonzero, then emits the
   combined LDM via arm_gen_load_multiple_1.  */
13903 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13905 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13906 rtx mems
[MAX_LDM_STM_OPS
];
13907 int i
, j
, base_reg
;
13909 HOST_WIDE_INT offset
;
13910 int write_back
= FALSE
;
13914 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13915 &base_reg
, &offset
, !sort_regs
);
/* Simple selection sort of the destination register numbers; only done
   when the registers are commutative (sort_regs).  The swap body is
   absent from this extraction.  */
13921 for (i
= 0; i
< nops
- 1; i
++)
13922 for (j
= i
+ 1; j
< nops
; j
++)
13923 if (regs
[i
] > regs
[j
])
13929 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
)
;
13933 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13934 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
/* Nonzero start offset: materialize base+offset in a scratch (on !Thumb1,
   the lowest destination register doubles as the new base).  */
13940 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13941 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13943 if (!TARGET_THUMB1
)
13945 base_reg
= regs
[0];
13946 base_reg_rtx
= newbase
;
/* Rebuild the mem operands in ascending-address order (mem_order).  */
13950 for (i
= 0; i
< nops
; i
++)
13952 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13953 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13956 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13957 write_back
? offset
+ i
* 4 : 0));
13961 /* Called from a peephole2 expander to turn a sequence of stores into an
13962 STM instruction. OPERANDS are the operands found by the peephole matcher;
13963 NOPS indicates how many separate stores we are trying to combine.
13964 Returns true iff we could generate a new instruction. */
/* NOTE(review): incomplete extraction — braces/returns missing; tokens
   preserved verbatim.  Peephole2 helper mirroring gen_ldm_seq for stores:
   validates via store_multiple_sequence (check_regs == true), adjusts the
   base for a nonzero offset, then emits an STM via
   arm_gen_store_multiple_1.  */
13967 gen_stm_seq (rtx
*operands
, int nops
)
13970 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13971 rtx mems
[MAX_LDM_STM_OPS
];
13974 HOST_WIDE_INT offset
;
13975 int write_back
= FALSE
;
13978 bool base_reg_dies
;
13980 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13981 mem_order
, &base_reg
, &offset
, true);
13986 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13988 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13991 gcc_assert (base_reg_dies
);
/* Nonzero start offset: fold it into the base register (the base must be
   dead after the group, since we clobber it).  */
13997 gcc_assert (base_reg_dies
);
13998 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14002 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
/* Rebuild mem operands in ascending-address order.  */
14004 for (i
= 0; i
< nops
; i
++)
14006 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14007 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14010 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14011 write_back
? offset
+ i
* 4 : 0));
14015 /* Called from a peephole2 expander to turn a sequence of stores that are
14016 preceded by constant loads into an STM instruction. OPERANDS are the
14017 operands found by the peephole matcher; NOPS indicates how many
14018 separate stores we are trying to combine; there are 2 * NOPS
14019 instructions in the peephole.
14020 Returns true iff we could generate a new instruction. */
/* NOTE(review): incomplete extraction — braces, some conditions and returns
   are missing; tokens preserved verbatim.  Peephole2 helper for constant
   loads followed by stores: resolves duplicate registers with free scratch
   registers, computes an ascending register ordering, re-emits the constant
   loads in that order, then emits a single STM.  */
14023 gen_const_stm_seq (rtx
*operands
, int nops
)
14025 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14026 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14027 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14028 rtx mems
[MAX_LDM_STM_OPS
];
14031 HOST_WIDE_INT offset
;
14032 int write_back
= FALSE
;
14035 bool base_reg_dies
;
14037 HARD_REG_SET allocated
;
14039 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14040 mem_order
, &base_reg
, &offset
, false);
/* Keep the original reg rtxs so liveness can be checked later.  */
14045 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14047 /* If the same register is used more than once, try to find a free
14049 CLEAR_HARD_REG_SET (allocated
);
14050 for (i
= 0; i
< nops
; i
++)
14052 for (j
= i
+ 1; j
< nops
; j
++)
14053 if (regs
[i
] == regs
[j
])
14055 rtx t
= peep2_find_free_register (0, nops
* 2,
14056 TARGET_THUMB1
? "l" : "r",
14057 SImode
, &allocated
);
14061 regs
[i
] = REGNO (t
);
14065 /* Compute an ordering that maps the register numbers to an ascending
14068 for (i
= 0; i
< nops
; i
++)
14069 if (regs
[i
] < regs
[reg_order
[0]])
/* Selection-style pass building reg_order so regs[reg_order[]] ascends.  */
14072 for (i
= 1; i
< nops
; i
++)
14074 int this_order
= reg_order
[i
- 1];
14075 for (j
= 0; j
< nops
; j
++)
14076 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14077 && (this_order
== reg_order
[i
- 1]
14078 || regs
[j
] < regs
[this_order
]))
14080 reg_order
[i
] = this_order
;
14083 /* Ensure that registers that must be live after the instruction end
14084 up with the correct value. */
14085 for (i
= 0; i
< nops
; i
++)
14087 int this_order
= reg_order
[i
];
14088 if ((this_order
!= mem_order
[i
]
14089 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14090 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14094 /* Load the constants. */
14095 for (i
= 0; i
< nops
; i
++)
14097 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14098 sorted_regs
[i
] = regs
[reg_order
[i
]];
14099 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14102 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14104 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14107 gcc_assert (base_reg_dies
);
/* Nonzero start offset: clobber the (dead) base with base+offset.  */
14113 gcc_assert (base_reg_dies
);
14114 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14118 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14120 for (i
= 0; i
< nops
; i
++)
14122 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14123 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14126 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14127 write_back
? offset
+ i
* 4 : 0));
14131 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14132 unaligned copies on processors which support unaligned semantics for those
14133 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14134 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14135 An interleave factor of 1 (the minimum) will perform no interleaving.
14136 Load/store multiple are used for aligned addresses where possible. */
/* NOTE(review): incomplete extraction — braces, some statements and the
   enclosing control structure of several regions are missing; tokens kept
   verbatim.  Straight-line (no loop) unaligned block copy: full
   interleave_factor-word chunks first, then leftover whole words, then an
   optional trailing halfword and byte.  */
14139 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14140 HOST_WIDE_INT length
,
14141 unsigned int interleave_factor
)
14143 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14144 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14145 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14146 HOST_WIDE_INT i
, j
;
14147 HOST_WIDE_INT remaining
= length
, words
;
14148 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14150 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14151 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14152 HOST_WIDE_INT srcoffset
, dstoffset
;
14153 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14156 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14158 /* Use hard registers if we have aligned source or destination so we can use
14159 load/store multiple with contiguous registers. */
14160 if (dst_aligned
|| src_aligned
)
14161 for (i
= 0; i
< interleave_factor
; i
++)
14162 regs
[i
] = gen_rtx_REG (SImode
, i
);
14164 for (i
= 0; i
< interleave_factor
; i
++)
14165 regs
[i
] = gen_reg_rtx (SImode
);
14167 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14168 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14170 srcoffset
= dstoffset
= 0;
14172 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14173 For copying the last bytes we want to subtract this offset again. */
14174 src_autoinc
= dst_autoinc
= 0;
14176 for (i
= 0; i
< interleave_factor
; i
++)
14179 /* Copy BLOCK_SIZE_BYTES chunks. */
14181 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
/* Aligned source with interleave: LDM with write-back; otherwise fall
   back to individual unaligned SImode loads.  */
14184 if (src_aligned
&& interleave_factor
> 1)
14186 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14187 TRUE
, srcbase
, &srcoffset
));
14188 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14192 for (j
= 0; j
< interleave_factor
; j
++)
14194 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14196 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14197 srcoffset
+ j
* UNITS_PER_WORD
);
14198 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14200 srcoffset
+= block_size_bytes
;
/* Matching store side: STM with write-back, or individual stores.  */
14204 if (dst_aligned
&& interleave_factor
> 1)
14206 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14207 TRUE
, dstbase
, &dstoffset
));
14208 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14212 for (j
= 0; j
< interleave_factor
; j
++)
14214 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14216 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14217 dstoffset
+ j
* UNITS_PER_WORD
);
14218 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14220 dstoffset
+= block_size_bytes
;
14223 remaining
-= block_size_bytes
;
14226 /* Copy any whole words left (note these aren't interleaved with any
14227 subsequent halfword/byte load/stores in the interests of simplicity). */
14229 words
= remaining
/ UNITS_PER_WORD
;
14231 gcc_assert (words
< interleave_factor
);
14233 if (src_aligned
&& words
> 1)
14235 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14237 src_autoinc
+= UNITS_PER_WORD
* words
;
14241 for (j
= 0; j
< words
; j
++)
14243 addr
= plus_constant (Pmode
, src
,
14244 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14245 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14246 srcoffset
+ j
* UNITS_PER_WORD
);
14247 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14249 srcoffset
+= words
* UNITS_PER_WORD
;
14252 if (dst_aligned
&& words
> 1)
14254 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14256 dst_autoinc
+= words
* UNITS_PER_WORD
;
14260 for (j
= 0; j
< words
; j
++)
14262 addr
= plus_constant (Pmode
, dst
,
14263 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14264 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14265 dstoffset
+ j
* UNITS_PER_WORD
);
14266 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14268 dstoffset
+= words
* UNITS_PER_WORD
;
14271 remaining
-= words
* UNITS_PER_WORD
;
14273 gcc_assert (remaining
< 4);
14275 /* Copy a halfword if necessary. */
14277 if (remaining
>= 2)
14279 halfword_tmp
= gen_reg_rtx (SImode
);
14281 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14282 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14283 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14285 /* Either write out immediately, or delay until we've loaded the last
14286 byte, depending on interleave factor. */
14287 if (interleave_factor
== 1)
14289 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14290 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14291 emit_insn (gen_unaligned_storehi (mem
,
14292 gen_lowpart (HImode
, halfword_tmp
)));
14293 halfword_tmp
= NULL
;
14301 gcc_assert (remaining
< 2);
14303 /* Copy last byte. */
14305 if ((remaining
& 1) != 0)
14307 byte_tmp
= gen_reg_rtx (SImode
);
14309 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14310 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14311 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14313 if (interleave_factor
== 1)
14315 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14316 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14317 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14326 /* Store last halfword if we haven't done so already. */
14330 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14331 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14332 emit_insn (gen_unaligned_storehi (mem
,
14333 gen_lowpart (HImode
, halfword_tmp
)));
14337 /* Likewise for last byte. */
14341 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14342 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14343 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14347 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14350 /* From mips_adjust_block_mem:
14352 Helper function for doing a loop-based block operation on memory
14353 reference MEM. Each iteration of the loop will operate on LENGTH
14356 Create a new base register for use within the loop and point it to
14357 the start of MEM. Create a new memory reference that uses this
14358 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
/* Puts MEM's address in a fresh pseudo (*LOOP_REG) and builds a BLKmode
   mem through it (*LOOP_MEM) whose alignment is capped at LENGTH bytes.
   (Extraction note: braces and the *loop_mem parameter line are missing
   from this listing; tokens kept verbatim.)  */
14361 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14364 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14366 /* Although the new mem does not refer to a known location,
14367 it does keep up to LENGTH bytes of alignment. */
14368 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14369 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14372 /* From mips_block_move_loop:
14374 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14375 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14376 the memory regions do not overlap. */
/* NOTE(review): incomplete extraction — braces and some lines missing; tokens
   preserved verbatim.  Loop-based unaligned block copy: emits a loop moving
   BYTES_PER_ITER bytes per iteration (body generated by
   arm_block_move_unaligned_straight), terminating when SRC_REG reaches
   FINAL_SRC, then copies the sub-iteration remainder straight-line.  */
14379 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14380 unsigned int interleave_factor
,
14381 HOST_WIDE_INT bytes_per_iter
)
14383 rtx label
, src_reg
, dest_reg
, final_src
, test
;
14384 HOST_WIDE_INT leftover
;
/* Split LENGTH into a loop-covered part and a tail remainder.  */
14386 leftover
= length
% bytes_per_iter
;
14387 length
-= leftover
;
14389 /* Create registers and memory references for use within the loop. */
14390 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14391 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14393 /* Calculate the value that SRC_REG should have after the last iteration of
14395 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14396 0, 0, OPTAB_WIDEN
);
14398 /* Emit the start of the loop. */
14399 label
= gen_label_rtx ();
14400 emit_label (label
);
14402 /* Emit the loop body. */
14403 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14404 interleave_factor
);
14406 /* Move on to the next block. */
14407 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14408 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14410 /* Emit the loop condition. */
14411 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14412 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14414 /* Mop up any left-over bytes. */
14416 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14419 /* Emit a block move when either the source or destination is unaligned (not
14420 aligned to a four-byte boundary). This may need further tuning depending on
14421 core type, optimize_size setting, etc. */
/* NOTE(review): incomplete extraction — the size/speed branch structure
   around the two strategies is missing; tokens preserved verbatim.
   Unaligned movmem expander: picks interleave factor and bytes-per-iteration
   from operand alignment, then dispatches to loop or straight-line copy.  */
14424 arm_movmemqi_unaligned (rtx
*operands
)
14426 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14430 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14431 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14432 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14433 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14434 or dst_aligned though: allow more interleaving in those cases since the
14435 resulting code can be smaller. */
14436 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14437 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
/* Size-optimized strategy pair (loop for long copies, straight otherwise).  */
14440 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14441 interleave_factor
, bytes_per_iter
);
14443 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14444 interleave_factor
);
14448 /* Note that the loop created by arm_block_move_unaligned_loop may be
14449 subject to loop unrolling, which makes tuning this condition a little
/* Speed-optimized strategy pair with fixed factor 4 / 16-byte iterations.  */
14452 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14454 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
/* NOTE(review): incomplete extraction — braces, several statements and the
   returns are missing; tokens preserved verbatim.  movmemqi expander for
   the aligned/ldm-stm path: copies in 4-word LDM/STM chunks, then a final
   partial group, then the trailing 1-3 bytes via shifts of
   part_bytes_reg.  Bails to arm_movmemqi_unaligned when unaligned access
   is available and alignment is below a word.  */
14461 arm_gen_movmemqi (rtx
*operands
)
14463 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14464 HOST_WIDE_INT srcoffset
, dstoffset
;
14466 rtx src
, dst
, srcbase
, dstbase
;
14467 rtx part_bytes_reg
= NULL
;
/* Only handle constant, small (<= 64 byte) copies here.  */
14470 if (!CONST_INT_P (operands
[2])
14471 || !CONST_INT_P (operands
[3])
14472 || INTVAL (operands
[2]) > 64)
14475 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14476 return arm_movmemqi_unaligned (operands
);
14478 if (INTVAL (operands
[3]) & 3)
14481 dstbase
= operands
[0];
14482 srcbase
= operands
[1];
14484 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14485 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14487 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14488 out_words_to_go
= INTVAL (operands
[2]) / 4;
14489 last_bytes
= INTVAL (operands
[2]) & 3;
14490 dstoffset
= srcoffset
= 0;
14492 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14493 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
/* Main chunk loop: up to 4 words at a time through r0-r3
   (arm_regs_in_sequence).  */
14495 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14497 if (in_words_to_go
> 4)
14498 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14499 TRUE
, srcbase
, &srcoffset
));
14501 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14502 src
, FALSE
, srcbase
,
14505 if (out_words_to_go
)
14507 if (out_words_to_go
> 4)
14508 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14509 TRUE
, dstbase
, &dstoffset
));
14510 else if (out_words_to_go
!= 1)
14511 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14512 out_words_to_go
, dst
,
14515 dstbase
, &dstoffset
));
/* Single remaining word: plain store from r0.  */
14518 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14519 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
14520 if (last_bytes
!= 0)
14522 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14528 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14529 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14532 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14533 if (out_words_to_go
)
14537 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14538 sreg
= copy_to_reg (mem
);
14540 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14541 emit_move_insn (mem
, sreg
);
14544 gcc_assert (!in_words_to_go
); /* Sanity check */
14547 if (in_words_to_go
)
14549 gcc_assert (in_words_to_go
> 0);
14551 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14552 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14555 gcc_assert (!last_bytes
|| part_bytes_reg
);
14557 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14559 rtx tmp
= gen_reg_rtx (SImode
);
14561 /* The bytes we want are in the top end of the word. */
14562 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14563 GEN_INT (8 * (4 - last_bytes
))));
14564 part_bytes_reg
= tmp
;
/* Big-endian tail: store bytes highest-address first, shifting right
   by 8 between stores.  */
14568 mem
= adjust_automodify_address (dstbase
, QImode
,
14569 plus_constant (Pmode
, dst
,
14571 dstoffset
+ last_bytes
- 1);
14572 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14576 tmp
= gen_reg_rtx (SImode
);
14577 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14578 part_bytes_reg
= tmp
;
/* Little-endian tail: halfword first (if >= 2 bytes), then shift right
   16 and store the final byte.  */
14585 if (last_bytes
> 1)
14587 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14588 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14592 rtx tmp
= gen_reg_rtx (SImode
);
14593 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14594 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14595 part_bytes_reg
= tmp
;
14602 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14603 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14610 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
/* Returns a mem like MEM but advanced by one element of MEM's own mode —
   i.e. the next consecutive location of the same width.  (Extraction note:
   the return-type line and braces are missing from this listing.)  */
14613 next_consecutive_mem (rtx mem
)
14615 enum machine_mode mode
= GET_MODE (mem
);
14616 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14617 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14619 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14622 /* Copy using LDRD/STRD instructions whenever possible.
14623 Returns true upon success. */
/* NOTE(review): incomplete extraction — braces, loop headers and the length
   bookkeeping between phases are missing; tokens preserved verbatim.
   movmem expander using LDRD/STRD where possible: doubleword copies first,
   then a word, halfword and byte tail; falls back to arm_gen_movmemqi when
   neither side is word-aligned.  */
14625 gen_movmem_ldrd_strd (rtx
*operands
)
14627 unsigned HOST_WIDE_INT len
;
14628 HOST_WIDE_INT align
;
14629 rtx src
, dst
, base
;
14631 bool src_aligned
, dst_aligned
;
14632 bool src_volatile
, dst_volatile
;
14634 gcc_assert (CONST_INT_P (operands
[2]));
14635 gcc_assert (CONST_INT_P (operands
[3]));
14637 len
= UINTVAL (operands
[2]);
14641 /* Maximum alignment we can assume for both src and dst buffers. */
14642 align
= INTVAL (operands
[3]);
14644 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14647 /* Place src and dst addresses in registers
14648 and update the corresponding mem rtx. */
14650 dst_volatile
= MEM_VOLATILE_P (dst
);
14651 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14652 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14653 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14656 src_volatile
= MEM_VOLATILE_P (src
);
14657 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14658 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14659 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14661 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14664 if (src_volatile
|| dst_volatile
)
14667 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14668 if (!(dst_aligned
|| src_aligned
))
14669 return arm_gen_movmemqi (operands
);
/* Doubleword phase: aligned side uses a plain DImode move, the unaligned
   side uses the unaligned loaddi/storedi patterns.  */
14671 src
= adjust_address (src
, DImode
, 0);
14672 dst
= adjust_address (dst
, DImode
, 0);
14676 reg0
= gen_reg_rtx (DImode
);
14678 emit_move_insn (reg0
, src
);
14680 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14683 emit_move_insn (dst
, reg0
);
14685 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14687 src
= next_consecutive_mem (src
);
14688 dst
= next_consecutive_mem (dst
);
14691 gcc_assert (len
< 8);
14694 /* More than a word but less than a double-word to copy. Copy a word. */
14695 reg0
= gen_reg_rtx (SImode
);
14696 src
= adjust_address (src
, SImode
, 0);
14697 dst
= adjust_address (dst
, SImode
, 0);
14699 emit_move_insn (reg0
, src
);
14701 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14704 emit_move_insn (dst
, reg0
);
14706 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14708 src
= next_consecutive_mem (src
);
14709 dst
= next_consecutive_mem (dst
);
14716 /* Copy the remaining bytes. */
/* Halfword tail (aligned: zero-extend load + movhi; unaligned: the
   unaligned halfword patterns).  */
14719 dst
= adjust_address (dst
, HImode
, 0);
14720 src
= adjust_address (src
, HImode
, 0);
14721 reg0
= gen_reg_rtx (SImode
);
14723 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14725 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14728 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14730 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14732 src
= next_consecutive_mem (src
);
14733 dst
= next_consecutive_mem (dst
);
/* Final single byte, if any.  */
14738 dst
= adjust_address (dst
, QImode
, 0);
14739 src
= adjust_address (src
, QImode
, 0);
14740 reg0
= gen_reg_rtx (QImode
);
14741 emit_move_insn (reg0
, src
);
14742 emit_move_insn (dst
, reg0
);
14746 /* Select a dominance comparison mode if possible for a test of the general
14747 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14748 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14749 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14750 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14751 In all cases OP will be either EQ or NE, but we don't need to know which
14752 here. If we are unable to support a dominance comparison we return
14753 CC mode. This will then fail to match for the RTL expressions that
14754 generate this call. */
14756 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14758 enum rtx_code cond1
, cond2
;
14761 /* Currently we will probably get the wrong result if the individual
14762 comparisons are not simple. This also ensures that it is safe to
14763 reverse a comparison if necessary. */
14764 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14766 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14770 /* The if_then_else variant of this tests the second condition if the
14771 first passes, but is true if the first fails. Reverse the first
14772 condition to get a true "inclusive-or" expression. */
14773 if (cond_or
== DOM_CC_NX_OR_Y
)
14774 cond1
= reverse_condition (cond1
);
14776 /* If the comparisons are not equal, and one doesn't dominate the other,
14777 then we can't do this. */
14779 && !comparison_dominates_p (cond1
, cond2
)
14780 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14785 enum rtx_code temp
= cond1
;
14793 if (cond_or
== DOM_CC_X_AND_Y
)
14798 case EQ
: return CC_DEQmode
;
14799 case LE
: return CC_DLEmode
;
14800 case LEU
: return CC_DLEUmode
;
14801 case GE
: return CC_DGEmode
;
14802 case GEU
: return CC_DGEUmode
;
14803 default: gcc_unreachable ();
14807 if (cond_or
== DOM_CC_X_AND_Y
)
14819 gcc_unreachable ();
14823 if (cond_or
== DOM_CC_X_AND_Y
)
14835 gcc_unreachable ();
14839 if (cond_or
== DOM_CC_X_AND_Y
)
14840 return CC_DLTUmode
;
14845 return CC_DLTUmode
;
14847 return CC_DLEUmode
;
14851 gcc_unreachable ();
14855 if (cond_or
== DOM_CC_X_AND_Y
)
14856 return CC_DGTUmode
;
14861 return CC_DGTUmode
;
14863 return CC_DGEUmode
;
14867 gcc_unreachable ();
14870 /* The remaining cases only occur when both comparisons are the
14873 gcc_assert (cond1
== cond2
);
14877 gcc_assert (cond1
== cond2
);
14881 gcc_assert (cond1
== cond2
);
14885 gcc_assert (cond1
== cond2
);
14886 return CC_DLEUmode
;
14889 gcc_assert (cond1
== cond2
);
14890 return CC_DGEUmode
;
14893 gcc_unreachable ();
14898 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14900 /* All floating point compares return CCFP if it is an equality
14901 comparison, and CCFPE otherwise. */
14902 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14925 gcc_unreachable ();
14929 /* A compare with a shifted operand. Because of canonicalization, the
14930 comparison will have to be swapped when we emit the assembler. */
14931 if (GET_MODE (y
) == SImode
14932 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14933 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14934 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14935 || GET_CODE (x
) == ROTATERT
))
14938 /* This operation is performed swapped, but since we only rely on the Z
14939 flag we don't need an additional mode. */
14940 if (GET_MODE (y
) == SImode
14941 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14942 && GET_CODE (x
) == NEG
14943 && (op
== EQ
|| op
== NE
))
14946 /* This is a special case that is used by combine to allow a
14947 comparison of a shifted byte load to be split into a zero-extend
14948 followed by a comparison of the shifted integer (only valid for
14949 equalities and unsigned inequalities). */
14950 if (GET_MODE (x
) == SImode
14951 && GET_CODE (x
) == ASHIFT
14952 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14953 && GET_CODE (XEXP (x
, 0)) == SUBREG
14954 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14955 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14956 && (op
== EQ
|| op
== NE
14957 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14958 && CONST_INT_P (y
))
14961 /* A construct for a conditional compare, if the false arm contains
14962 0, then both conditions must be true, otherwise either condition
14963 must be true. Not all conditions are possible, so CCmode is
14964 returned if it can't be done. */
14965 if (GET_CODE (x
) == IF_THEN_ELSE
14966 && (XEXP (x
, 2) == const0_rtx
14967 || XEXP (x
, 2) == const1_rtx
)
14968 && COMPARISON_P (XEXP (x
, 0))
14969 && COMPARISON_P (XEXP (x
, 1)))
14970 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14971 INTVAL (XEXP (x
, 2)));
14973 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14974 if (GET_CODE (x
) == AND
14975 && (op
== EQ
|| op
== NE
)
14976 && COMPARISON_P (XEXP (x
, 0))
14977 && COMPARISON_P (XEXP (x
, 1)))
14978 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14981 if (GET_CODE (x
) == IOR
14982 && (op
== EQ
|| op
== NE
)
14983 && COMPARISON_P (XEXP (x
, 0))
14984 && COMPARISON_P (XEXP (x
, 1)))
14985 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14988 /* An operation (on Thumb) where we want to test for a single bit.
14989 This is done by shifting that bit up into the top bit of a
14990 scratch register; we can then branch on the sign bit. */
14992 && GET_MODE (x
) == SImode
14993 && (op
== EQ
|| op
== NE
)
14994 && GET_CODE (x
) == ZERO_EXTRACT
14995 && XEXP (x
, 1) == const1_rtx
)
14998 /* An operation that sets the condition codes as a side-effect, the
14999 V flag is not set correctly, so we can only use comparisons where
15000 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15002 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15003 if (GET_MODE (x
) == SImode
15005 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15006 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15007 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15008 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15009 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15010 || GET_CODE (x
) == LSHIFTRT
15011 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15012 || GET_CODE (x
) == ROTATERT
15013 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15014 return CC_NOOVmode
;
15016 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15019 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15020 && GET_CODE (x
) == PLUS
15021 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15024 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15030 /* A DImode comparison against zero can be implemented by
15031 or'ing the two halves together. */
15032 if (y
== const0_rtx
)
15035 /* We can do an equality test in three Thumb instructions. */
15045 /* DImode unsigned comparisons can be implemented by cmp +
15046 cmpeq without a scratch register. Not worth doing in
15057 /* DImode signed and unsigned comparisons can be implemented
15058 by cmp + sbcs with a scratch register, but that does not
15059 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15060 gcc_assert (op
!= EQ
&& op
!= NE
);
15064 gcc_unreachable ();
15068 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15069 return GET_MODE (x
);
15074 /* X and Y are two things to compare using CODE. Emit the compare insn and
15075 return the rtx for register 0 in the proper mode. FP means this is a
15076 floating point compare: I don't think that it is needed on the arm. */
15078 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15080 enum machine_mode mode
;
15082 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
15084 /* We might have X as a constant, Y as a register because of the predicates
15085 used for cmpdi. If so, force X to a register here. */
15086 if (dimode_comparison
&& !REG_P (x
))
15087 x
= force_reg (DImode
, x
);
15089 mode
= SELECT_CC_MODE (code
, x
, y
);
15090 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15092 if (dimode_comparison
15093 && mode
!= CC_CZmode
)
15097 /* To compare two non-zero values for equality, XOR them and
15098 then compare against zero. Not used for ARM mode; there
15099 CC_CZmode is cheaper. */
15100 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
15102 gcc_assert (!reload_completed
);
15103 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
15107 /* A scratch register is required. */
15108 if (reload_completed
)
15109 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
15111 scratch
= gen_rtx_SCRATCH (SImode
);
15113 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15114 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15115 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
15118 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15123 /* Generate a sequence of insns that will generate the correct return
15124 address mask depending on the physical architecture that the program
15127 arm_gen_return_addr_mask (void)
15129 rtx reg
= gen_reg_rtx (Pmode
);
15131 emit_insn (gen_return_addr_mask (reg
));
15136 arm_reload_in_hi (rtx
*operands
)
15138 rtx ref
= operands
[1];
15140 HOST_WIDE_INT offset
= 0;
15142 if (GET_CODE (ref
) == SUBREG
)
15144 offset
= SUBREG_BYTE (ref
);
15145 ref
= SUBREG_REG (ref
);
15150 /* We have a pseudo which has been spilt onto the stack; there
15151 are two cases here: the first where there is a simple
15152 stack-slot replacement and a second where the stack-slot is
15153 out of range, or is used as a subreg. */
15154 if (reg_equiv_mem (REGNO (ref
)))
15156 ref
= reg_equiv_mem (REGNO (ref
));
15157 base
= find_replacement (&XEXP (ref
, 0));
15160 /* The slot is out of range, or was dressed up in a SUBREG. */
15161 base
= reg_equiv_address (REGNO (ref
));
15164 base
= find_replacement (&XEXP (ref
, 0));
15166 /* Handle the case where the address is too complex to be offset by 1. */
15167 if (GET_CODE (base
) == MINUS
15168 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15170 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15172 emit_set_insn (base_plus
, base
);
15175 else if (GET_CODE (base
) == PLUS
)
15177 /* The addend must be CONST_INT, or we would have dealt with it above. */
15178 HOST_WIDE_INT hi
, lo
;
15180 offset
+= INTVAL (XEXP (base
, 1));
15181 base
= XEXP (base
, 0);
15183 /* Rework the address into a legal sequence of insns. */
15184 /* Valid range for lo is -4095 -> 4095 */
15187 : -((-offset
) & 0xfff));
15189 /* Corner case, if lo is the max offset then we would be out of range
15190 once we have added the additional 1 below, so bump the msb into the
15191 pre-loading insn(s). */
15195 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15196 ^ (HOST_WIDE_INT
) 0x80000000)
15197 - (HOST_WIDE_INT
) 0x80000000);
15199 gcc_assert (hi
+ lo
== offset
);
15203 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15205 /* Get the base address; addsi3 knows how to handle constants
15206 that require more than one insn. */
15207 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15213 /* Operands[2] may overlap operands[0] (though it won't overlap
15214 operands[1]), that's why we asked for a DImode reg -- so we can
15215 use the bit that does not overlap. */
15216 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15217 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15219 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15221 emit_insn (gen_zero_extendqisi2 (scratch
,
15222 gen_rtx_MEM (QImode
,
15223 plus_constant (Pmode
, base
,
15225 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15226 gen_rtx_MEM (QImode
,
15227 plus_constant (Pmode
, base
,
15229 if (!BYTES_BIG_ENDIAN
)
15230 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15231 gen_rtx_IOR (SImode
,
15234 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15238 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15239 gen_rtx_IOR (SImode
,
15240 gen_rtx_ASHIFT (SImode
, scratch
,
15242 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15245 /* Handle storing a half-word to memory during reload by synthesizing as two
15246 byte stores. Take care not to clobber the input values until after we
15247 have moved them somewhere safe. This code assumes that if the DImode
15248 scratch in operands[2] overlaps either the input value or output address
15249 in some way, then that value must die in this insn (we absolutely need
15250 two scratch registers for some corner cases). */
15252 arm_reload_out_hi (rtx
*operands
)
15254 rtx ref
= operands
[0];
15255 rtx outval
= operands
[1];
15257 HOST_WIDE_INT offset
= 0;
15259 if (GET_CODE (ref
) == SUBREG
)
15261 offset
= SUBREG_BYTE (ref
);
15262 ref
= SUBREG_REG (ref
);
15267 /* We have a pseudo which has been spilt onto the stack; there
15268 are two cases here: the first where there is a simple
15269 stack-slot replacement and a second where the stack-slot is
15270 out of range, or is used as a subreg. */
15271 if (reg_equiv_mem (REGNO (ref
)))
15273 ref
= reg_equiv_mem (REGNO (ref
));
15274 base
= find_replacement (&XEXP (ref
, 0));
15277 /* The slot is out of range, or was dressed up in a SUBREG. */
15278 base
= reg_equiv_address (REGNO (ref
));
15281 base
= find_replacement (&XEXP (ref
, 0));
15283 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15285 /* Handle the case where the address is too complex to be offset by 1. */
15286 if (GET_CODE (base
) == MINUS
15287 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15289 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15291 /* Be careful not to destroy OUTVAL. */
15292 if (reg_overlap_mentioned_p (base_plus
, outval
))
15294 /* Updating base_plus might destroy outval, see if we can
15295 swap the scratch and base_plus. */
15296 if (!reg_overlap_mentioned_p (scratch
, outval
))
15299 scratch
= base_plus
;
15304 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15306 /* Be conservative and copy OUTVAL into the scratch now,
15307 this should only be necessary if outval is a subreg
15308 of something larger than a word. */
15309 /* XXX Might this clobber base? I can't see how it can,
15310 since scratch is known to overlap with OUTVAL, and
15311 must be wider than a word. */
15312 emit_insn (gen_movhi (scratch_hi
, outval
));
15313 outval
= scratch_hi
;
15317 emit_set_insn (base_plus
, base
);
15320 else if (GET_CODE (base
) == PLUS
)
15322 /* The addend must be CONST_INT, or we would have dealt with it above. */
15323 HOST_WIDE_INT hi
, lo
;
15325 offset
+= INTVAL (XEXP (base
, 1));
15326 base
= XEXP (base
, 0);
15328 /* Rework the address into a legal sequence of insns. */
15329 /* Valid range for lo is -4095 -> 4095 */
15332 : -((-offset
) & 0xfff));
15334 /* Corner case, if lo is the max offset then we would be out of range
15335 once we have added the additional 1 below, so bump the msb into the
15336 pre-loading insn(s). */
15340 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15341 ^ (HOST_WIDE_INT
) 0x80000000)
15342 - (HOST_WIDE_INT
) 0x80000000);
15344 gcc_assert (hi
+ lo
== offset
);
15348 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15350 /* Be careful not to destroy OUTVAL. */
15351 if (reg_overlap_mentioned_p (base_plus
, outval
))
15353 /* Updating base_plus might destroy outval, see if we
15354 can swap the scratch and base_plus. */
15355 if (!reg_overlap_mentioned_p (scratch
, outval
))
15358 scratch
= base_plus
;
15363 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15365 /* Be conservative and copy outval into scratch now,
15366 this should only be necessary if outval is a
15367 subreg of something larger than a word. */
15368 /* XXX Might this clobber base? I can't see how it
15369 can, since scratch is known to overlap with
15371 emit_insn (gen_movhi (scratch_hi
, outval
));
15372 outval
= scratch_hi
;
15376 /* Get the base address; addsi3 knows how to handle constants
15377 that require more than one insn. */
15378 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15384 if (BYTES_BIG_ENDIAN
)
15386 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15387 plus_constant (Pmode
, base
,
15389 gen_lowpart (QImode
, outval
)));
15390 emit_insn (gen_lshrsi3 (scratch
,
15391 gen_rtx_SUBREG (SImode
, outval
, 0),
15393 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15395 gen_lowpart (QImode
, scratch
)));
15399 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15401 gen_lowpart (QImode
, outval
)));
15402 emit_insn (gen_lshrsi3 (scratch
,
15403 gen_rtx_SUBREG (SImode
, outval
, 0),
15405 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15406 plus_constant (Pmode
, base
,
15408 gen_lowpart (QImode
, scratch
)));
15412 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15413 (padded to the size of a word) should be passed in a register. */
15416 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
15418 if (TARGET_AAPCS_BASED
)
15419 return must_pass_in_stack_var_size (mode
, type
);
15421 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15425 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15426 Return true if an argument passed on the stack should be padded upwards,
15427 i.e. if the least-significant byte has useful data.
15428 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15429 aggregate types are placed in the lowest memory address. */
15432 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15434 if (!TARGET_AAPCS_BASED
)
15435 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15437 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15444 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15445 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15446 register has useful data, and return the opposite if the most
15447 significant byte does. */
15450 arm_pad_reg_upward (enum machine_mode mode
,
15451 tree type
, int first ATTRIBUTE_UNUSED
)
15453 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15455 /* For AAPCS, small aggregates, small fixed-point types,
15456 and small complex types are always padded upwards. */
15459 if ((AGGREGATE_TYPE_P (type
)
15460 || TREE_CODE (type
) == COMPLEX_TYPE
15461 || FIXED_POINT_TYPE_P (type
))
15462 && int_size_in_bytes (type
) <= 4)
15467 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15468 && GET_MODE_SIZE (mode
) <= 4)
15473 /* Otherwise, use default padding. */
15474 return !BYTES_BIG_ENDIAN
;
15477 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15478 assuming that the address in the base register is word aligned. */
15480 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15482 HOST_WIDE_INT max_offset
;
15484 /* Offset must be a multiple of 4 in Thumb mode. */
15485 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15490 else if (TARGET_ARM
)
15495 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15498 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15499 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15500 Assumes that the address in the base register RN is word aligned. Pattern
15501 guarantees that both memory accesses use the same base register,
15502 the offsets are constants within the range, and the gap between the offsets is 4.
15503 If preload complete then check that registers are legal. WBACK indicates whether
15504 address is updated. LOAD indicates whether memory access is load or store. */
15506 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15507 bool wback
, bool load
)
15509 unsigned int t
, t2
, n
;
15511 if (!reload_completed
)
15514 if (!offset_ok_for_ldrd_strd (offset
))
15521 if ((TARGET_THUMB2
)
15522 && ((wback
&& (n
== t
|| n
== t2
))
15523 || (t
== SP_REGNUM
)
15524 || (t
== PC_REGNUM
)
15525 || (t2
== SP_REGNUM
)
15526 || (t2
== PC_REGNUM
)
15527 || (!load
&& (n
== PC_REGNUM
))
15528 || (load
&& (t
== t2
))
15529 /* Triggers Cortex-M3 LDRD errata. */
15530 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15534 && ((wback
&& (n
== t
|| n
== t2
))
15535 || (t2
== PC_REGNUM
)
15536 || (t
% 2 != 0) /* First destination register is not even. */
15538 /* PC can be used as base register (for offset addressing only),
15539 but it is depricated. */
15540 || (n
== PC_REGNUM
)))
15546 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15547 operand MEM's address contains an immediate offset from the base
15548 register and has no side effects, in which case it sets BASE and
15549 OFFSET accordingly. */
15551 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15555 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15557 /* TODO: Handle more general memory operand patterns, such as
15558 PRE_DEC and PRE_INC. */
15560 if (side_effects_p (mem
))
15563 /* Can't deal with subregs. */
15564 if (GET_CODE (mem
) == SUBREG
)
15567 gcc_assert (MEM_P (mem
));
15569 *offset
= const0_rtx
;
15571 addr
= XEXP (mem
, 0);
15573 /* If addr isn't valid for DImode, then we can't handle it. */
15574 if (!arm_legitimate_address_p (DImode
, addr
,
15575 reload_in_progress
|| reload_completed
))
15583 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15585 *base
= XEXP (addr
, 0);
15586 *offset
= XEXP (addr
, 1);
15587 return (REG_P (*base
) && CONST_INT_P (*offset
));
15593 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15595 /* Called from a peephole2 to replace two word-size accesses with a
15596 single LDRD/STRD instruction. Returns true iff we can generate a
15597 new instruction sequence. That is, both accesses use the same base
15598 register and the gap between constant offsets is 4. This function
15599 may reorder its operands to match ldrd/strd RTL templates.
15600 OPERANDS are the operands found by the peephole matcher;
15601 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15602 corresponding memory operands. LOAD indicaates whether the access
15603 is load or store. CONST_STORE indicates a store of constant
15604 integer values held in OPERANDS[4,5] and assumes that the pattern
15605 is of length 4 insn, for the purpose of checking dead registers.
15606 COMMUTE indicates that register operands may be reordered. */
15608 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15609 bool const_store
, bool commute
)
15612 HOST_WIDE_INT offsets
[2], offset
;
15613 rtx base
= NULL_RTX
;
15614 rtx cur_base
, cur_offset
, tmp
;
15616 HARD_REG_SET regset
;
15618 gcc_assert (!const_store
|| !load
);
15619 /* Check that the memory references are immediate offsets from the
15620 same base register. Extract the base register, the destination
15621 registers, and the corresponding memory offsets. */
15622 for (i
= 0; i
< nops
; i
++)
15624 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15629 else if (REGNO (base
) != REGNO (cur_base
))
15632 offsets
[i
] = INTVAL (cur_offset
);
15633 if (GET_CODE (operands
[i
]) == SUBREG
)
15635 tmp
= SUBREG_REG (operands
[i
]);
15636 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15641 /* Make sure there is no dependency between the individual loads. */
15642 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15643 return false; /* RAW */
15645 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15646 return false; /* WAW */
15648 /* If the same input register is used in both stores
15649 when storing different constants, try to find a free register.
15650 For example, the code
15655 can be transformed into
15658 in Thumb mode assuming that r1 is free. */
15660 && REGNO (operands
[0]) == REGNO (operands
[1])
15661 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15665 CLEAR_HARD_REG_SET (regset
);
15666 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15667 if (tmp
== NULL_RTX
)
15670 /* Use the new register in the first load to ensure that
15671 if the original input register is not dead after peephole,
15672 then it will have the correct constant value. */
15675 else if (TARGET_ARM
)
15678 int regno
= REGNO (operands
[0]);
15679 if (!peep2_reg_dead_p (4, operands
[0]))
15681 /* When the input register is even and is not dead after the
15682 pattern, it has to hold the second constant but we cannot
15683 form a legal STRD in ARM mode with this register as the second
15685 if (regno
% 2 == 0)
15688 /* Is regno-1 free? */
15689 SET_HARD_REG_SET (regset
);
15690 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15691 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15692 if (tmp
== NULL_RTX
)
15699 /* Find a DImode register. */
15700 CLEAR_HARD_REG_SET (regset
);
15701 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15702 if (tmp
!= NULL_RTX
)
15704 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15705 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15709 /* Can we use the input register to form a DI register? */
15710 SET_HARD_REG_SET (regset
);
15711 CLEAR_HARD_REG_BIT(regset
,
15712 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15713 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15714 if (tmp
== NULL_RTX
)
15716 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15720 gcc_assert (operands
[0] != NULL_RTX
);
15721 gcc_assert (operands
[1] != NULL_RTX
);
15722 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15723 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15727 /* Make sure the instructions are ordered with lower memory access first. */
15728 if (offsets
[0] > offsets
[1])
15730 gap
= offsets
[0] - offsets
[1];
15731 offset
= offsets
[1];
15733 /* Swap the instructions such that lower memory is accessed first. */
15734 SWAP_RTX (operands
[0], operands
[1]);
15735 SWAP_RTX (operands
[2], operands
[3]);
15737 SWAP_RTX (operands
[4], operands
[5]);
15741 gap
= offsets
[1] - offsets
[0];
15742 offset
= offsets
[0];
15745 /* Make sure accesses are to consecutive memory locations. */
15749 /* Make sure we generate legal instructions. */
15750 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15754 /* In Thumb state, where registers are almost unconstrained, there
15755 is little hope to fix it. */
15759 if (load
&& commute
)
15761 /* Try reordering registers. */
15762 SWAP_RTX (operands
[0], operands
[1]);
15763 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15770 /* If input registers are dead after this pattern, they can be
15771 reordered or replaced by other registers that are free in the
15772 current pattern. */
15773 if (!peep2_reg_dead_p (4, operands
[0])
15774 || !peep2_reg_dead_p (4, operands
[1]))
15777 /* Try to reorder the input registers. */
15778 /* For example, the code
15783 can be transformed into
15788 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15791 SWAP_RTX (operands
[0], operands
[1]);
15795 /* Try to find a free DI register. */
15796 CLEAR_HARD_REG_SET (regset
);
15797 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15798 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15801 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15802 if (tmp
== NULL_RTX
)
15805 /* DREG must be an even-numbered register in DImode.
15806 Split it into SI registers. */
15807 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15808 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15809 gcc_assert (operands
[0] != NULL_RTX
);
15810 gcc_assert (operands
[1] != NULL_RTX
);
15811 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15812 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15814 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15827 /* Print a symbolic form of X to the debug file, F. */
15829 arm_print_value (FILE *f
, rtx x
)
15831 switch (GET_CODE (x
))
15834 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15838 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15846 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15848 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15849 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15857 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15861 fprintf (f
, "`%s'", XSTR (x
, 0));
15865 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15869 arm_print_value (f
, XEXP (x
, 0));
15873 arm_print_value (f
, XEXP (x
, 0));
15875 arm_print_value (f
, XEXP (x
, 1));
15883 fprintf (f
, "????");
15888 /* Routines for manipulation of the constant pool. */
15890 /* Arm instructions cannot load a large constant directly into a
15891 register; they have to come from a pc relative load. The constant
15892 must therefore be placed in the addressable range of the pc
15893 relative load. Depending on the precise pc relative load
15894 instruction the range is somewhere between 256 bytes and 4k. This
15895 means that we often have to dump a constant inside a function, and
15896 generate code to branch around it.
15898 It is important to minimize this, since the branches will slow
15899 things down and make the code larger.
15901 Normally we can hide the table after an existing unconditional
15902 branch so that there is no interruption of the flow, but in the
15903 worst case the code looks like this:
15921 We fix this by performing a scan after scheduling, which notices
15922 which instructions need to have their operands fetched from the
15923 constant table and builds the table.
15925 The algorithm starts by building a table of all the constants that
15926 need fixing up and all the natural barriers in the function (places
15927 where a constant table can be dropped without breaking the flow).
15928 For each fixup we note how far the pc-relative replacement will be
15929 able to reach and the offset of the instruction into the function.
15931 Having built the table we then group the fixes together to form
15932 tables that are as large as possible (subject to addressing
15933 constraints) and emit each table of constants after the last
15934 barrier that is within range of all the instructions in the group.
15935 If a group does not contain a barrier, then we forcibly create one
15936 by inserting a jump instruction into the flow. Once the table has
15937 been inserted, the insns are then modified to reference the
15938 relevant entry in the pool.
15940 Possible enhancements to the algorithm (not implemented) are:
15942 1) For some processors and object formats, there may be benefit in
15943 aligning the pools to the start of cache lines; this alignment
15944 would need to be taken into account when calculating addressability
15947 /* These typedefs are located at the start of this file, so that
15948 they can be used in the prototypes there. This comment is to
15949 remind readers of that fact so that the following structures
15950 can be understood more easily.
15952 typedef struct minipool_node Mnode;
15953 typedef struct minipool_fixup Mfix; */
15955 struct minipool_node
15957 /* Doubly linked chain of entries. */
15960 /* The maximum offset into the code that this entry can be placed. While
15961 pushing fixes for forward references, all entries are sorted in order
15962 of increasing max_address. */
15963 HOST_WIDE_INT max_address
;
15964 /* Similarly for an entry inserted for a backwards ref. */
15965 HOST_WIDE_INT min_address
;
15966 /* The number of fixes referencing this entry. This can become zero
15967 if we "unpush" an entry. In this case we ignore the entry when we
15968 come to emit the code. */
15970 /* The offset from the start of the minipool. */
15971 HOST_WIDE_INT offset
;
15972 /* The value in table. */
15974 /* The mode of value. */
15975 enum machine_mode mode
;
15976 /* The size of the value. With iWMMXt enabled
15977 sizes > 4 also imply an alignment of 8-bytes. */
15981 struct minipool_fixup
15985 HOST_WIDE_INT address
;
15987 enum machine_mode mode
;
15991 HOST_WIDE_INT forwards
;
15992 HOST_WIDE_INT backwards
;
15995 /* Fixes less than a word need padding out to a word boundary. */
15996 #define MINIPOOL_FIX_SIZE(mode) \
15997 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15999 static Mnode
* minipool_vector_head
;
16000 static Mnode
* minipool_vector_tail
;
16001 static rtx minipool_vector_label
;
16002 static int minipool_pad
;
16004 /* The linked list of all minipool fixes required for this function. */
16005 Mfix
* minipool_fix_head
;
16006 Mfix
* minipool_fix_tail
;
16007 /* The fix entry for the current minipool, once it has been placed. */
16008 Mfix
* minipool_barrier
;
16010 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16011 #define JUMP_TABLES_IN_TEXT_SECTION 0
16014 static HOST_WIDE_INT
16015 get_jump_table_size (rtx insn
)
16017 /* ADDR_VECs only take room if read-only data does into the text
16019 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16021 rtx body
= PATTERN (insn
);
16022 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16023 HOST_WIDE_INT size
;
16024 HOST_WIDE_INT modesize
;
16026 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16027 size
= modesize
* XVECLEN (body
, elt
);
16031 /* Round up size of TBB table to a halfword boundary. */
16032 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
16035 /* No padding necessary for TBH. */
16038 /* Add two bytes for alignment on Thumb. */
16043 gcc_unreachable ();
16051 /* Return the maximum amount of padding that will be inserted before
16054 static HOST_WIDE_INT
16055 get_label_padding (rtx label
)
16057 HOST_WIDE_INT align
, min_insn_size
;
16059 align
= 1 << label_to_alignment (label
);
16060 min_insn_size
= TARGET_THUMB
? 2 : 4;
16061 return align
> min_insn_size
? align
- min_insn_size
: 0;
16064 /* Move a minipool fix MP from its current location to before MAX_MP.
16065 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16066 constraints may need updating. */
16068 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16069 HOST_WIDE_INT max_address
)
16071 /* The code below assumes these are different. */
16072 gcc_assert (mp
!= max_mp
);
16074 if (max_mp
== NULL
)
16076 if (max_address
< mp
->max_address
)
16077 mp
->max_address
= max_address
;
16081 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16082 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16084 mp
->max_address
= max_address
;
16086 /* Unlink MP from its current position. Since max_mp is non-null,
16087 mp->prev must be non-null. */
16088 mp
->prev
->next
= mp
->next
;
16089 if (mp
->next
!= NULL
)
16090 mp
->next
->prev
= mp
->prev
;
16092 minipool_vector_tail
= mp
->prev
;
16094 /* Re-insert it before MAX_MP. */
16096 mp
->prev
= max_mp
->prev
;
16099 if (mp
->prev
!= NULL
)
16100 mp
->prev
->next
= mp
;
16102 minipool_vector_head
= mp
;
16105 /* Save the new entry. */
16108 /* Scan over the preceding entries and adjust their addresses as
16110 while (mp
->prev
!= NULL
16111 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16113 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16120 /* Add a constant to the minipool for a forward reference. Returns the
16121 node added or NULL if the constant will not fit in this pool. */
16123 add_minipool_forward_ref (Mfix
*fix
)
16125 /* If set, max_mp is the first pool_entry that has a lower
16126 constraint than the one we are trying to add. */
16127 Mnode
* max_mp
= NULL
;
16128 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16131 /* If the minipool starts before the end of FIX->INSN then this FIX
16132 can not be placed into the current pool. Furthermore, adding the
16133 new constant pool entry may cause the pool to start FIX_SIZE bytes
16135 if (minipool_vector_head
&&
16136 (fix
->address
+ get_attr_length (fix
->insn
)
16137 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16140 /* Scan the pool to see if a constant with the same value has
16141 already been added. While we are doing this, also note the
16142 location where we must insert the constant if it doesn't already
16144 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16146 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16147 && fix
->mode
== mp
->mode
16148 && (!LABEL_P (fix
->value
)
16149 || (CODE_LABEL_NUMBER (fix
->value
)
16150 == CODE_LABEL_NUMBER (mp
->value
)))
16151 && rtx_equal_p (fix
->value
, mp
->value
))
16153 /* More than one fix references this entry. */
16155 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16158 /* Note the insertion point if necessary. */
16160 && mp
->max_address
> max_address
)
16163 /* If we are inserting an 8-bytes aligned quantity and
16164 we have not already found an insertion point, then
16165 make sure that all such 8-byte aligned quantities are
16166 placed at the start of the pool. */
16167 if (ARM_DOUBLEWORD_ALIGN
16169 && fix
->fix_size
>= 8
16170 && mp
->fix_size
< 8)
16173 max_address
= mp
->max_address
;
16177 /* The value is not currently in the minipool, so we need to create
16178 a new entry for it. If MAX_MP is NULL, the entry will be put on
16179 the end of the list since the placement is less constrained than
16180 any existing entry. Otherwise, we insert the new fix before
16181 MAX_MP and, if necessary, adjust the constraints on the other
16184 mp
->fix_size
= fix
->fix_size
;
16185 mp
->mode
= fix
->mode
;
16186 mp
->value
= fix
->value
;
16188 /* Not yet required for a backwards ref. */
16189 mp
->min_address
= -65536;
16191 if (max_mp
== NULL
)
16193 mp
->max_address
= max_address
;
16195 mp
->prev
= minipool_vector_tail
;
16197 if (mp
->prev
== NULL
)
16199 minipool_vector_head
= mp
;
16200 minipool_vector_label
= gen_label_rtx ();
16203 mp
->prev
->next
= mp
;
16205 minipool_vector_tail
= mp
;
16209 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16210 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16212 mp
->max_address
= max_address
;
16215 mp
->prev
= max_mp
->prev
;
16217 if (mp
->prev
!= NULL
)
16218 mp
->prev
->next
= mp
;
16220 minipool_vector_head
= mp
;
16223 /* Save the new entry. */
16226 /* Scan over the preceding entries and adjust their addresses as
16228 while (mp
->prev
!= NULL
16229 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16231 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16239 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16240 HOST_WIDE_INT min_address
)
16242 HOST_WIDE_INT offset
;
16244 /* The code below assumes these are different. */
16245 gcc_assert (mp
!= min_mp
);
16247 if (min_mp
== NULL
)
16249 if (min_address
> mp
->min_address
)
16250 mp
->min_address
= min_address
;
16254 /* We will adjust this below if it is too loose. */
16255 mp
->min_address
= min_address
;
16257 /* Unlink MP from its current position. Since min_mp is non-null,
16258 mp->next must be non-null. */
16259 mp
->next
->prev
= mp
->prev
;
16260 if (mp
->prev
!= NULL
)
16261 mp
->prev
->next
= mp
->next
;
16263 minipool_vector_head
= mp
->next
;
16265 /* Reinsert it after MIN_MP. */
16267 mp
->next
= min_mp
->next
;
16269 if (mp
->next
!= NULL
)
16270 mp
->next
->prev
= mp
;
16272 minipool_vector_tail
= mp
;
16278 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16280 mp
->offset
= offset
;
16281 if (mp
->refcount
> 0)
16282 offset
+= mp
->fix_size
;
16284 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16285 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16291 /* Add a constant to the minipool for a backward reference. Returns the
16292 node added or NULL if the constant will not fit in this pool.
16294 Note that the code for insertion for a backwards reference can be
16295 somewhat confusing because the calculated offsets for each fix do
16296 not take into account the size of the pool (which is still under
16299 add_minipool_backward_ref (Mfix
*fix
)
16301 /* If set, min_mp is the last pool_entry that has a lower constraint
16302 than the one we are trying to add. */
16303 Mnode
*min_mp
= NULL
;
16304 /* This can be negative, since it is only a constraint. */
16305 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16308 /* If we can't reach the current pool from this insn, or if we can't
16309 insert this entry at the end of the pool without pushing other
16310 fixes out of range, then we don't try. This ensures that we
16311 can't fail later on. */
16312 if (min_address
>= minipool_barrier
->address
16313 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16314 >= minipool_barrier
->address
))
16317 /* Scan the pool to see if a constant with the same value has
16318 already been added. While we are doing this, also note the
16319 location where we must insert the constant if it doesn't already
16321 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16323 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16324 && fix
->mode
== mp
->mode
16325 && (!LABEL_P (fix
->value
)
16326 || (CODE_LABEL_NUMBER (fix
->value
)
16327 == CODE_LABEL_NUMBER (mp
->value
)))
16328 && rtx_equal_p (fix
->value
, mp
->value
)
16329 /* Check that there is enough slack to move this entry to the
16330 end of the table (this is conservative). */
16331 && (mp
->max_address
16332 > (minipool_barrier
->address
16333 + minipool_vector_tail
->offset
16334 + minipool_vector_tail
->fix_size
)))
16337 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16340 if (min_mp
!= NULL
)
16341 mp
->min_address
+= fix
->fix_size
;
16344 /* Note the insertion point if necessary. */
16345 if (mp
->min_address
< min_address
)
16347 /* For now, we do not allow the insertion of 8-byte alignment
16348 requiring nodes anywhere but at the start of the pool. */
16349 if (ARM_DOUBLEWORD_ALIGN
16350 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16355 else if (mp
->max_address
16356 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16358 /* Inserting before this entry would push the fix beyond
16359 its maximum address (which can happen if we have
16360 re-located a forwards fix); force the new fix to come
16362 if (ARM_DOUBLEWORD_ALIGN
16363 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16368 min_address
= mp
->min_address
+ fix
->fix_size
;
16371 /* Do not insert a non-8-byte aligned quantity before 8-byte
16372 aligned quantities. */
16373 else if (ARM_DOUBLEWORD_ALIGN
16374 && fix
->fix_size
< 8
16375 && mp
->fix_size
>= 8)
16378 min_address
= mp
->min_address
+ fix
->fix_size
;
16383 /* We need to create a new entry. */
16385 mp
->fix_size
= fix
->fix_size
;
16386 mp
->mode
= fix
->mode
;
16387 mp
->value
= fix
->value
;
16389 mp
->max_address
= minipool_barrier
->address
+ 65536;
16391 mp
->min_address
= min_address
;
16393 if (min_mp
== NULL
)
16396 mp
->next
= minipool_vector_head
;
16398 if (mp
->next
== NULL
)
16400 minipool_vector_tail
= mp
;
16401 minipool_vector_label
= gen_label_rtx ();
16404 mp
->next
->prev
= mp
;
16406 minipool_vector_head
= mp
;
16410 mp
->next
= min_mp
->next
;
16414 if (mp
->next
!= NULL
)
16415 mp
->next
->prev
= mp
;
16417 minipool_vector_tail
= mp
;
16420 /* Save the new entry. */
16428 /* Scan over the following entries and adjust their offsets. */
16429 while (mp
->next
!= NULL
)
16431 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16432 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16435 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16437 mp
->next
->offset
= mp
->offset
;
16446 assign_minipool_offsets (Mfix
*barrier
)
16448 HOST_WIDE_INT offset
= 0;
16451 minipool_barrier
= barrier
;
16453 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16455 mp
->offset
= offset
;
16457 if (mp
->refcount
> 0)
16458 offset
+= mp
->fix_size
;
16462 /* Output the literal table */
16464 dump_minipool (rtx scan
)
16470 if (ARM_DOUBLEWORD_ALIGN
)
16471 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16472 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16479 fprintf (dump_file
,
16480 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16481 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16483 scan
= emit_label_after (gen_label_rtx (), scan
);
16484 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16485 scan
= emit_label_after (minipool_vector_label
, scan
);
16487 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16489 if (mp
->refcount
> 0)
16493 fprintf (dump_file
,
16494 ";; Offset %u, min %ld, max %ld ",
16495 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16496 (unsigned long) mp
->max_address
);
16497 arm_print_value (dump_file
, mp
->value
);
16498 fputc ('\n', dump_file
);
16501 switch (mp
->fix_size
)
16503 #ifdef HAVE_consttable_1
16505 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16509 #ifdef HAVE_consttable_2
16511 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16515 #ifdef HAVE_consttable_4
16517 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16521 #ifdef HAVE_consttable_8
16523 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16527 #ifdef HAVE_consttable_16
16529 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16534 gcc_unreachable ();
16542 minipool_vector_head
= minipool_vector_tail
= NULL
;
16543 scan
= emit_insn_after (gen_consttable_end (), scan
);
16544 scan
= emit_barrier_after (scan
);
16547 /* Return the cost of forcibly inserting a barrier after INSN. */
16549 arm_barrier_cost (rtx insn
)
16551 /* Basing the location of the pool on the loop depth is preferable,
16552 but at the moment, the basic block information seems to be
16553 corrupt by this stage of the compilation. */
16554 int base_cost
= 50;
16555 rtx next
= next_nonnote_insn (insn
);
16557 if (next
!= NULL
&& LABEL_P (next
))
16560 switch (GET_CODE (insn
))
16563 /* It will always be better to place the table before the label, rather
16572 return base_cost
- 10;
16575 return base_cost
+ 10;
16579 /* Find the best place in the insn stream in the range
16580 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16581 Create the barrier by inserting a jump and add a new fix entry for
16584 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16586 HOST_WIDE_INT count
= 0;
16588 rtx from
= fix
->insn
;
16589 /* The instruction after which we will insert the jump. */
16590 rtx selected
= NULL
;
16592 /* The address at which the jump instruction will be placed. */
16593 HOST_WIDE_INT selected_address
;
16595 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16596 rtx label
= gen_label_rtx ();
16598 selected_cost
= arm_barrier_cost (from
);
16599 selected_address
= fix
->address
;
16601 while (from
&& count
< max_count
)
16606 /* This code shouldn't have been called if there was a natural barrier
16608 gcc_assert (!BARRIER_P (from
));
16610 /* Count the length of this insn. This must stay in sync with the
16611 code that pushes minipool fixes. */
16612 if (LABEL_P (from
))
16613 count
+= get_label_padding (from
);
16615 count
+= get_attr_length (from
);
16617 /* If there is a jump table, add its length. */
16618 if (tablejump_p (from
, NULL
, &tmp
))
16620 count
+= get_jump_table_size (tmp
);
16622 /* Jump tables aren't in a basic block, so base the cost on
16623 the dispatch insn. If we select this location, we will
16624 still put the pool after the table. */
16625 new_cost
= arm_barrier_cost (from
);
16627 if (count
< max_count
16628 && (!selected
|| new_cost
<= selected_cost
))
16631 selected_cost
= new_cost
;
16632 selected_address
= fix
->address
+ count
;
16635 /* Continue after the dispatch table. */
16636 from
= NEXT_INSN (tmp
);
16640 new_cost
= arm_barrier_cost (from
);
16642 if (count
< max_count
16643 && (!selected
|| new_cost
<= selected_cost
))
16646 selected_cost
= new_cost
;
16647 selected_address
= fix
->address
+ count
;
16650 from
= NEXT_INSN (from
);
16653 /* Make sure that we found a place to insert the jump. */
16654 gcc_assert (selected
);
16656 /* Make sure we do not split a call and its corresponding
16657 CALL_ARG_LOCATION note. */
16658 if (CALL_P (selected
))
16660 rtx next
= NEXT_INSN (selected
);
16661 if (next
&& NOTE_P (next
)
16662 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16666 /* Create a new JUMP_INSN that branches around a barrier. */
16667 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16668 JUMP_LABEL (from
) = label
;
16669 barrier
= emit_barrier_after (from
);
16670 emit_label_after (label
, barrier
);
16672 /* Create a minipool barrier entry for the new barrier. */
16673 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16674 new_fix
->insn
= barrier
;
16675 new_fix
->address
= selected_address
;
16676 new_fix
->next
= fix
->next
;
16677 fix
->next
= new_fix
;
16682 /* Record that there is a natural barrier in the insn stream at
16685 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
16687 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16690 fix
->address
= address
;
16693 if (minipool_fix_head
!= NULL
)
16694 minipool_fix_tail
->next
= fix
;
16696 minipool_fix_head
= fix
;
16698 minipool_fix_tail
= fix
;
16701 /* Record INSN, which will need fixing up to load a value from the
16702 minipool. ADDRESS is the offset of the insn since the start of the
16703 function; LOC is a pointer to the part of the insn which requires
16704 fixing; VALUE is the constant that must be loaded, which is of type
16707 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
16708 enum machine_mode mode
, rtx value
)
16710 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16713 fix
->address
= address
;
16716 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16717 fix
->value
= value
;
16718 fix
->forwards
= get_attr_pool_range (insn
);
16719 fix
->backwards
= get_attr_neg_pool_range (insn
);
16720 fix
->minipool
= NULL
;
16722 /* If an insn doesn't have a range defined for it, then it isn't
16723 expecting to be reworked by this code. Better to stop now than
16724 to generate duff assembly code. */
16725 gcc_assert (fix
->forwards
|| fix
->backwards
);
16727 /* If an entry requires 8-byte alignment then assume all constant pools
16728 require 4 bytes of padding. Trying to do this later on a per-pool
16729 basis is awkward because existing pool entries have to be modified. */
16730 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16735 fprintf (dump_file
,
16736 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16737 GET_MODE_NAME (mode
),
16738 INSN_UID (insn
), (unsigned long) address
,
16739 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16740 arm_print_value (dump_file
, fix
->value
);
16741 fprintf (dump_file
, "\n");
16744 /* Add it to the chain of fixes. */
16747 if (minipool_fix_head
!= NULL
)
16748 minipool_fix_tail
->next
= fix
;
16750 minipool_fix_head
= fix
;
16752 minipool_fix_tail
= fix
;
16755 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16756 Returns the number of insns needed, or 99 if we always want to synthesize
16759 arm_max_const_double_inline_cost ()
16761 /* Let the value get synthesized to avoid the use of literal pools. */
16762 if (arm_disable_literal_pool
)
16765 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16768 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16769 Returns the number of insns needed, or 99 if we don't know how to
16772 arm_const_double_inline_cost (rtx val
)
16774 rtx lowpart
, highpart
;
16775 enum machine_mode mode
;
16777 mode
= GET_MODE (val
);
16779 if (mode
== VOIDmode
)
16782 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16784 lowpart
= gen_lowpart (SImode
, val
);
16785 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16787 gcc_assert (CONST_INT_P (lowpart
));
16788 gcc_assert (CONST_INT_P (highpart
));
16790 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16791 NULL_RTX
, NULL_RTX
, 0, 0)
16792 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16793 NULL_RTX
, NULL_RTX
, 0, 0));
16796 /* Return true if it is worthwhile to split a 64-bit constant into two
16797 32-bit operations. This is the case if optimizing for size, or
16798 if we have load delay slots, or if one 32-bit part can be done with
16799 a single data operation. */
16801 arm_const_double_by_parts (rtx val
)
16803 enum machine_mode mode
= GET_MODE (val
);
16806 if (optimize_size
|| arm_ld_sched
)
16809 if (mode
== VOIDmode
)
16812 part
= gen_highpart_mode (SImode
, mode
, val
);
16814 gcc_assert (CONST_INT_P (part
));
16816 if (const_ok_for_arm (INTVAL (part
))
16817 || const_ok_for_arm (~INTVAL (part
)))
16820 part
= gen_lowpart (SImode
, val
);
16822 gcc_assert (CONST_INT_P (part
));
16824 if (const_ok_for_arm (INTVAL (part
))
16825 || const_ok_for_arm (~INTVAL (part
)))
16831 /* Return true if it is possible to inline both the high and low parts
16832 of a 64-bit constant into 32-bit data processing instructions. */
16834 arm_const_double_by_immediates (rtx val
)
16836 enum machine_mode mode
= GET_MODE (val
);
16839 if (mode
== VOIDmode
)
16842 part
= gen_highpart_mode (SImode
, mode
, val
);
16844 gcc_assert (CONST_INT_P (part
));
16846 if (!const_ok_for_arm (INTVAL (part
)))
16849 part
= gen_lowpart (SImode
, val
);
16851 gcc_assert (CONST_INT_P (part
));
16853 if (!const_ok_for_arm (INTVAL (part
)))
16859 /* Scan INSN and note any of its operands that need fixing.
16860 If DO_PUSHES is false we do not actually push any of the fixups
16863 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
16867 extract_insn (insn
);
16869 if (!constrain_operands (1))
16870 fatal_insn_not_found (insn
);
16872 if (recog_data
.n_alternatives
== 0)
16875 /* Fill in recog_op_alt with information about the constraints of
16877 preprocess_constraints ();
16879 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16881 /* Things we need to fix can only occur in inputs. */
16882 if (recog_data
.operand_type
[opno
] != OP_IN
)
16885 /* If this alternative is a memory reference, then any mention
16886 of constants in this alternative is really to fool reload
16887 into allowing us to accept one there. We need to fix them up
16888 now so that we output the right code. */
16889 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
16891 rtx op
= recog_data
.operand
[opno
];
16893 if (CONSTANT_P (op
))
16896 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16897 recog_data
.operand_mode
[opno
], op
);
16899 else if (MEM_P (op
)
16900 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16901 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16905 rtx cop
= avoid_constant_pool_reference (op
);
16907 /* Casting the address of something to a mode narrower
16908 than a word can cause avoid_constant_pool_reference()
16909 to return the pool reference itself. That's no good to
16910 us here. Lets just hope that we can use the
16911 constant pool value directly. */
16913 cop
= get_pool_constant (XEXP (op
, 0));
16915 push_minipool_fix (insn
, address
,
16916 recog_data
.operand_loc
[opno
],
16917 recog_data
.operand_mode
[opno
], cop
);
16927 /* Rewrite move insn into subtract of 0 if the condition codes will
16928 be useful in next conditional jump insn. */
16931 thumb1_reorg (void)
16935 FOR_EACH_BB_FN (bb
, cfun
)
16938 rtx pat
, op0
, set
= NULL
;
16939 rtx prev
, insn
= BB_END (bb
);
16940 bool insn_clobbered
= false;
16942 while (insn
!= BB_HEAD (bb
) && DEBUG_INSN_P (insn
))
16943 insn
= PREV_INSN (insn
);
16945 /* Find the last cbranchsi4_insn in basic block BB. */
16946 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
16949 /* Get the register with which we are comparing. */
16950 pat
= PATTERN (insn
);
16951 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
16953 /* Find the first flag setting insn before INSN in basic block BB. */
16954 gcc_assert (insn
!= BB_HEAD (bb
));
16955 for (prev
= PREV_INSN (insn
);
16957 && prev
!= BB_HEAD (bb
)
16959 || DEBUG_INSN_P (prev
)
16960 || ((set
= single_set (prev
)) != NULL
16961 && get_attr_conds (prev
) == CONDS_NOCOND
)));
16962 prev
= PREV_INSN (prev
))
16964 if (reg_set_p (op0
, prev
))
16965 insn_clobbered
= true;
16968 /* Skip if op0 is clobbered by insn other than prev. */
16969 if (insn_clobbered
)
16975 dest
= SET_DEST (set
);
16976 src
= SET_SRC (set
);
16977 if (!low_register_operand (dest
, SImode
)
16978 || !low_register_operand (src
, SImode
))
16981 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16982 in INSN. Both src and dest of the move insn are checked. */
16983 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
16985 dest
= copy_rtx (dest
);
16986 src
= copy_rtx (src
);
16987 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
16988 PATTERN (prev
) = gen_rtx_SET (VOIDmode
, dest
, src
);
16989 INSN_CODE (prev
) = -1;
16990 /* Set test register in INSN to dest. */
16991 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
16992 INSN_CODE (insn
) = -1;
16997 /* Convert instructions to their cc-clobbering variant if possible, since
16998 that allows us to use smaller encodings. */
17001 thumb2_reorg (void)
17006 INIT_REG_SET (&live
);
17008 /* We are freeing block_for_insn in the toplev to keep compatibility
17009 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17010 compute_bb_for_insn ();
17013 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17015 FOR_EACH_BB_FN (bb
, cfun
)
17017 if (current_tune
->disparage_flag_setting_t16_encodings
17018 && optimize_bb_for_speed_p (bb
))
17022 Convert_Action action
= SKIP
;
17023 Convert_Action action_for_partial_flag_setting
17024 = (current_tune
->disparage_partial_flag_setting_t16_encodings
17025 && optimize_bb_for_speed_p (bb
))
17028 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17029 df_simulate_initialize_backwards (bb
, &live
);
17030 FOR_BB_INSNS_REVERSE (bb
, insn
)
17032 if (NONJUMP_INSN_P (insn
)
17033 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17034 && GET_CODE (PATTERN (insn
)) == SET
)
17037 rtx pat
= PATTERN (insn
);
17038 rtx dst
= XEXP (pat
, 0);
17039 rtx src
= XEXP (pat
, 1);
17040 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17042 if (!OBJECT_P (src
))
17043 op0
= XEXP (src
, 0);
17045 if (BINARY_P (src
))
17046 op1
= XEXP (src
, 1);
17048 if (low_register_operand (dst
, SImode
))
17050 switch (GET_CODE (src
))
17053 /* Adding two registers and storing the result
17054 in the first source is already a 16-bit
17056 if (rtx_equal_p (dst
, op0
)
17057 && register_operand (op1
, SImode
))
17060 if (low_register_operand (op0
, SImode
))
17062 /* ADDS <Rd>,<Rn>,<Rm> */
17063 if (low_register_operand (op1
, SImode
))
17065 /* ADDS <Rdn>,#<imm8> */
17066 /* SUBS <Rdn>,#<imm8> */
17067 else if (rtx_equal_p (dst
, op0
)
17068 && CONST_INT_P (op1
)
17069 && IN_RANGE (INTVAL (op1
), -255, 255))
17071 /* ADDS <Rd>,<Rn>,#<imm3> */
17072 /* SUBS <Rd>,<Rn>,#<imm3> */
17073 else if (CONST_INT_P (op1
)
17074 && IN_RANGE (INTVAL (op1
), -7, 7))
17077 /* ADCS <Rd>, <Rn> */
17078 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17079 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17080 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17082 && COMPARISON_P (op1
)
17083 && cc_register (XEXP (op1
, 0), VOIDmode
)
17084 && maybe_get_arm_condition_code (op1
) == ARM_CS
17085 && XEXP (op1
, 1) == const0_rtx
)
17090 /* RSBS <Rd>,<Rn>,#0
17091 Not handled here: see NEG below. */
17092 /* SUBS <Rd>,<Rn>,#<imm3>
17094 Not handled here: see PLUS above. */
17095 /* SUBS <Rd>,<Rn>,<Rm> */
17096 if (low_register_operand (op0
, SImode
)
17097 && low_register_operand (op1
, SImode
))
17102 /* MULS <Rdm>,<Rn>,<Rdm>
17103 As an exception to the rule, this is only used
17104 when optimizing for size since MULS is slow on all
17105 known implementations. We do not even want to use
17106 MULS in cold code, if optimizing for speed, so we
17107 test the global flag here. */
17108 if (!optimize_size
)
17110 /* else fall through. */
17114 /* ANDS <Rdn>,<Rm> */
17115 if (rtx_equal_p (dst
, op0
)
17116 && low_register_operand (op1
, SImode
))
17117 action
= action_for_partial_flag_setting
;
17118 else if (rtx_equal_p (dst
, op1
)
17119 && low_register_operand (op0
, SImode
))
17120 action
= action_for_partial_flag_setting
== SKIP
17121 ? SKIP
: SWAP_CONV
;
17127 /* ASRS <Rdn>,<Rm> */
17128 /* LSRS <Rdn>,<Rm> */
17129 /* LSLS <Rdn>,<Rm> */
17130 if (rtx_equal_p (dst
, op0
)
17131 && low_register_operand (op1
, SImode
))
17132 action
= action_for_partial_flag_setting
;
17133 /* ASRS <Rd>,<Rm>,#<imm5> */
17134 /* LSRS <Rd>,<Rm>,#<imm5> */
17135 /* LSLS <Rd>,<Rm>,#<imm5> */
17136 else if (low_register_operand (op0
, SImode
)
17137 && CONST_INT_P (op1
)
17138 && IN_RANGE (INTVAL (op1
), 0, 31))
17139 action
= action_for_partial_flag_setting
;
17143 /* RORS <Rdn>,<Rm> */
17144 if (rtx_equal_p (dst
, op0
)
17145 && low_register_operand (op1
, SImode
))
17146 action
= action_for_partial_flag_setting
;
17150 /* MVNS <Rd>,<Rm> */
17151 if (low_register_operand (op0
, SImode
))
17152 action
= action_for_partial_flag_setting
;
17156 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17157 if (low_register_operand (op0
, SImode
))
17162 /* MOVS <Rd>,#<imm8> */
17163 if (CONST_INT_P (src
)
17164 && IN_RANGE (INTVAL (src
), 0, 255))
17165 action
= action_for_partial_flag_setting
;
17169 /* MOVS and MOV<c> with registers have different
17170 encodings, so are not relevant here. */
17178 if (action
!= SKIP
)
17180 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17181 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17184 if (action
== SWAP_CONV
)
17186 src
= copy_rtx (src
);
17187 XEXP (src
, 0) = op1
;
17188 XEXP (src
, 1) = op0
;
17189 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
17190 vec
= gen_rtvec (2, pat
, clobber
);
17192 else /* action == CONV */
17193 vec
= gen_rtvec (2, pat
, clobber
);
17195 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17196 INSN_CODE (insn
) = -1;
17200 if (NONDEBUG_INSN_P (insn
))
17201 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17205 CLEAR_REG_SET (&live
);
17208 /* Gcc puts the pool in the wrong place for ARM, since we can only
17209 load addresses a limited distance around the pc. We do some
17210 special munging to move the constant pool values to the correct
17211 point in the code. */
17216 HOST_WIDE_INT address
= 0;
17221 else if (TARGET_THUMB2
)
17224 /* Ensure all insns that must be split have been split at this point.
17225 Otherwise, the pool placement code below may compute incorrect
17226 insn lengths. Note that when optimizing, all insns have already
17227 been split at this point. */
17229 split_all_insns_noflow ();
17231 minipool_fix_head
= minipool_fix_tail
= NULL
;
17233 /* The first insn must always be a note, or the code below won't
17234 scan it properly. */
17235 insn
= get_insns ();
17236 gcc_assert (NOTE_P (insn
));
17239 /* Scan all the insns and record the operands that will need fixing. */
17240 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17242 if (BARRIER_P (insn
))
17243 push_minipool_barrier (insn
, address
);
17244 else if (INSN_P (insn
))
17248 note_invalid_constants (insn
, address
, true);
17249 address
+= get_attr_length (insn
);
17251 /* If the insn is a vector jump, add the size of the table
17252 and skip the table. */
17253 if (tablejump_p (insn
, NULL
, &table
))
17255 address
+= get_jump_table_size (table
);
17259 else if (LABEL_P (insn
))
17260 /* Add the worst-case padding due to alignment. We don't add
17261 the _current_ padding because the minipool insertions
17262 themselves might change it. */
17263 address
+= get_label_padding (insn
);
17266 fix
= minipool_fix_head
;
17268 /* Now scan the fixups and perform the required changes. */
17273 Mfix
* last_added_fix
;
17274 Mfix
* last_barrier
= NULL
;
17277 /* Skip any further barriers before the next fix. */
17278 while (fix
&& BARRIER_P (fix
->insn
))
17281 /* No more fixes. */
17285 last_added_fix
= NULL
;
17287 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17289 if (BARRIER_P (ftmp
->insn
))
17291 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17294 last_barrier
= ftmp
;
17296 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17299 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17302 /* If we found a barrier, drop back to that; any fixes that we
17303 could have reached but come after the barrier will now go in
17304 the next mini-pool. */
17305 if (last_barrier
!= NULL
)
17307 /* Reduce the refcount for those fixes that won't go into this
17309 for (fdel
= last_barrier
->next
;
17310 fdel
&& fdel
!= ftmp
;
17313 fdel
->minipool
->refcount
--;
17314 fdel
->minipool
= NULL
;
17317 ftmp
= last_barrier
;
17321 /* ftmp is first fix that we can't fit into this pool and
17322 there no natural barriers that we could use. Insert a
17323 new barrier in the code somewhere between the previous
17324 fix and this one, and arrange to jump around it. */
17325 HOST_WIDE_INT max_address
;
17327 /* The last item on the list of fixes must be a barrier, so
17328 we can never run off the end of the list of fixes without
17329 last_barrier being set. */
17332 max_address
= minipool_vector_head
->max_address
;
17333 /* Check that there isn't another fix that is in range that
17334 we couldn't fit into this pool because the pool was
17335 already too large: we need to put the pool before such an
17336 instruction. The pool itself may come just after the
17337 fix because create_fix_barrier also allows space for a
17338 jump instruction. */
17339 if (ftmp
->address
< max_address
)
17340 max_address
= ftmp
->address
+ 1;
17342 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17345 assign_minipool_offsets (last_barrier
);
17349 if (!BARRIER_P (ftmp
->insn
)
17350 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17357 /* Scan over the fixes we have identified for this pool, fixing them
17358 up and adding the constants to the pool itself. */
17359 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17360 this_fix
= this_fix
->next
)
17361 if (!BARRIER_P (this_fix
->insn
))
17364 = plus_constant (Pmode
,
17365 gen_rtx_LABEL_REF (VOIDmode
,
17366 minipool_vector_label
),
17367 this_fix
->minipool
->offset
);
17368 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17371 dump_minipool (last_barrier
->insn
);
17375 /* From now on we must synthesize any constants that we can't handle
17376 directly. This can happen if the RTL gets split during final
17377 instruction generation. */
17378 cfun
->machine
->after_arm_reorg
= 1;
17380 /* Free the minipool memory. */
17381 obstack_free (&minipool_obstack
, minipool_startobj
);
17384 /* Routines to output assembly language. */
17386 /* If the rtx is the correct value then return the string of the number.
17387 In this way we can ensure that valid double constants are generated even
17388 when cross compiling. */
/* NOTE(review): this extract is lossy -- the return type line, local
   declarations and the return statements (original lines 17389-17402)
   are missing here; confirm against the complete arm.c. */
17390 fp_immediate_constant (rtx x
)
/* Presumably triggers one-time initialization of the FP constant table;
   the body of this 'if' is missing from the extract -- TODO confirm. */
17394 if (!fp_consts_inited
)
/* Decode the REAL_VALUE_TYPE held in the CONST_DOUBLE rtx X into R. */
17397 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
/* Only the value equal to value_fp0 (presumably +0.0) may reach here. */
17399 gcc_assert (REAL_VALUES_EQUAL (r
, value_fp0
));
17403 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
/* NOTE(review): the init call body and the return statement (original
   lines 17406-17412) are missing from this extract. */
17404 static const char *
17405 fp_const_from_val (REAL_VALUE_TYPE
*r
)
/* Lazy one-time init guard, same pattern as fp_immediate_constant. */
17407 if (!fp_consts_inited
)
/* Only a value equal to value_fp0 is accepted. */
17410 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
))
;
17414 /* OPERANDS[0] is the entire list of insns that constitute pop,
17415 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17416 is in the list, UPDATE is true iff the list contains explicit
17417 update of base register. */
/* NOTE(review): extract is lossy -- the trailing parameter(s) of the
   signature, local declarations of 'offset'/'i'/'pattern', braces and
   several statements are missing; confirm against the complete file. */
17419 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17425 const char *conditional
;
17426 int num_saves
= XVECLEN (operands
[0], 0);
17427 unsigned int regno
;
17428 unsigned int regno_base
= REGNO (operands
[1]);
/* Skip the leading PARALLEL elements that describe the base-register
   update and/or the return, so the scan below sees only register loads. */
17431 offset
+= update
? 1 : 0;
17432 offset
+= return_pc
? 1 : 0;
17434 /* Is the base register in the list? */
17435 for (i
= offset
; i
< num_saves
; i
++)
17437 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17438 /* If SP is in the list, then the base register must be SP. */
17439 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17440 /* If base register is in the list, there must be no explicit update. */
17441 if (regno
== regno_base
)
17442 gcc_assert (!update
);
/* %?%D0 prints the reversed condition of operand 0, %?%d0 the normal
   one -- presumably; see the backend's print_operand codes. */
17445 conditional
= reverse
? "%?%D0" : "%?%d0";
17446 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
17448 /* Output pop (not stmfd) because it has a shorter encoding. */
17449 gcc_assert (update
);
17450 sprintf (pattern
, "pop%s\t{", conditional
);
17454 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17455 It's just a convention, their semantics are identical. */
17456 if (regno_base
== SP_REGNUM
)
17457 sprintf (pattern
, "ldm%sfd\t", conditional
);
17458 else if (TARGET_UNIFIED_ASM
)
17459 sprintf (pattern
, "ldmia%s\t", conditional
);
17461 sprintf (pattern
, "ldm%sia\t", conditional
);
/* Append the base register name; '!' (writeback) is chosen when the
   list contains an explicit update of the base (lines 17464-17467). */
17463 strcat (pattern
, reg_names
[regno_base
]);
17465 strcat (pattern
, "!, {");
17467 strcat (pattern
, ", {");
17470 /* Output the first destination register. */
17472 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17474 /* Output the rest of the destination registers. */
17475 for (i
= offset
+ 1; i
< num_saves
; i
++)
17477 strcat (pattern
, ", ");
17479 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17482 strcat (pattern
, "}");
/* For an interrupt handler returning via PC, append '^' -- presumably
   to request SPSR->CPSR restore; confirm against ARM ARM LDM semantics. */
17484 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
17485 strcat (pattern
, "^");
/* Emit the finished template; COND supplies operand 0 for %d0/%D0. */
17487 output_asm_insn (pattern
, &cond
);
17491 /* Output the assembly for a store multiple. */
/* NOTE(review): local declarations (pattern buffer, p, base, i), braces
   and the return statement are missing from this extract. */
17494 vfp_output_fstmd (rtx
* operands
)
/* Start the template with the first D register; %m0 is the base address
   register, %P1 the first double-precision register. */
17501 strcpy (pattern
, "fstmfdd%?\t%m0!, {%P1");
17502 p
= strlen (pattern
);
17504 gcc_assert (REG_P (operands
[1]));
/* Convert the internal VFP register number into a D-register index. */
17506 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
/* Append the remaining D registers of the block, one per PARALLEL
   element past the first. */
17507 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17509 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
/* Close the register list and emit the instruction. */
17511 strcpy (&pattern
[p
], "}");
17513 output_asm_insn (pattern
, operands
);
17518 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17519 number of bytes pushed. */
17522 vfp_emit_fstmd (int base_reg
, int count
)
17529 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17530 register pairs are stored by a store multiple insn. We avoid this
17531 by pushing an extra pair. */
17532 if (count
== 2 && !arm_arch6
)
17534 if (base_reg
== LAST_VFP_REGNUM
- 3)
17539 /* FSTMD may not store more than 16 doubleword registers at once. Split
17540 larger stores into multiple parts (up to a maximum of two, in
17545 /* NOTE: base_reg is an internal register number, so each D register
17547 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17548 saved
+= vfp_emit_fstmd (base_reg
, 16);
17552 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17553 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17555 reg
= gen_rtx_REG (DFmode
, base_reg
);
17558 XVECEXP (par
, 0, 0)
17559 = gen_rtx_SET (VOIDmode
,
17562 gen_rtx_PRE_MODIFY (Pmode
,
17565 (Pmode
, stack_pointer_rtx
,
17568 gen_rtx_UNSPEC (BLKmode
,
17569 gen_rtvec (1, reg
),
17570 UNSPEC_PUSH_MULT
));
17572 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17573 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17574 RTX_FRAME_RELATED_P (tmp
) = 1;
17575 XVECEXP (dwarf
, 0, 0) = tmp
;
17577 tmp
= gen_rtx_SET (VOIDmode
,
17578 gen_frame_mem (DFmode
, stack_pointer_rtx
),
17580 RTX_FRAME_RELATED_P (tmp
) = 1;
17581 XVECEXP (dwarf
, 0, 1) = tmp
;
17583 for (i
= 1; i
< count
; i
++)
17585 reg
= gen_rtx_REG (DFmode
, base_reg
);
17587 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17589 tmp
= gen_rtx_SET (VOIDmode
,
17590 gen_frame_mem (DFmode
,
17591 plus_constant (Pmode
,
17595 RTX_FRAME_RELATED_P (tmp
) = 1;
17596 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17599 par
= emit_insn (par
);
17600 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17601 RTX_FRAME_RELATED_P (par
) = 1;
17606 /* Emit a call instruction with pattern PAT. ADDR is the address of
17607 the call target. */
/* NOTE(review): the declaration of 'insn' and surrounding braces are
   missing from this extract (original lines 17611-17613, 17620, 17625). */
17610 arm_emit_call_insn (rtx pat
, rtx addr
)
17614 insn
= emit_call_insn (pat
);
17616 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17617 If the call might use such an entry, add a use of the PIC register
17618 to the instruction's CALL_INSN_FUNCTION_USAGE. */
/* A SYMBOL_REF that does not bind locally may be resolved through a
   PLT entry, hence the binds_local_p / SYMBOL_REF_LOCAL_P checks. */
17619 if (TARGET_VXWORKS_RTP
17621 && GET_CODE (addr
) == SYMBOL_REF
17622 && (SYMBOL_REF_DECL (addr
)
17623 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17624 : !SYMBOL_REF_LOCAL_P (addr
)))
/* Make sure the PIC register exists, then record it as used by the call. */
17626 require_pic_register ();
17627 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17631 /* Output a 'call' insn. */
/* NOTE(review): return type, braces and 'return ""' are missing from
   this extract.  This path is only for pre-ARMv5 (no blx). */
17633 output_call (rtx
*operands
)
17635 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17637 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17638 if (REGNO (operands
[0]) == LR_REGNUM
)
/* Copy LR into IP and call through IP instead, since LR is about to be
   overwritten with the return address. */
17640 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17641 output_asm_insn ("mov%?\t%0, %|lr", operands
);
/* Classic pre-v5 call sequence: load the return address into LR ... */
17644 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
/* ... then branch: bx when interworking/ARMv4T is available (it can
   switch instruction sets), otherwise a plain mov to PC. */
17646 if (TARGET_INTERWORK
|| arm_arch4t
)
17647 output_asm_insn ("bx%?\t%0", operands
);
17649 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17654 /* Output a 'call' insn that is a reference in memory. This is
17655 disabled for ARMv5 and we prefer a blx instead because otherwise
17656 there's a significant performance overhead. */
/* NOTE(review): return type, braces, 'else' keywords and 'return ""'
   are missing from this extract. */
17658 output_call_mem (rtx
*operands
)
17660 gcc_assert (!arm_arch5
);
/* Interworking: load the target into IP, set LR, and bx through IP so
   the instruction set can change at the call. */
17661 if (TARGET_INTERWORK
)
17663 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17664 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17665 output_asm_insn ("bx%?\t%|ip", operands
);
17667 else if (regno_use_in (LR_REGNUM
, operands
[0]))
17669 /* LR is used in the memory address. We load the address in the
17670 first instruction. It's safe to use IP as the target of the
17671 load since the call will kill it anyway. */
17672 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17673 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
/* Branch to the loaded address: bx when available, mov pc otherwise
   (the selecting condition is missing from this extract). */
17675 output_asm_insn ("bx%?\t%|ip", operands
);
17677 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
/* Fallback: set LR then load the target straight into PC. */
17681 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17682 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
17689 /* Output a move from arm registers to arm registers of a long double
17690 OPERANDS[0] is the destination.
17691 OPERANDS[1] is the source. */
/* NOTE(review): return type, the declarations of 'ops' and 'i', braces
   and 'return ""' are missing from this extract. */
17693 output_mov_long_double_arm_from_arm (rtx
*operands
)
17695 /* We have to be careful here because the two might overlap. */
17696 int dest_start
= REGNO (operands
[0]);
17697 int src_start
= REGNO (operands
[1]);
/* Copy the three core registers in the direction that cannot clobber
   a source register before it has been read. */
17701 if (dest_start
< src_start
)
/* Destination below source: copy ascending (r0, r1, r2 order). */
17703 for (i
= 0; i
< 3; i
++)
17705 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17706 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17707 output_asm_insn ("mov%?\t%0, %1", ops
);
/* Otherwise copy descending so overlapping high registers survive. */
17712 for (i
= 2; i
>= 0; i
--)
17714 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17715 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17716 output_asm_insn ("mov%?\t%0, %1", ops
);
/* Emit a two-instruction move of SRC into DEST (SImode).  For an
   immediate: set the low 16 bits, then insert the high 16 bits via a
   ZERO_EXTRACT (movw/movt-style -- presumably; confirm with the
   matching patterns).  Otherwise use HIGH + LO_SUM relocations.
   NOTE(review): the function's leading comment/return type and braces
   are missing from this extract. */
17724 arm_emit_movpair (rtx dest
, rtx src
)
17726 /* If the src is an immediate, simplify it. */
17727 if (CONST_INT_P (src
))
17729 HOST_WIDE_INT val
= INTVAL (src
);
/* Low halfword first ... */
17730 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
/* ... then the high halfword, but only when it is non-zero. */
17731 if ((val
>> 16) & 0x0000ffff)
17732 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17734 GEN_INT ((val
>> 16) & 0x0000ffff));
/* Symbolic operand: classic HIGH / LO_SUM pair. */
17737 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17738 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17741 /* Output a move between double words. It must be REG<-MEM
17744 output_move_double (rtx
*operands
, bool emit
, int *count
)
17746 enum rtx_code code0
= GET_CODE (operands
[0]);
17747 enum rtx_code code1
= GET_CODE (operands
[1]);
17752 /* The only case when this might happen is when
17753 you are looking at the length of a DImode instruction
17754 that has an invalid constant in it. */
17755 if (code0
== REG
&& code1
!= MEM
)
17757 gcc_assert (!emit
);
17764 unsigned int reg0
= REGNO (operands
[0]);
17766 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17768 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17770 switch (GET_CODE (XEXP (operands
[1], 0)))
17777 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17778 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17780 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17785 gcc_assert (TARGET_LDRD
);
17787 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
17794 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
17796 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
17804 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
17806 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
17811 gcc_assert (TARGET_LDRD
);
17813 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
17818 /* Autoicrement addressing modes should never have overlapping
17819 base and destination registers, and overlapping index registers
17820 are already prohibited, so this doesn't need to worry about
17822 otherops
[0] = operands
[0];
17823 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17824 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17826 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17828 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17830 /* Registers overlap so split out the increment. */
17833 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17834 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
17841 /* Use a single insn if we can.
17842 FIXME: IWMMXT allows offsets larger than ldrd can
17843 handle, fix these up with a pair of ldr. */
17845 || !CONST_INT_P (otherops
[2])
17846 || (INTVAL (otherops
[2]) > -256
17847 && INTVAL (otherops
[2]) < 256))
17850 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
17856 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17857 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17867 /* Use a single insn if we can.
17868 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17869 fix these up with a pair of ldr. */
17871 || !CONST_INT_P (otherops
[2])
17872 || (INTVAL (otherops
[2]) > -256
17873 && INTVAL (otherops
[2]) < 256))
17876 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
17882 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17883 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
17893 /* We might be able to use ldrd %0, %1 here. However the range is
17894 different to ldr/adr, and it is broken on some ARMv7-M
17895 implementations. */
17896 /* Use the second register of the pair to avoid problematic
17898 otherops
[1] = operands
[1];
17900 output_asm_insn ("adr%?\t%0, %1", otherops
);
17901 operands
[1] = otherops
[0];
17905 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17907 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
17914 /* ??? This needs checking for thumb2. */
17916 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
17917 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
17919 otherops
[0] = operands
[0];
17920 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
17921 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
17923 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
17925 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17927 switch ((int) INTVAL (otherops
[2]))
17931 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
17937 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
17943 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
17947 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
17948 operands
[1] = otherops
[0];
17950 && (REG_P (otherops
[2])
17952 || (CONST_INT_P (otherops
[2])
17953 && INTVAL (otherops
[2]) > -256
17954 && INTVAL (otherops
[2]) < 256)))
17956 if (reg_overlap_mentioned_p (operands
[0],
17960 /* Swap base and index registers over to
17961 avoid a conflict. */
17963 otherops
[1] = otherops
[2];
17966 /* If both registers conflict, it will usually
17967 have been fixed by a splitter. */
17968 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
17969 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
17973 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17974 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17981 otherops
[0] = operands
[0];
17983 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
17988 if (CONST_INT_P (otherops
[2]))
17992 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
17993 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
17995 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18001 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18007 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18014 return "ldr%(d%)\t%0, [%1]";
18016 return "ldm%(ia%)\t%1, %M0";
18020 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18021 /* Take care of overlapping base/data reg. */
18022 if (reg_mentioned_p (operands
[0], operands
[1]))
18026 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18027 output_asm_insn ("ldr%?\t%0, %1", operands
);
18037 output_asm_insn ("ldr%?\t%0, %1", operands
);
18038 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18048 /* Constraints should ensure this. */
18049 gcc_assert (code0
== MEM
&& code1
== REG
);
18050 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18051 || (TARGET_ARM
&& TARGET_LDRD
));
18053 switch (GET_CODE (XEXP (operands
[0], 0)))
18059 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
18061 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18066 gcc_assert (TARGET_LDRD
);
18068 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
18075 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
18077 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
18085 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
18087 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
18092 gcc_assert (TARGET_LDRD
);
18094 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
18099 otherops
[0] = operands
[1];
18100 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18101 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18103 /* IWMMXT allows offsets larger than ldrd can handle,
18104 fix these up with a pair of ldr. */
18106 && CONST_INT_P (otherops
[2])
18107 && (INTVAL(otherops
[2]) <= -256
18108 || INTVAL(otherops
[2]) >= 256))
18110 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18114 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18115 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18124 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18125 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18131 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18134 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
18139 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
18144 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18145 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18147 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18151 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
18158 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
18165 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
18170 && (REG_P (otherops
[2])
18172 || (CONST_INT_P (otherops
[2])
18173 && INTVAL (otherops
[2]) > -256
18174 && INTVAL (otherops
[2]) < 256)))
18176 otherops
[0] = operands
[1];
18177 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18179 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
18185 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18186 otherops
[1] = operands
[1];
18189 output_asm_insn ("str%?\t%1, %0", operands
);
18190 output_asm_insn ("str%?\t%H1, %0", otherops
);
18200 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18201 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18204 output_move_quad (rtx
*operands
)
18206 if (REG_P (operands
[0]))
18208 /* Load, or reg->reg move. */
18210 if (MEM_P (operands
[1]))
18212 switch (GET_CODE (XEXP (operands
[1], 0)))
18215 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
18220 output_asm_insn ("adr%?\t%0, %1", operands
);
18221 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
18225 gcc_unreachable ();
18233 gcc_assert (REG_P (operands
[1]));
18235 dest
= REGNO (operands
[0]);
18236 src
= REGNO (operands
[1]);
18238 /* This seems pretty dumb, but hopefully GCC won't try to do it
18241 for (i
= 0; i
< 4; i
++)
18243 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18244 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18245 output_asm_insn ("mov%?\t%0, %1", ops
);
18248 for (i
= 3; i
>= 0; i
--)
18250 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18251 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18252 output_asm_insn ("mov%?\t%0, %1", ops
);
18258 gcc_assert (MEM_P (operands
[0]));
18259 gcc_assert (REG_P (operands
[1]));
18260 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18262 switch (GET_CODE (XEXP (operands
[0], 0)))
18265 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18269 gcc_unreachable ();
18276 /* Output a VFP load or store instruction. */
18279 output_move_vfp (rtx
*operands
)
18281 rtx reg
, mem
, addr
, ops
[2];
18282 int load
= REG_P (operands
[0]);
18283 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18284 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18287 enum machine_mode mode
;
18289 reg
= operands
[!load
];
18290 mem
= operands
[load
];
18292 mode
= GET_MODE (reg
);
18294 gcc_assert (REG_P (reg
));
18295 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18296 gcc_assert (mode
== SFmode
18300 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18301 gcc_assert (MEM_P (mem
));
18303 addr
= XEXP (mem
, 0);
18305 switch (GET_CODE (addr
))
18308 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18309 ops
[0] = XEXP (addr
, 0);
18314 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
18315 ops
[0] = XEXP (addr
, 0);
18320 templ
= "f%s%c%%?\t%%%s0, %%1%s";
18326 sprintf (buff
, templ
,
18327 load
? "ld" : "st",
18330 integer_p
? "\t%@ int" : "");
18331 output_asm_insn (buff
, ops
);
18336 /* Output a Neon double-word or quad-word load or store, or a load
18337 or store for larger structure modes.
18339 WARNING: The ordering of elements is weird in big-endian mode,
18340 because the EABI requires that vectors stored in memory appear
18341 as though they were stored by a VSTM, as required by the EABI.
18342 GCC RTL defines element ordering based on in-memory order.
18343 This can be different from the architectural ordering of elements
18344 within a NEON register. The intrinsics defined in arm_neon.h use the
18345 NEON register element ordering, not the GCC RTL element ordering.
18347 For example, the in-memory ordering of a big-endian a quadword
18348 vector with 16-bit elements when stored from register pair {d0,d1}
18349 will be (lowest address first, d0[N] is NEON register element N):
18351 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18353 When necessary, quadword registers (dN, dN+1) are moved to ARM
18354 registers from rN in the order:
18356 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18358 So that STM/LDM can be used on vectors in ARM registers, and the
18359 same memory layout will result as if VSTM/VLDM were used.
18361 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18362 possible, which allows use of appropriate alignment tags.
18363 Note that the choice of "64" is independent of the actual vector
18364 element size; this size simply ensures that the behavior is
18365 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18367 Due to limitations of those instructions, use of VST1.64/VLD1.64
18368 is not possible if:
18369 - the address contains PRE_DEC, or
18370 - the mode refers to more than 4 double-word registers
18372 In those cases, it would be possible to replace VSTM/VLDM by a
18373 sequence of instructions; this is not currently implemented since
18374 this is not certain to actually improve performance. */
18377 output_move_neon (rtx
*operands
)
18379 rtx reg
, mem
, addr
, ops
[2];
18380 int regno
, nregs
, load
= REG_P (operands
[0]);
18383 enum machine_mode mode
;
18385 reg
= operands
[!load
];
18386 mem
= operands
[load
];
18388 mode
= GET_MODE (reg
);
18390 gcc_assert (REG_P (reg
));
18391 regno
= REGNO (reg
);
18392 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18393 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18394 || NEON_REGNO_OK_FOR_QUAD (regno
));
18395 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18396 || VALID_NEON_QREG_MODE (mode
)
18397 || VALID_NEON_STRUCT_MODE (mode
));
18398 gcc_assert (MEM_P (mem
));
18400 addr
= XEXP (mem
, 0);
18402 /* Strip off const from addresses like (const (plus (...))). */
18403 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18404 addr
= XEXP (addr
, 0);
18406 switch (GET_CODE (addr
))
18409 /* We have to use vldm / vstm for too-large modes. */
18412 templ
= "v%smia%%?\t%%0!, %%h1";
18413 ops
[0] = XEXP (addr
, 0);
18417 templ
= "v%s1.64\t%%h1, %%A0";
18424 /* We have to use vldm / vstm in this case, since there is no
18425 pre-decrement form of the vld1 / vst1 instructions. */
18426 templ
= "v%smdb%%?\t%%0!, %%h1";
18427 ops
[0] = XEXP (addr
, 0);
18432 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18433 gcc_unreachable ();
18440 for (i
= 0; i
< nregs
; i
++)
18442 /* We're only using DImode here because it's a convenient size. */
18443 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18444 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18445 if (reg_overlap_mentioned_p (ops
[0], mem
))
18447 gcc_assert (overlap
== -1);
18452 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18453 output_asm_insn (buff
, ops
);
18458 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18459 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18460 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18461 output_asm_insn (buff
, ops
);
18468 /* We have to use vldm / vstm for too-large modes. */
18470 templ
= "v%smia%%?\t%%m0, %%h1";
18472 templ
= "v%s1.64\t%%h1, %%A0";
18478 sprintf (buff
, templ
, load
? "ld" : "st");
18479 output_asm_insn (buff
, ops
);
18484 /* Compute and return the length of neon_mov<mode>, where <mode> is
18485 one of VSTRUCT modes: EI, OI, CI or XI. */
18487 arm_attr_length_move_neon (rtx insn
)
18489 rtx reg
, mem
, addr
;
18491 enum machine_mode mode
;
18493 extract_insn_cached (insn
);
18495 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18497 mode
= GET_MODE (recog_data
.operand
[0]);
18508 gcc_unreachable ();
18512 load
= REG_P (recog_data
.operand
[0]);
18513 reg
= recog_data
.operand
[!load
];
18514 mem
= recog_data
.operand
[load
];
18516 gcc_assert (MEM_P (mem
));
18518 mode
= GET_MODE (reg
);
18519 addr
= XEXP (mem
, 0);
18521 /* Strip off const from addresses like (const (plus (...))). */
18522 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18523 addr
= XEXP (addr
, 0);
18525 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18527 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18534 /* Return nonzero if the offset in the address is an immediate. Otherwise,
/* NOTE(review): the rest of this comment, local declarations, the
   leading disjunct of the final condition and the returns are missing
   from this extract; confirm against the complete file. */
18538 arm_address_offset_is_imm (rtx insn
)
18542 extract_insn_cached (insn
);
/* Operand 0 being a REG presumably means this is a load, handled
   elsewhere (the guarded statement is missing here). */
18544 if (REG_P (recog_data
.operand
[0]))
18547 mem
= recog_data
.operand
[0];
18549 gcc_assert (MEM_P (mem
));
18551 addr
= XEXP (mem
, 0);
/* Accept (plus (reg) (const_int)) -- i.e. base register plus an
   immediate offset; the first alternative of this '||' is missing. */
18554 || (GET_CODE (addr
) == PLUS
18555 && REG_P (XEXP (addr
, 0))
18556 && CONST_INT_P (XEXP (addr
, 1))))
18562 /* Output an ADD r, s, #n where n may be too big for one instruction.
18563 If adding zero to one register, output nothing. */
/* NOTE(review): return type, braces, the negative-N test and the final
   arguments/return are missing from this extract. */
18565 output_add_immediate (rtx
*operands
)
18567 HOST_WIDE_INT n
= INTVAL (operands
[2]);
/* Emit nothing for 'add rX, rX, #0'. */
18569 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
/* Negative constants are emitted as a chain of SUBs (presumably with
   -n; the condition/argument lines are missing here) ... */
18572 output_multi_immediate (operands
,
18573 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
/* ... positive constants as a chain of ADDs. */
18576 output_multi_immediate (operands
,
18577 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18584 /* Output a multiple immediate operation.
18585 OPERANDS is the vector of operands referred to in the output patterns.
18586 INSTR1 is the output pattern to use for the first constant.
18587 INSTR2 is the output pattern to use for subsequent constants.
18588 IMMED_OP is the index of the constant slot in OPERANDS.
18589 N is the constant value. */
/* NOTE(review): braces, the n==0 test, the instr1->instr2 switch and
   the return are missing from this extract. */
18590 static const char *
18591 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18592 int immed_op
, HOST_WIDE_INT n
)
/* On 64-bit hosts, presumably mask N to 32 bits first (the statement
   under this #if is missing from the extract). */
18594 #if HOST_BITS_PER_WIDE_INT > 32
18600 /* Quick and easy output. */
/* N == 0: a single instruction with a zero immediate suffices. */
18601 operands
[immed_op
] = const0_rtx
;
18602 output_asm_insn (instr1
, operands
);
18607 const char * instr
= instr1
;
18609 /* Note that n is never zero here (which would give no output). */
/* Walk the 32-bit value in even-rotation steps, peeling off one
   ARM-encodable 8-bit chunk (255 << i) per emitted instruction. */
18610 for (i
= 0; i
< 32; i
+= 2)
18614 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18615 output_asm_insn (instr
, operands
);
18625 /* Return the name of a shifter operation. */
/* NOTE(review): the switch over CODE and its other cases (ASHIFTRT,
   LSHIFTRT, ROTATERT presumably) are missing from this extract; only
   the ARM_LSL_NAME return is visible. */
18626 static const char *
18627 arm_shift_nmem(enum rtx_code code
)
18632 return ARM_LSL_NAME
;
18648 /* Return the appropriate ARM instruction for the operation code.
18649 The returned result should not be overwritten. OP is the rtx of the
18650 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
/* NOTE(review): the rest of this comment, the return type and most
   switch cases (PLUS, IOR, XOR, AND, ... presumably) are missing from
   this extract; only the MINUS and shift cases are visible. */
18653 arithmetic_instr (rtx op
, int shift_first_arg
)
18655 switch (GET_CODE (op
))
/* MINUS: reversed-operand subtraction is 'rsb', otherwise 'sub'. */
18661 return shift_first_arg
? "rsb" : "sub";
/* Shift codes delegate to the shared shifter-name helper. */
18676 return arm_shift_nmem(GET_CODE(op
));
18679 gcc_unreachable ();
18683 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18684 for the operation code. The returned result should not be overwritten.
18685 OP is the rtx code of the shift.
18686 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18688 static const char *
18689 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18692 enum rtx_code code
= GET_CODE (op
);
18697 if (!CONST_INT_P (XEXP (op
, 1)))
18699 output_operand_lossage ("invalid shift operand");
18704 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18712 mnem
= arm_shift_nmem(code
);
18713 if (CONST_INT_P (XEXP (op
, 1)))
18715 *amountp
= INTVAL (XEXP (op
, 1));
18717 else if (REG_P (XEXP (op
, 1)))
18724 output_operand_lossage ("invalid shift operand");
18730 /* We never have to worry about the amount being other than a
18731 power of 2, since this case can never be reloaded from a reg. */
18732 if (!CONST_INT_P (XEXP (op
, 1)))
18734 output_operand_lossage ("invalid shift operand");
18738 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18740 /* Amount must be a power of two. */
18741 if (*amountp
& (*amountp
- 1))
18743 output_operand_lossage ("invalid shift operand");
18747 *amountp
= int_log2 (*amountp
);
18748 return ARM_LSL_NAME
;
18751 output_operand_lossage ("invalid shift operand");
18755 /* This is not 100% correct, but follows from the desire to merge
18756 multiplication by a power of 2 with the recognizer for a
18757 shift. >=32 is not a valid shift for "lsl", so we must try and
18758 output a shift that produces the correct arithmetical result.
18759 Using lsr #32 is identical except for the fact that the carry bit
18760 is not set correctly if we set the flags; but we never use the
18761 carry bit from such an operation, so we can ignore that. */
18762 if (code
== ROTATERT
)
18763 /* Rotate is just modulo 32. */
18765 else if (*amountp
!= (*amountp
& 31))
18767 if (code
== ASHIFT
)
18772 /* Shifts of 0 are no-ops. */
18779 /* Obtain the shift from the POWER of two. */
18781 static HOST_WIDE_INT
18782 int_log2 (HOST_WIDE_INT power
)
18784 HOST_WIDE_INT shift
= 0;
18786 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
18788 gcc_assert (shift
<= 31);
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

/* Maximum number of characters emitted per .ascii directive before a
   new directive is started.  */
#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      /* Break overly long runs into a fresh .ascii directive.  */
      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  /* Backslash and double-quote must be escaped inside the
	     string literal.  */
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  /* Non-printable characters are emitted as 3-digit octal
	     escapes, which occupy four characters of output.  */
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
18843 /* Compute the register save mask for registers 0 through 12
18844 inclusive. This code is used by arm_compute_save_reg_mask. */
18846 static unsigned long
18847 arm_compute_save_reg0_reg12_mask (void)
18849 unsigned long func_type
= arm_current_func_type ();
18850 unsigned long save_reg_mask
= 0;
18853 if (IS_INTERRUPT (func_type
))
18855 unsigned int max_reg
;
18856 /* Interrupt functions must not corrupt any registers,
18857 even call clobbered ones. If this is a leaf function
18858 we can just examine the registers used by the RTL, but
18859 otherwise we have to assume that whatever function is
18860 called might clobber anything, and so we have to save
18861 all the call-clobbered registers as well. */
18862 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18863 /* FIQ handlers have registers r8 - r12 banked, so
18864 we only need to check r0 - r7, Normal ISRs only
18865 bank r14 and r15, so we must check up to r12.
18866 r13 is the stack pointer which is always preserved,
18867 so we do not need to consider it here. */
18872 for (reg
= 0; reg
<= max_reg
; reg
++)
18873 if (df_regs_ever_live_p (reg
)
18874 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18875 save_reg_mask
|= (1 << reg
);
18877 /* Also save the pic base register if necessary. */
18879 && !TARGET_SINGLE_PIC_BASE
18880 && arm_pic_register
!= INVALID_REGNUM
18881 && crtl
->uses_pic_offset_table
)
18882 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18884 else if (IS_VOLATILE(func_type
))
18886 /* For noreturn functions we historically omitted register saves
18887 altogether. However this really messes up debugging. As a
18888 compromise save just the frame pointers. Combined with the link
18889 register saved elsewhere this should be sufficient to get
18891 if (frame_pointer_needed
)
18892 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18893 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
18894 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18895 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
18896 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
18900 /* In the normal case we only need to save those registers
18901 which are call saved and which are used by this function. */
18902 for (reg
= 0; reg
<= 11; reg
++)
18903 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
18904 save_reg_mask
|= (1 << reg
);
18906 /* Handle the frame pointer as a special case. */
18907 if (frame_pointer_needed
)
18908 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18910 /* If we aren't loading the PIC register,
18911 don't stack it even though it may be live. */
18913 && !TARGET_SINGLE_PIC_BASE
18914 && arm_pic_register
!= INVALID_REGNUM
18915 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
18916 || crtl
->uses_pic_offset_table
))
18917 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18919 /* The prologue will copy SP into R0, so save it. */
18920 if (IS_STACKALIGN (func_type
))
18921 save_reg_mask
|= 1;
18924 /* Save registers so the exception handler can modify them. */
18925 if (crtl
->calls_eh_return
)
18931 reg
= EH_RETURN_DATA_REGNO (i
);
18932 if (reg
== INVALID_REGNUM
)
18934 save_reg_mask
|= 1 << reg
;
18938 return save_reg_mask
;
18941 /* Return true if r3 is live at the start of the function. */
18944 arm_r3_live_at_start_p (void)
18946 /* Just look at cfg info, which is still close enough to correct at this
18947 point. This gives false positives for broken functions that might use
18948 uninitialized data that happens to be allocated in r3, but who cares? */
18949 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
18952 /* Compute the number of bytes used to store the static chain register on the
18953 stack, above the stack frame. We need to know this accurately to get the
18954 alignment of the rest of the stack frame correct. */
18957 arm_compute_static_chain_stack_bytes (void)
18959 /* See the defining assertion in arm_expand_prologue. */
18960 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
18961 && IS_NESTED (arm_current_func_type ())
18962 && arm_r3_live_at_start_p ()
18963 && crtl
->args
.pretend_args_size
== 0)
18969 /* Compute a bit mask of which registers need to be
18970 saved on the stack for the current function.
18971 This is used by arm_get_frame_offsets, which may add extra registers. */
18973 static unsigned long
18974 arm_compute_save_reg_mask (void)
18976 unsigned int save_reg_mask
= 0;
18977 unsigned long func_type
= arm_current_func_type ();
18980 if (IS_NAKED (func_type
))
18981 /* This should never really happen. */
18984 /* If we are creating a stack frame, then we must save the frame pointer,
18985 IP (which will hold the old stack pointer), LR and the PC. */
18986 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
18988 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
18991 | (1 << PC_REGNUM
);
18993 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
18995 /* Decide if we need to save the link register.
18996 Interrupt routines have their own banked link register,
18997 so they never need to save it.
18998 Otherwise if we do not use the link register we do not need to save
18999 it. If we are pushing other registers onto the stack however, we
19000 can save an instruction in the epilogue by pushing the link register
19001 now and then popping it back into the PC. This incurs extra memory
19002 accesses though, so we only do it when optimizing for size, and only
19003 if we know that we will not need a fancy return sequence. */
19004 if (df_regs_ever_live_p (LR_REGNUM
)
19007 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19008 && !crtl
->calls_eh_return
))
19009 save_reg_mask
|= 1 << LR_REGNUM
;
19011 if (cfun
->machine
->lr_save_eliminated
)
19012 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19014 if (TARGET_REALLY_IWMMXT
19015 && ((bit_count (save_reg_mask
)
19016 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19017 arm_compute_static_chain_stack_bytes())
19020 /* The total number of registers that are going to be pushed
19021 onto the stack is odd. We need to ensure that the stack
19022 is 64-bit aligned before we start to save iWMMXt registers,
19023 and also before we start to create locals. (A local variable
19024 might be a double or long long which we will load/store using
19025 an iWMMXt instruction). Therefore we need to push another
19026 ARM register, so that the stack will be 64-bit aligned. We
19027 try to avoid using the arg registers (r0 -r3) as they might be
19028 used to pass values in a tail call. */
19029 for (reg
= 4; reg
<= 12; reg
++)
19030 if ((save_reg_mask
& (1 << reg
)) == 0)
19034 save_reg_mask
|= (1 << reg
);
19037 cfun
->machine
->sibcall_blocked
= 1;
19038 save_reg_mask
|= (1 << 3);
19042 /* We may need to push an additional register for use initializing the
19043 PIC base register. */
19044 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19045 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19047 reg
= thumb_find_work_register (1 << 4);
19048 if (!call_used_regs
[reg
])
19049 save_reg_mask
|= (1 << reg
);
19052 return save_reg_mask
;
19056 /* Compute a bit mask of which registers need to be
19057 saved on the stack for the current function. */
19058 static unsigned long
19059 thumb1_compute_save_reg_mask (void)
19061 unsigned long mask
;
19065 for (reg
= 0; reg
< 12; reg
++)
19066 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
19070 && !TARGET_SINGLE_PIC_BASE
19071 && arm_pic_register
!= INVALID_REGNUM
19072 && crtl
->uses_pic_offset_table
)
19073 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19075 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19076 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19077 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19079 /* LR will also be pushed if any lo regs are pushed. */
19080 if (mask
& 0xff || thumb_force_lr_save ())
19081 mask
|= (1 << LR_REGNUM
);
19083 /* Make sure we have a low work register if we need one.
19084 We will need one if we are going to push a high register,
19085 but we are not currently intending to push a low register. */
19086 if ((mask
& 0xff) == 0
19087 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19089 /* Use thumb_find_work_register to choose which register
19090 we will use. If the register is live then we will
19091 have to push it. Use LAST_LO_REGNUM as our fallback
19092 choice for the register to select. */
19093 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19094 /* Make sure the register returned by thumb_find_work_register is
19095 not part of the return value. */
19096 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19097 reg
= LAST_LO_REGNUM
;
19099 if (! call_used_regs
[reg
])
19103 /* The 504 below is 8 bytes less than 512 because there are two possible
19104 alignment words. We can't tell here if they will be present or not so we
19105 have to play it safe and assume that they are. */
19106 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19107 ROUND_UP_WORD (get_frame_size ()) +
19108 crtl
->outgoing_args_size
) >= 504)
19110 /* This is the same as the code in thumb1_expand_prologue() which
19111 determines which register to use for stack decrement. */
19112 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19113 if (mask
& (1 << reg
))
19116 if (reg
> LAST_LO_REGNUM
)
19118 /* Make sure we have a register available for stack decrement. */
19119 mask
|= 1 << LAST_LO_REGNUM
;
19127 /* Return the number of bytes required to save VFP registers. */
19129 arm_get_vfp_saved_size (void)
19131 unsigned int regno
;
19136 /* Space for saved VFP registers. */
19137 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19140 for (regno
= FIRST_VFP_REGNUM
;
19141 regno
< LAST_VFP_REGNUM
;
19144 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19145 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19149 /* Workaround ARM10 VFPr1 bug. */
19150 if (count
== 2 && !arm_arch6
)
19152 saved
+= count
* 8;
19161 if (count
== 2 && !arm_arch6
)
19163 saved
+= count
* 8;
19170 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19171 everything bar the final return instruction. If simple_return is true,
19172 then do not output epilogue, because it has already been emitted in RTL. */
19174 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19175 bool simple_return
)
19177 char conditional
[10];
19180 unsigned long live_regs_mask
;
19181 unsigned long func_type
;
19182 arm_stack_offsets
*offsets
;
19184 func_type
= arm_current_func_type ();
19186 if (IS_NAKED (func_type
))
19189 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19191 /* If this function was declared non-returning, and we have
19192 found a tail call, then we have to trust that the called
19193 function won't return. */
19198 /* Otherwise, trap an attempted return by aborting. */
19200 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19202 assemble_external_libcall (ops
[1]);
19203 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19209 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19211 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19213 cfun
->machine
->return_used_this_function
= 1;
19215 offsets
= arm_get_frame_offsets ();
19216 live_regs_mask
= offsets
->saved_regs_mask
;
19218 if (!simple_return
&& live_regs_mask
)
19220 const char * return_reg
;
19222 /* If we do not have any special requirements for function exit
19223 (e.g. interworking) then we can load the return address
19224 directly into the PC. Otherwise we must load it into LR. */
19226 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19227 return_reg
= reg_names
[PC_REGNUM
];
19229 return_reg
= reg_names
[LR_REGNUM
];
19231 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19233 /* There are three possible reasons for the IP register
19234 being saved. 1) a stack frame was created, in which case
19235 IP contains the old stack pointer, or 2) an ISR routine
19236 corrupted it, or 3) it was saved to align the stack on
19237 iWMMXt. In case 1, restore IP into SP, otherwise just
19239 if (frame_pointer_needed
)
19241 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19242 live_regs_mask
|= (1 << SP_REGNUM
);
19245 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19248 /* On some ARM architectures it is faster to use LDR rather than
19249 LDM to load a single register. On other architectures, the
19250 cost is the same. In 26 bit mode, or for exception handlers,
19251 we have to use LDM to load the PC so that the CPSR is also
19253 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19254 if (live_regs_mask
== (1U << reg
))
19257 if (reg
<= LAST_ARM_REGNUM
19258 && (reg
!= LR_REGNUM
19260 || ! IS_INTERRUPT (func_type
)))
19262 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19263 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19270 /* Generate the load multiple instruction to restore the
19271 registers. Note we can get here, even if
19272 frame_pointer_needed is true, but only if sp already
19273 points to the base of the saved core registers. */
19274 if (live_regs_mask
& (1 << SP_REGNUM
))
19276 unsigned HOST_WIDE_INT stack_adjust
;
19278 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19279 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19281 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19282 if (TARGET_UNIFIED_ASM
)
19283 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19285 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19288 /* If we can't use ldmib (SA110 bug),
19289 then try to pop r3 instead. */
19291 live_regs_mask
|= 1 << 3;
19293 if (TARGET_UNIFIED_ASM
)
19294 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19296 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19300 if (TARGET_UNIFIED_ASM
)
19301 sprintf (instr
, "pop%s\t{", conditional
);
19303 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
19305 p
= instr
+ strlen (instr
);
19307 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19308 if (live_regs_mask
& (1 << reg
))
19310 int l
= strlen (reg_names
[reg
]);
19316 memcpy (p
, ", ", 2);
19320 memcpy (p
, "%|", 2);
19321 memcpy (p
+ 2, reg_names
[reg
], l
);
19325 if (live_regs_mask
& (1 << LR_REGNUM
))
19327 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19328 /* If returning from an interrupt, restore the CPSR. */
19329 if (IS_INTERRUPT (func_type
))
19336 output_asm_insn (instr
, & operand
);
19338 /* See if we need to generate an extra instruction to
19339 perform the actual function return. */
19341 && func_type
!= ARM_FT_INTERWORKED
19342 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19344 /* The return has already been handled
19345 by loading the LR into the PC. */
19352 switch ((int) ARM_FUNC_TYPE (func_type
))
19356 /* ??? This is wrong for unified assembly syntax. */
19357 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19360 case ARM_FT_INTERWORKED
:
19361 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19364 case ARM_FT_EXCEPTION
:
19365 /* ??? This is wrong for unified assembly syntax. */
19366 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19370 /* Use bx if it's available. */
19371 if (arm_arch5
|| arm_arch4t
)
19372 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19374 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19378 output_asm_insn (instr
, & operand
);
19384 /* Write the function name into the code section, directly preceding
19385 the function prologue.
19387 Code will be output similar to this:
19389 .ascii "arm_poke_function_name", 0
19392 .word 0xff000000 + (t1 - t0)
19393 arm_poke_function_name
19395 stmfd sp!, {fp, ip, lr, pc}
19398 When performing a stack backtrace, code can inspect the value
19399 of 'pc' stored at 'fp' + 0. If the trace function then looks
19400 at location pc - 12 and the top 8 bits are set, then we know
19401 that there is a function name embedded immediately preceding this
19402 location and has length ((pc[-3]) & 0xff000000).
19404 We assume that pc is declared as a pointer to an unsigned long.
19406 It is of no benefit to output the function name if we are assembling
19407 a leaf function. These function types will not contain a stack
19408 backtrace structure, therefore it is not possible to determine the
19411 arm_poke_function_name (FILE *stream
, const char *name
)
19413 unsigned long alignlength
;
19414 unsigned long length
;
19417 length
= strlen (name
) + 1;
19418 alignlength
= ROUND_UP_WORD (length
);
19420 ASM_OUTPUT_ASCII (stream
, name
, length
);
19421 ASM_OUTPUT_ALIGN (stream
, 2);
19422 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19423 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19426 /* Place some comments into the assembler stream
19427 describing the current function. */
19429 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19431 unsigned long func_type
;
19433 /* ??? Do we want to print some of the below anyway? */
19437 /* Sanity check. */
19438 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19440 func_type
= arm_current_func_type ();
19442 switch ((int) ARM_FUNC_TYPE (func_type
))
19445 case ARM_FT_NORMAL
:
19447 case ARM_FT_INTERWORKED
:
19448 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19451 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19454 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19456 case ARM_FT_EXCEPTION
:
19457 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19461 if (IS_NAKED (func_type
))
19462 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19464 if (IS_VOLATILE (func_type
))
19465 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19467 if (IS_NESTED (func_type
))
19468 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19469 if (IS_STACKALIGN (func_type
))
19470 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19472 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19474 crtl
->args
.pretend_args_size
, frame_size
);
19476 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19477 frame_pointer_needed
,
19478 cfun
->machine
->uses_anonymous_args
);
19480 if (cfun
->machine
->lr_save_eliminated
)
19481 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19483 if (crtl
->calls_eh_return
)
19484 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19489 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19490 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19492 arm_stack_offsets
*offsets
;
19498 /* Emit any call-via-reg trampolines that are needed for v4t support
19499 of call_reg and call_value_reg type insns. */
19500 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19502 rtx label
= cfun
->machine
->call_via
[regno
];
19506 switch_to_section (function_section (current_function_decl
));
19507 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19508 CODE_LABEL_NUMBER (label
));
19509 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19513 /* ??? Probably not safe to set this here, since it assumes that a
19514 function will be emitted as assembly immediately after we generate
19515 RTL for it. This does not happen for inline functions. */
19516 cfun
->machine
->return_used_this_function
= 0;
19518 else /* TARGET_32BIT */
19520 /* We need to take into account any stack-frame rounding. */
19521 offsets
= arm_get_frame_offsets ();
19523 gcc_assert (!use_return_insn (FALSE
, NULL
)
19524 || (cfun
->machine
->return_used_this_function
!= 0)
19525 || offsets
->saved_regs
== offsets
->outgoing_args
19526 || frame_pointer_needed
);
19530 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19531 STR and STRD. If an even number of registers are being pushed, one
19532 or more STRD patterns are created for each register pair. If an
19533 odd number of registers are pushed, emit an initial STR followed by
19534 as many STRD instructions as are needed. This works best when the
19535 stack is initially 64-bit aligned (the normal case), since it
19536 ensures that each STRD is also 64-bit aligned. */
19538 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19543 rtx par
= NULL_RTX
;
19544 rtx dwarf
= NULL_RTX
;
19548 num_regs
= bit_count (saved_regs_mask
);
19550 /* Must be at least one register to save, and can't save SP or PC. */
19551 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19552 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19553 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19555 /* Create sequence for DWARF info. All the frame-related data for
19556 debugging is held in this wrapper. */
19557 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19559 /* Describe the stack adjustment. */
19560 tmp
= gen_rtx_SET (VOIDmode
,
19562 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19563 RTX_FRAME_RELATED_P (tmp
) = 1;
19564 XVECEXP (dwarf
, 0, 0) = tmp
;
19566 /* Find the first register. */
19567 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19572 /* If there's an odd number of registers to push. Start off by
19573 pushing a single register. This ensures that subsequent strd
19574 operations are dword aligned (assuming that SP was originally
19575 64-bit aligned). */
19576 if ((num_regs
& 1) != 0)
19578 rtx reg
, mem
, insn
;
19580 reg
= gen_rtx_REG (SImode
, regno
);
19582 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19583 stack_pointer_rtx
));
19585 mem
= gen_frame_mem (Pmode
,
19587 (Pmode
, stack_pointer_rtx
,
19588 plus_constant (Pmode
, stack_pointer_rtx
,
19591 tmp
= gen_rtx_SET (VOIDmode
, mem
, reg
);
19592 RTX_FRAME_RELATED_P (tmp
) = 1;
19593 insn
= emit_insn (tmp
);
19594 RTX_FRAME_RELATED_P (insn
) = 1;
19595 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19596 tmp
= gen_rtx_SET (VOIDmode
, gen_frame_mem (Pmode
, stack_pointer_rtx
),
19598 RTX_FRAME_RELATED_P (tmp
) = 1;
19601 XVECEXP (dwarf
, 0, i
) = tmp
;
19605 while (i
< num_regs
)
19606 if (saved_regs_mask
& (1 << regno
))
19608 rtx reg1
, reg2
, mem1
, mem2
;
19609 rtx tmp0
, tmp1
, tmp2
;
19612 /* Find the register to pair with this one. */
19613 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19617 reg1
= gen_rtx_REG (SImode
, regno
);
19618 reg2
= gen_rtx_REG (SImode
, regno2
);
19625 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19628 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19630 -4 * (num_regs
- 1)));
19631 tmp0
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19632 plus_constant (Pmode
, stack_pointer_rtx
,
19634 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19635 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19636 RTX_FRAME_RELATED_P (tmp0
) = 1;
19637 RTX_FRAME_RELATED_P (tmp1
) = 1;
19638 RTX_FRAME_RELATED_P (tmp2
) = 1;
19639 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19640 XVECEXP (par
, 0, 0) = tmp0
;
19641 XVECEXP (par
, 0, 1) = tmp1
;
19642 XVECEXP (par
, 0, 2) = tmp2
;
19643 insn
= emit_insn (par
);
19644 RTX_FRAME_RELATED_P (insn
) = 1;
19645 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19649 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19652 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19655 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19656 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19657 RTX_FRAME_RELATED_P (tmp1
) = 1;
19658 RTX_FRAME_RELATED_P (tmp2
) = 1;
19659 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19660 XVECEXP (par
, 0, 0) = tmp1
;
19661 XVECEXP (par
, 0, 1) = tmp2
;
19665 /* Create unwind information. This is an approximation. */
19666 tmp1
= gen_rtx_SET (VOIDmode
,
19667 gen_frame_mem (Pmode
,
19668 plus_constant (Pmode
,
19672 tmp2
= gen_rtx_SET (VOIDmode
,
19673 gen_frame_mem (Pmode
,
19674 plus_constant (Pmode
,
19679 RTX_FRAME_RELATED_P (tmp1
) = 1;
19680 RTX_FRAME_RELATED_P (tmp2
) = 1;
19681 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19682 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19684 regno
= regno2
+ 1;
19692 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19693 whenever possible, otherwise it emits single-word stores. The first store
19694 also allocates stack space for all saved registers, using writeback with
19695 post-addressing mode. All other stores use offset addressing. If no STRD
19696 can be emitted, this function emits a sequence of single-word stores,
19697 and not an STM as before, because single-word stores provide more freedom
19698 scheduling and can be turned into an STM by peephole optimizations. */
19700 arm_emit_strd_push (unsigned long saved_regs_mask
)
19703 int i
, j
, dwarf_index
= 0;
19705 rtx dwarf
= NULL_RTX
;
19706 rtx insn
= NULL_RTX
;
19709 /* TODO: A more efficient code can be emitted by changing the
19710 layout, e.g., first push all pairs that can use STRD to keep the
19711 stack aligned, and then push all other registers. */
19712 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19713 if (saved_regs_mask
& (1 << i
))
19716 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19717 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19718 gcc_assert (num_regs
> 0);
19720 /* Create sequence for DWARF info. */
19721 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19723 /* For dwarf info, we generate explicit stack update. */
19724 tmp
= gen_rtx_SET (VOIDmode
,
19726 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19727 RTX_FRAME_RELATED_P (tmp
) = 1;
19728 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19730 /* Save registers. */
19731 offset
= - 4 * num_regs
;
19733 while (j
<= LAST_ARM_REGNUM
)
19734 if (saved_regs_mask
& (1 << j
))
19737 && (saved_regs_mask
& (1 << (j
+ 1))))
19739 /* Current register and previous register form register pair for
19740 which STRD can be generated. */
19743 /* Allocate stack space for all saved registers. */
19744 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19745 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19746 mem
= gen_frame_mem (DImode
, tmp
);
19749 else if (offset
> 0)
19750 mem
= gen_frame_mem (DImode
,
19751 plus_constant (Pmode
,
19755 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19757 tmp
= gen_rtx_SET (DImode
, mem
, gen_rtx_REG (DImode
, j
));
19758 RTX_FRAME_RELATED_P (tmp
) = 1;
19759 tmp
= emit_insn (tmp
);
19761 /* Record the first store insn. */
19762 if (dwarf_index
== 1)
19765 /* Generate dwarf info. */
19766 mem
= gen_frame_mem (SImode
,
19767 plus_constant (Pmode
,
19770 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19771 RTX_FRAME_RELATED_P (tmp
) = 1;
19772 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19774 mem
= gen_frame_mem (SImode
,
19775 plus_constant (Pmode
,
19778 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
+ 1));
19779 RTX_FRAME_RELATED_P (tmp
) = 1;
19780 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19787 /* Emit a single word store. */
19790 /* Allocate stack space for all saved registers. */
19791 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19792 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19793 mem
= gen_frame_mem (SImode
, tmp
);
19796 else if (offset
> 0)
19797 mem
= gen_frame_mem (SImode
,
19798 plus_constant (Pmode
,
19802 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19804 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19805 RTX_FRAME_RELATED_P (tmp
) = 1;
19806 tmp
= emit_insn (tmp
);
19808 /* Record the first store insn. */
19809 if (dwarf_index
== 1)
19812 /* Generate dwarf info. */
19813 mem
= gen_frame_mem (SImode
,
19814 plus_constant(Pmode
,
19817 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19818 RTX_FRAME_RELATED_P (tmp
) = 1;
19819 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19828 /* Attach dwarf info to the first insn we generate. */
19829 gcc_assert (insn
!= NULL_RTX
);
19830 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19831 RTX_FRAME_RELATED_P (insn
) = 1;
19834 /* Generate and emit an insn that we will recognize as a push_multi.
19835 Unfortunately, since this insn does not reflect very well the actual
19836 semantics of the operation, we need to annotate the insn for the benefit
19837 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19838 MASK for registers that should be annotated for DWARF2 frame unwind
19841 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
19844 int num_dwarf_regs
= 0;
19848 int dwarf_par_index
;
19851 /* We don't record the PC in the dwarf frame information. */
19852 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
19854 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19856 if (mask
& (1 << i
))
19858 if (dwarf_regs_mask
& (1 << i
))
19862 gcc_assert (num_regs
&& num_regs
<= 16);
19863 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
19865 /* For the body of the insn we are going to generate an UNSPEC in
19866 parallel with several USEs. This allows the insn to be recognized
19867 by the push_multi pattern in the arm.md file.
19869 The body of the insn looks something like this:
19872 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19873 (const_int:SI <num>)))
19874 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19880 For the frame note however, we try to be more explicit and actually
19881 show each register being stored into the stack frame, plus a (single)
19882 decrement of the stack pointer. We do it this way in order to be
19883 friendly to the stack unwinding code, which only wants to see a single
19884 stack decrement per instruction. The RTL we generate for the note looks
19885 something like this:
19888 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19889 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19890 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19891 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19895 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19896 instead we'd have a parallel expression detailing all
19897 the stores to the various memory addresses so that debug
19898 information is more up-to-date. Remember however while writing
19899 this to take care of the constraints with the push instruction.
19901 Note also that this has to be taken care of for the VFP registers.
19903 For more see PR43399. */
19905 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
19906 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
19907 dwarf_par_index
= 1;
19909 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19911 if (mask
& (1 << i
))
19913 reg
= gen_rtx_REG (SImode
, i
);
19915 XVECEXP (par
, 0, 0)
19916 = gen_rtx_SET (VOIDmode
,
19919 gen_rtx_PRE_MODIFY (Pmode
,
19922 (Pmode
, stack_pointer_rtx
,
19925 gen_rtx_UNSPEC (BLKmode
,
19926 gen_rtvec (1, reg
),
19927 UNSPEC_PUSH_MULT
));
19929 if (dwarf_regs_mask
& (1 << i
))
19931 tmp
= gen_rtx_SET (VOIDmode
,
19932 gen_frame_mem (SImode
, stack_pointer_rtx
),
19934 RTX_FRAME_RELATED_P (tmp
) = 1;
19935 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19942 for (j
= 1, i
++; j
< num_regs
; i
++)
19944 if (mask
& (1 << i
))
19946 reg
= gen_rtx_REG (SImode
, i
);
19948 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
19950 if (dwarf_regs_mask
& (1 << i
))
19953 = gen_rtx_SET (VOIDmode
,
19956 plus_constant (Pmode
, stack_pointer_rtx
,
19959 RTX_FRAME_RELATED_P (tmp
) = 1;
19960 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19967 par
= emit_insn (par
);
19969 tmp
= gen_rtx_SET (VOIDmode
,
19971 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19972 RTX_FRAME_RELATED_P (tmp
) = 1;
19973 XVECEXP (dwarf
, 0, 0) = tmp
;
19975 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19980 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19981 SIZE is the offset to be adjusted.
19982 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19984 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
19988 RTX_FRAME_RELATED_P (insn
) = 1;
19989 dwarf
= gen_rtx_SET (VOIDmode
, dest
, plus_constant (Pmode
, src
, size
));
19990 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
19993 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19994 SAVED_REGS_MASK shows which registers need to be restored.
19996 Unfortunately, since this insn does not reflect very well the actual
19997 semantics of the operation, we need to annotate the insn for the benefit
19998 of DWARF2 frame unwind information. */
20000 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20005 rtx dwarf
= NULL_RTX
;
20011 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20012 offset_adj
= return_in_pc
? 1 : 0;
20013 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20014 if (saved_regs_mask
& (1 << i
))
20017 gcc_assert (num_regs
&& num_regs
<= 16);
20019 /* If SP is in reglist, then we don't emit SP update insn. */
20020 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20022 /* The parallel needs to hold num_regs SETs
20023 and one SET for the stack update. */
20024 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20029 XVECEXP (par
, 0, 0) = tmp
;
20034 /* Increment the stack pointer, based on there being
20035 num_regs 4-byte registers to restore. */
20036 tmp
= gen_rtx_SET (VOIDmode
,
20038 plus_constant (Pmode
,
20041 RTX_FRAME_RELATED_P (tmp
) = 1;
20042 XVECEXP (par
, 0, offset_adj
) = tmp
;
20045 /* Now restore every reg, which may include PC. */
20046 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20047 if (saved_regs_mask
& (1 << i
))
20049 reg
= gen_rtx_REG (SImode
, i
);
20050 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20052 /* Emit single load with writeback. */
20053 tmp
= gen_frame_mem (SImode
,
20054 gen_rtx_POST_INC (Pmode
,
20055 stack_pointer_rtx
));
20056 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, tmp
));
20057 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20061 tmp
= gen_rtx_SET (VOIDmode
,
20065 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20066 RTX_FRAME_RELATED_P (tmp
) = 1;
20067 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20069 /* We need to maintain a sequence for DWARF info too. As dwarf info
20070 should not have PC, skip PC. */
20071 if (i
!= PC_REGNUM
)
20072 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20078 par
= emit_jump_insn (par
);
20080 par
= emit_insn (par
);
20082 REG_NOTES (par
) = dwarf
;
20084 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20085 stack_pointer_rtx
, stack_pointer_rtx
);
20088 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20089 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20091 Unfortunately, since this insn does not reflect very well the actual
20092 semantics of the operation, we need to annotate the insn for the benefit
20093 of DWARF2 frame unwind information. */
20095 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20099 rtx dwarf
= NULL_RTX
;
20102 gcc_assert (num_regs
&& num_regs
<= 32);
20104 /* Workaround ARM10 VFPr1 bug. */
20105 if (num_regs
== 2 && !arm_arch6
)
20107 if (first_reg
== 15)
20113 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20114 there could be up to 32 D-registers to restore.
20115 If there are more than 16 D-registers, make two recursive calls,
20116 each of which emits one pop_multi instruction. */
20119 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20120 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20124 /* The parallel needs to hold num_regs SETs
20125 and one SET for the stack update. */
20126 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20128 /* Increment the stack pointer, based on there being
20129 num_regs 8-byte registers to restore. */
20130 tmp
= gen_rtx_SET (VOIDmode
,
20132 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20133 RTX_FRAME_RELATED_P (tmp
) = 1;
20134 XVECEXP (par
, 0, 0) = tmp
;
20136 /* Now show every reg that will be restored, using a SET for each. */
20137 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20139 reg
= gen_rtx_REG (DFmode
, i
);
20141 tmp
= gen_rtx_SET (VOIDmode
,
20145 plus_constant (Pmode
, base_reg
, 8 * j
)));
20146 RTX_FRAME_RELATED_P (tmp
) = 1;
20147 XVECEXP (par
, 0, j
+ 1) = tmp
;
20149 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20154 par
= emit_insn (par
);
20155 REG_NOTES (par
) = dwarf
;
20157 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20158 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
20160 RTX_FRAME_RELATED_P (par
) = 1;
20161 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20164 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20165 base_reg
, base_reg
);
20168 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20169 number of registers are being popped, multiple LDRD patterns are created for
20170 all register pairs. If odd number of registers are popped, last register is
20171 loaded by using LDR pattern. */
20173 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20177 rtx par
= NULL_RTX
;
20178 rtx dwarf
= NULL_RTX
;
20179 rtx tmp
, reg
, tmp1
;
20182 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20183 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20184 if (saved_regs_mask
& (1 << i
))
20187 gcc_assert (num_regs
&& num_regs
<= 16);
20189 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20190 to be popped. So, if num_regs is even, now it will become odd,
20191 and we can generate pop with PC. If num_regs is odd, it will be
20192 even now, and ldr with return can be generated for PC. */
20196 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20198 /* Var j iterates over all the registers to gather all the registers in
20199 saved_regs_mask. Var i gives index of saved registers in stack frame.
20200 A PARALLEL RTX of register-pair is created here, so that pattern for
20201 LDRD can be matched. As PC is always last register to be popped, and
20202 we have already decremented num_regs if PC, we don't have to worry
20203 about PC in this loop. */
20204 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20205 if (saved_regs_mask
& (1 << j
))
20207 /* Create RTX for memory load. */
20208 reg
= gen_rtx_REG (SImode
, j
);
20209 tmp
= gen_rtx_SET (SImode
,
20211 gen_frame_mem (SImode
,
20212 plus_constant (Pmode
,
20213 stack_pointer_rtx
, 4 * i
)));
20214 RTX_FRAME_RELATED_P (tmp
) = 1;
20218 /* When saved-register index (i) is even, the RTX to be emitted is
20219 yet to be created. Hence create it first. The LDRD pattern we
20220 are generating is :
20221 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20222 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20223 where target registers need not be consecutive. */
20224 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20228 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20229 added as 0th element and if i is odd, reg_i is added as 1st element
20230 of LDRD pattern shown above. */
20231 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20232 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20236 /* When saved-register index (i) is odd, RTXs for both the registers
20237 to be loaded are generated in above given LDRD pattern, and the
20238 pattern can be emitted now. */
20239 par
= emit_insn (par
);
20240 REG_NOTES (par
) = dwarf
;
20241 RTX_FRAME_RELATED_P (par
) = 1;
20247 /* If the number of registers pushed is odd AND return_in_pc is false OR
20248 number of registers are even AND return_in_pc is true, last register is
20249 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20250 then LDR with post increment. */
20252 /* Increment the stack pointer, based on there being
20253 num_regs 4-byte registers to restore. */
20254 tmp
= gen_rtx_SET (VOIDmode
,
20256 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20257 RTX_FRAME_RELATED_P (tmp
) = 1;
20258 tmp
= emit_insn (tmp
);
20261 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20262 stack_pointer_rtx
, stack_pointer_rtx
);
20267 if (((num_regs
% 2) == 1 && !return_in_pc
)
20268 || ((num_regs
% 2) == 0 && return_in_pc
))
20270 /* Scan for the single register to be popped. Skip until the saved
20271 register is found. */
20272 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20274 /* Gen LDR with post increment here. */
20275 tmp1
= gen_rtx_MEM (SImode
,
20276 gen_rtx_POST_INC (SImode
,
20277 stack_pointer_rtx
));
20278 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20280 reg
= gen_rtx_REG (SImode
, j
);
20281 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
20282 RTX_FRAME_RELATED_P (tmp
) = 1;
20283 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20287 /* If return_in_pc, j must be PC_REGNUM. */
20288 gcc_assert (j
== PC_REGNUM
);
20289 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20290 XVECEXP (par
, 0, 0) = ret_rtx
;
20291 XVECEXP (par
, 0, 1) = tmp
;
20292 par
= emit_jump_insn (par
);
20296 par
= emit_insn (tmp
);
20297 REG_NOTES (par
) = dwarf
;
20298 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20299 stack_pointer_rtx
, stack_pointer_rtx
);
20303 else if ((num_regs
% 2) == 1 && return_in_pc
)
20305 /* There are 2 registers to be popped. So, generate the pattern
20306 pop_multiple_with_stack_update_and_return to pop in PC. */
20307 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20313 /* LDRD in ARM mode needs consecutive registers as operands. This function
20314 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20315 offset addressing and then generates one separate stack udpate. This provides
20316 more scheduling freedom, compared to writeback on every load. However,
20317 if the function returns using load into PC directly
20318 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20319 before the last load. TODO: Add a peephole optimization to recognize
20320 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20321 peephole optimization to merge the load at stack-offset zero
20322 with the stack update instruction using load with writeback
20323 in post-index addressing mode. */
20325 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20329 rtx par
= NULL_RTX
;
20330 rtx dwarf
= NULL_RTX
;
20333 /* Restore saved registers. */
20334 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20336 while (j
<= LAST_ARM_REGNUM
)
20337 if (saved_regs_mask
& (1 << j
))
20340 && (saved_regs_mask
& (1 << (j
+ 1)))
20341 && (j
+ 1) != PC_REGNUM
)
20343 /* Current register and next register form register pair for which
20344 LDRD can be generated. PC is always the last register popped, and
20345 we handle it separately. */
20347 mem
= gen_frame_mem (DImode
,
20348 plus_constant (Pmode
,
20352 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20354 tmp
= gen_rtx_SET (DImode
, gen_rtx_REG (DImode
, j
), mem
);
20355 tmp
= emit_insn (tmp
);
20356 RTX_FRAME_RELATED_P (tmp
) = 1;
20358 /* Generate dwarf info. */
20360 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20361 gen_rtx_REG (SImode
, j
),
20363 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20364 gen_rtx_REG (SImode
, j
+ 1),
20367 REG_NOTES (tmp
) = dwarf
;
20372 else if (j
!= PC_REGNUM
)
20374 /* Emit a single word load. */
20376 mem
= gen_frame_mem (SImode
,
20377 plus_constant (Pmode
,
20381 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20383 tmp
= gen_rtx_SET (SImode
, gen_rtx_REG (SImode
, j
), mem
);
20384 tmp
= emit_insn (tmp
);
20385 RTX_FRAME_RELATED_P (tmp
) = 1;
20387 /* Generate dwarf info. */
20388 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20389 gen_rtx_REG (SImode
, j
),
20395 else /* j == PC_REGNUM */
20401 /* Update the stack. */
20404 tmp
= gen_rtx_SET (Pmode
,
20406 plus_constant (Pmode
,
20409 tmp
= emit_insn (tmp
);
20410 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20411 stack_pointer_rtx
, stack_pointer_rtx
);
20415 if (saved_regs_mask
& (1 << PC_REGNUM
))
20417 /* Only PC is to be popped. */
20418 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20419 XVECEXP (par
, 0, 0) = ret_rtx
;
20420 tmp
= gen_rtx_SET (SImode
,
20421 gen_rtx_REG (SImode
, PC_REGNUM
),
20422 gen_frame_mem (SImode
,
20423 gen_rtx_POST_INC (SImode
,
20424 stack_pointer_rtx
)));
20425 RTX_FRAME_RELATED_P (tmp
) = 1;
20426 XVECEXP (par
, 0, 1) = tmp
;
20427 par
= emit_jump_insn (par
);
20429 /* Generate dwarf info. */
20430 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20431 gen_rtx_REG (SImode
, PC_REGNUM
),
20433 REG_NOTES (par
) = dwarf
;
20434 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20435 stack_pointer_rtx
, stack_pointer_rtx
);
20439 /* Calculate the size of the return value that is passed in registers. */
20441 arm_size_return_regs (void)
20443 enum machine_mode mode
;
20445 if (crtl
->return_rtx
!= 0)
20446 mode
= GET_MODE (crtl
->return_rtx
);
20448 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20450 return GET_MODE_SIZE (mode
);
20453 /* Return true if the current function needs to save/restore LR. */
20455 thumb_force_lr_save (void)
20457 return !cfun
->machine
->lr_save_eliminated
20458 && (!leaf_function_p ()
20459 || thumb_far_jump_used_p ()
20460 || df_regs_ever_live_p (LR_REGNUM
));
20463 /* We do not know if r3 will be available because
20464 we do have an indirect tailcall happening in this
20465 particular case. */
20467 is_indirect_tailcall_p (rtx call
)
20469 rtx pat
= PATTERN (call
);
20471 /* Indirect tail call. */
20472 pat
= XVECEXP (pat
, 0, 0);
20473 if (GET_CODE (pat
) == SET
)
20474 pat
= SET_SRC (pat
);
20476 pat
= XEXP (XEXP (pat
, 0), 0);
20477 return REG_P (pat
);
20480 /* Return true if r3 is used by any of the tail call insns in the
20481 current function. */
20483 any_sibcall_could_use_r3 (void)
20488 if (!crtl
->tail_call_emit
)
20490 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20491 if (e
->flags
& EDGE_SIBCALL
)
20493 rtx call
= BB_END (e
->src
);
20494 if (!CALL_P (call
))
20495 call
= prev_nonnote_nondebug_insn (call
);
20496 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20497 if (find_regno_fusage (call
, USE
, 3)
20498 || is_indirect_tailcall_p (call
))
20505 /* Compute the distance from register FROM to register TO.
20506 These can be the arg pointer (26), the soft frame pointer (25),
20507 the stack pointer (13) or the hard frame pointer (11).
20508 In thumb mode r7 is used as the soft frame pointer, if needed.
20509 Typical stack layout looks like this:
20511 old stack pointer -> | |
20514 | | saved arguments for
20515 | | vararg functions
20518 hard FP & arg pointer -> | | \
20526 soft frame pointer -> | | /
20531 locals base pointer -> | | /
20536 current stack pointer -> | | /
20539 For a given function some or all of these stack components
20540 may not be needed, giving rise to the possibility of
20541 eliminating some of the registers.
20543 The values returned by this function must reflect the behavior
20544 of arm_expand_prologue() and arm_compute_save_reg_mask().
20546 The sign of the number returned reflects the direction of stack
20547 growth, so the values are positive for all eliminations except
20548 from the soft frame pointer to the hard frame pointer.
20550 SFP may point just inside the local variables block to ensure correct
20554 /* Calculate stack offsets. These are used to calculate register elimination
20555 offsets and in prologue/epilogue code. Also calculates which registers
20556 should be saved. */
20558 static arm_stack_offsets
*
20559 arm_get_frame_offsets (void)
20561 struct arm_stack_offsets
*offsets
;
20562 unsigned long func_type
;
20566 HOST_WIDE_INT frame_size
;
20569 offsets
= &cfun
->machine
->stack_offsets
;
20571 /* We need to know if we are a leaf function. Unfortunately, it
20572 is possible to be called after start_sequence has been called,
20573 which causes get_insns to return the insns for the sequence,
20574 not the function, which will cause leaf_function_p to return
20575 the incorrect result.
20577 to know about leaf functions once reload has completed, and the
20578 frame size cannot be changed after that time, so we can safely
20579 use the cached value. */
20581 if (reload_completed
)
20584 /* Initially this is the size of the local variables. It will translated
20585 into an offset once we have determined the size of preceding data. */
20586 frame_size
= ROUND_UP_WORD (get_frame_size ());
20588 leaf
= leaf_function_p ();
20590 /* Space for variadic functions. */
20591 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20593 /* In Thumb mode this is incorrect, but never used. */
20595 = (offsets
->saved_args
20596 + arm_compute_static_chain_stack_bytes ()
20597 + (frame_pointer_needed
? 4 : 0));
20601 unsigned int regno
;
20603 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20604 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20605 saved
= core_saved
;
20607 /* We know that SP will be doubleword aligned on entry, and we must
20608 preserve that condition at any subroutine call. We also require the
20609 soft frame pointer to be doubleword aligned. */
20611 if (TARGET_REALLY_IWMMXT
)
20613 /* Check for the call-saved iWMMXt registers. */
20614 for (regno
= FIRST_IWMMXT_REGNUM
;
20615 regno
<= LAST_IWMMXT_REGNUM
;
20617 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20621 func_type
= arm_current_func_type ();
20622 /* Space for saved VFP registers. */
20623 if (! IS_VOLATILE (func_type
)
20624 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20625 saved
+= arm_get_vfp_saved_size ();
20627 else /* TARGET_THUMB1 */
20629 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20630 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20631 saved
= core_saved
;
20632 if (TARGET_BACKTRACE
)
20636 /* Saved registers include the stack frame. */
20637 offsets
->saved_regs
20638 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20639 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20641 /* A leaf function does not need any stack alignment if it has nothing
20643 if (leaf
&& frame_size
== 0
20644 /* However if it calls alloca(), we have a dynamically allocated
20645 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20646 && ! cfun
->calls_alloca
)
20648 offsets
->outgoing_args
= offsets
->soft_frame
;
20649 offsets
->locals_base
= offsets
->soft_frame
;
20653 /* Ensure SFP has the correct alignment. */
20654 if (ARM_DOUBLEWORD_ALIGN
20655 && (offsets
->soft_frame
& 7))
20657 offsets
->soft_frame
+= 4;
20658 /* Try to align stack by pushing an extra reg. Don't bother doing this
20659 when there is a stack frame as the alignment will be rolled into
20660 the normal stack adjustment. */
20661 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20665 /* If it is safe to use r3, then do so. This sometimes
20666 generates better code on Thumb-2 by avoiding the need to
20667 use 32-bit push/pop instructions. */
20668 if (! any_sibcall_could_use_r3 ()
20669 && arm_size_return_regs () <= 12
20670 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20672 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20677 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20679 /* Avoid fixed registers; they may be changed at
20680 arbitrary times so it's unsafe to restore them
20681 during the epilogue. */
20683 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20692 offsets
->saved_regs
+= 4;
20693 offsets
->saved_regs_mask
|= (1 << reg
);
20698 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20699 offsets
->outgoing_args
= (offsets
->locals_base
20700 + crtl
->outgoing_args_size
);
20702 if (ARM_DOUBLEWORD_ALIGN
)
20704 /* Ensure SP remains doubleword aligned. */
20705 if (offsets
->outgoing_args
& 7)
20706 offsets
->outgoing_args
+= 4;
20707 gcc_assert (!(offsets
->outgoing_args
& 7));
20714 /* Calculate the relative offsets for the different stack pointers. Positive
20715 offsets are in the direction of stack growth. */
20718 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20720 arm_stack_offsets
*offsets
;
20722 offsets
= arm_get_frame_offsets ();
20724 /* OK, now we have enough information to compute the distances.
20725 There must be an entry in these switch tables for each pair
20726 of registers in ELIMINABLE_REGS, even if some of the entries
20727 seem to be redundant or useless. */
20730 case ARG_POINTER_REGNUM
:
20733 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20736 case FRAME_POINTER_REGNUM
:
20737 /* This is the reverse of the soft frame pointer
20738 to hard frame pointer elimination below. */
20739 return offsets
->soft_frame
- offsets
->saved_args
;
20741 case ARM_HARD_FRAME_POINTER_REGNUM
:
20742 /* This is only non-zero in the case where the static chain register
20743 is stored above the frame. */
20744 return offsets
->frame
- offsets
->saved_args
- 4;
20746 case STACK_POINTER_REGNUM
:
20747 /* If nothing has been pushed on the stack at all
20748 then this will return -4. This *is* correct! */
20749 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20752 gcc_unreachable ();
20754 gcc_unreachable ();
20756 case FRAME_POINTER_REGNUM
:
20759 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20762 case ARM_HARD_FRAME_POINTER_REGNUM
:
20763 /* The hard frame pointer points to the top entry in the
20764 stack frame. The soft frame pointer to the bottom entry
20765 in the stack frame. If there is no stack frame at all,
20766 then they are identical. */
20768 return offsets
->frame
- offsets
->soft_frame
;
20770 case STACK_POINTER_REGNUM
:
20771 return offsets
->outgoing_args
- offsets
->soft_frame
;
20774 gcc_unreachable ();
20776 gcc_unreachable ();
20779 /* You cannot eliminate from the stack pointer.
20780 In theory you could eliminate from the hard frame
20781 pointer to the stack pointer, but this will never
20782 happen, since if a stack frame is not needed the
20783 hard frame pointer will never be used. */
20784 gcc_unreachable ();
20788 /* Given FROM and TO register numbers, say whether this elimination is
20789 allowed. Frame pointer elimination is automatically handled.
20791 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20792 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20793 pointer, we must eliminate FRAME_POINTER_REGNUM into
20794 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20795 ARG_POINTER_REGNUM. */
20798 arm_can_eliminate (const int from
, const int to
)
20800 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20801 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20802 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20803 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20807 /* Emit RTL to save coprocessor registers on function entry. Returns the
20808 number of bytes pushed. */
20811 arm_save_coproc_regs(void)
20813 int saved_size
= 0;
20815 unsigned start_reg
;
20818 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20819 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20821 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20822 insn
= gen_rtx_MEM (V2SImode
, insn
);
20823 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20824 RTX_FRAME_RELATED_P (insn
) = 1;
20828 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
20830 start_reg
= FIRST_VFP_REGNUM
;
20832 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20834 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20835 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20837 if (start_reg
!= reg
)
20838 saved_size
+= vfp_emit_fstmd (start_reg
,
20839 (reg
- start_reg
) / 2);
20840 start_reg
= reg
+ 2;
20843 if (start_reg
!= reg
)
20844 saved_size
+= vfp_emit_fstmd (start_reg
,
20845 (reg
- start_reg
) / 2);
20851 /* Set the Thumb frame pointer from the stack pointer. */
20854 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
20856 HOST_WIDE_INT amount
;
20859 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
20861 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20862 stack_pointer_rtx
, GEN_INT (amount
)));
20865 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
20866 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20867 expects the first two operands to be the same. */
20870 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20872 hard_frame_pointer_rtx
));
20876 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20877 hard_frame_pointer_rtx
,
20878 stack_pointer_rtx
));
20880 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
20881 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
20882 RTX_FRAME_RELATED_P (dwarf
) = 1;
20883 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20886 RTX_FRAME_RELATED_P (insn
) = 1;
20889 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20892 arm_expand_prologue (void)
20897 unsigned long live_regs_mask
;
20898 unsigned long func_type
;
20900 int saved_pretend_args
= 0;
20901 int saved_regs
= 0;
20902 unsigned HOST_WIDE_INT args_to_push
;
20903 arm_stack_offsets
*offsets
;
20905 func_type
= arm_current_func_type ();
20907 /* Naked functions don't have prologues. */
20908 if (IS_NAKED (func_type
))
20911 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20912 args_to_push
= crtl
->args
.pretend_args_size
;
20914 /* Compute which register we will have to save onto the stack. */
20915 offsets
= arm_get_frame_offsets ();
20916 live_regs_mask
= offsets
->saved_regs_mask
;
20918 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
20920 if (IS_STACKALIGN (func_type
))
20924 /* Handle a word-aligned stack pointer. We generate the following:
20929 <save and restore r0 in normal prologue/epilogue>
20933 The unwinder doesn't need to know about the stack realignment.
20934 Just tell it we saved SP in r0. */
20935 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
20937 r0
= gen_rtx_REG (SImode
, 0);
20938 r1
= gen_rtx_REG (SImode
, 1);
20940 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
20941 RTX_FRAME_RELATED_P (insn
) = 1;
20942 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
20944 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
20946 /* ??? The CFA changes here, which may cause GDB to conclude that it
20947 has entered a different function. That said, the unwind info is
20948 correct, individually, before and after this instruction because
20949 we've described the save of SP, which will override the default
20950 handling of SP as restoring from the CFA. */
20951 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
20954 /* For APCS frames, if IP register is clobbered
20955 when creating frame, save that register in a special
20957 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20959 if (IS_INTERRUPT (func_type
))
20961 /* Interrupt functions must not corrupt any registers.
20962 Creating a frame pointer however, corrupts the IP
20963 register, so we must push it first. */
20964 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
20966 /* Do not set RTX_FRAME_RELATED_P on this insn.
20967 The dwarf stack unwinding code only wants to see one
20968 stack decrement per function, and this is not it. If
20969 this instruction is labeled as being part of the frame
20970 creation sequence then dwarf2out_frame_debug_expr will
20971 die when it encounters the assignment of IP to FP
20972 later on, since the use of SP here establishes SP as
20973 the CFA register and not IP.
20975 Anyway this instruction is not really part of the stack
20976 frame creation although it is part of the prologue. */
20978 else if (IS_NESTED (func_type
))
20980 /* The static chain register is the same as the IP register
20981 used as a scratch register during stack frame creation.
20982 To get around this need to find somewhere to store IP
20983 whilst the frame is being created. We try the following
20986 1. The last argument register r3 if it is available.
20987 2. A slot on the stack above the frame if there are no
20988 arguments to push onto the stack.
20989 3. Register r3 again, after pushing the argument registers
20990 onto the stack, if this is a varargs function.
20991 4. The last slot on the stack created for the arguments to
20992 push, if this isn't a varargs function.
20994 Note - we only need to tell the dwarf2 backend about the SP
20995 adjustment in the second variant; the static chain register
20996 doesn't need to be unwound, as it doesn't contain a value
20997 inherited from the caller. */
20999 if (!arm_r3_live_at_start_p ())
21000 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21001 else if (args_to_push
== 0)
21005 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21008 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21009 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21012 /* Just tell the dwarf backend that we adjusted SP. */
21013 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21014 plus_constant (Pmode
, stack_pointer_rtx
,
21016 RTX_FRAME_RELATED_P (insn
) = 1;
21017 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21021 /* Store the args on the stack. */
21022 if (cfun
->machine
->uses_anonymous_args
)
21025 = emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21026 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21027 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21028 saved_pretend_args
= 1;
21034 if (args_to_push
== 4)
21035 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21038 = gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21039 plus_constant (Pmode
,
21043 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21045 /* Just tell the dwarf backend that we adjusted SP. */
21047 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21048 plus_constant (Pmode
, stack_pointer_rtx
,
21050 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21053 RTX_FRAME_RELATED_P (insn
) = 1;
21054 fp_offset
= args_to_push
;
21059 insn
= emit_set_insn (ip_rtx
,
21060 plus_constant (Pmode
, stack_pointer_rtx
,
21062 RTX_FRAME_RELATED_P (insn
) = 1;
21067 /* Push the argument registers, or reserve space for them. */
21068 if (cfun
->machine
->uses_anonymous_args
)
21069 insn
= emit_multi_reg_push
21070 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21071 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21074 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21075 GEN_INT (- args_to_push
)));
21076 RTX_FRAME_RELATED_P (insn
) = 1;
21079 /* If this is an interrupt service routine, and the link register
21080 is going to be pushed, and we're not generating extra
21081 push of IP (needed when frame is needed and frame layout is apcs),
21082 subtracting four from LR now will mean that the function return
21083 can be done with a single instruction. */
21084 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21085 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21086 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21089 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21091 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21094 if (live_regs_mask
)
21096 unsigned long dwarf_regs_mask
= live_regs_mask
;
21098 saved_regs
+= bit_count (live_regs_mask
) * 4;
21099 if (optimize_size
&& !frame_pointer_needed
21100 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21102 /* If no coprocessor registers are being pushed and we don't have
21103 to worry about a frame pointer then push extra registers to
21104 create the stack frame. This is done in a way that does not
21105 alter the frame layout, so is independent of the epilogue. */
21109 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21111 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21112 if (frame
&& n
* 4 >= frame
)
21115 live_regs_mask
|= (1 << n
) - 1;
21116 saved_regs
+= frame
;
21121 && current_tune
->prefer_ldrd_strd
21122 && !optimize_function_for_size_p (cfun
))
21124 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21126 thumb2_emit_strd_push (live_regs_mask
);
21127 else if (TARGET_ARM
21128 && !TARGET_APCS_FRAME
21129 && !IS_INTERRUPT (func_type
))
21130 arm_emit_strd_push (live_regs_mask
);
21133 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21134 RTX_FRAME_RELATED_P (insn
) = 1;
21139 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21140 RTX_FRAME_RELATED_P (insn
) = 1;
21144 if (! IS_VOLATILE (func_type
))
21145 saved_regs
+= arm_save_coproc_regs ();
21147 if (frame_pointer_needed
&& TARGET_ARM
)
21149 /* Create the new frame pointer. */
21150 if (TARGET_APCS_FRAME
)
21152 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21153 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21154 RTX_FRAME_RELATED_P (insn
) = 1;
21156 if (IS_NESTED (func_type
))
21158 /* Recover the static chain register. */
21159 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21160 insn
= gen_rtx_REG (SImode
, 3);
21163 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21164 insn
= gen_frame_mem (SImode
, insn
);
21166 emit_set_insn (ip_rtx
, insn
);
21167 /* Add a USE to stop propagate_one_insn() from barfing. */
21168 emit_insn (gen_force_register_use (ip_rtx
));
21173 insn
= GEN_INT (saved_regs
- 4);
21174 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21175 stack_pointer_rtx
, insn
));
21176 RTX_FRAME_RELATED_P (insn
) = 1;
21180 if (flag_stack_usage_info
)
21181 current_function_static_stack_size
21182 = offsets
->outgoing_args
- offsets
->saved_args
;
21184 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21186 /* This add can produce multiple insns for a large constant, so we
21187 need to get tricky. */
21188 rtx last
= get_last_insn ();
21190 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21191 - offsets
->outgoing_args
);
21193 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21197 last
= last
? NEXT_INSN (last
) : get_insns ();
21198 RTX_FRAME_RELATED_P (last
) = 1;
21200 while (last
!= insn
);
21202 /* If the frame pointer is needed, emit a special barrier that
21203 will prevent the scheduler from moving stores to the frame
21204 before the stack adjustment. */
21205 if (frame_pointer_needed
)
21206 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21207 hard_frame_pointer_rtx
));
21211 if (frame_pointer_needed
&& TARGET_THUMB2
)
21212 thumb_set_frame_pointer (offsets
);
21214 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21216 unsigned long mask
;
21218 mask
= live_regs_mask
;
21219 mask
&= THUMB2_WORK_REGS
;
21220 if (!IS_NESTED (func_type
))
21221 mask
|= (1 << IP_REGNUM
);
21222 arm_load_pic_register (mask
);
21225 /* If we are profiling, make sure no instructions are scheduled before
21226 the call to mcount. Similarly if the user has requested no
21227 scheduling in the prolog. Similarly if we want non-call exceptions
21228 using the EABI unwinder, to prevent faulting instructions from being
21229 swapped with a stack adjustment. */
21230 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21231 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21232 && cfun
->can_throw_non_call_exceptions
))
21233 emit_insn (gen_blockage ());
21235 /* If the link register is being kept alive, with the return address in it,
21236 then make sure that it does not get reused by the ce2 pass. */
21237 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21238 cfun
->machine
->lr_save_eliminated
= 1;
21241 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21243 arm_print_condition (FILE *stream
)
21245 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21247 /* Branch conversion is not implemented for Thumb-2. */
21250 output_operand_lossage ("predicated Thumb instruction");
21253 if (current_insn_predicate
!= NULL
)
21255 output_operand_lossage
21256 ("predicated instruction in conditional sequence");
21260 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21262 else if (current_insn_predicate
)
21264 enum arm_cond_code code
;
21268 output_operand_lossage ("predicated Thumb instruction");
21272 code
= get_arm_condition_code (current_insn_predicate
);
21273 fputs (arm_condition_codes
[code
], stream
);
21278 /* If CODE is 'd', then the X is a condition operand and the instruction
21279 should only be executed if the condition is true.
21280 if CODE is 'D', then the X is a condition operand and the instruction
21281 should only be executed if the condition is false: however, if the mode
21282 of the comparison is CCFPEmode, then always execute the instruction -- we
21283 do this because in these circumstances !GE does not necessarily imply LT;
21284 in these cases the instruction pattern will take care to make sure that
21285 an instruction containing %d will follow, thereby undoing the effects of
21286 doing this instruction unconditionally.
21287 If CODE is 'N' then X is a floating point operand that must be negated
21289 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21290 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21292 arm_print_operand (FILE *stream
, rtx x
, int code
)
21297 fputs (ASM_COMMENT_START
, stream
);
21301 fputs (user_label_prefix
, stream
);
21305 fputs (REGISTER_PREFIX
, stream
);
21309 arm_print_condition (stream
);
21313 /* Nothing in unified syntax, otherwise the current condition code. */
21314 if (!TARGET_UNIFIED_ASM
)
21315 arm_print_condition (stream
);
21319 /* The current condition code in unified syntax, otherwise nothing. */
21320 if (TARGET_UNIFIED_ASM
)
21321 arm_print_condition (stream
);
21325 /* The current condition code for a condition code setting instruction.
21326 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21327 if (TARGET_UNIFIED_ASM
)
21329 fputc('s', stream
);
21330 arm_print_condition (stream
);
21334 arm_print_condition (stream
);
21335 fputc('s', stream
);
21340 /* If the instruction is conditionally executed then print
21341 the current condition code, otherwise print 's'. */
21342 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21343 if (current_insn_predicate
)
21344 arm_print_condition (stream
);
21346 fputc('s', stream
);
21349 /* %# is a "break" sequence. It doesn't output anything, but is used to
21350 separate e.g. operand numbers from following text, if that text consists
21351 of further digits which we don't want to be part of the operand
21359 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21360 r
= real_value_negate (&r
);
21361 fprintf (stream
, "%s", fp_const_from_val (&r
));
21365 /* An integer or symbol address without a preceding # sign. */
21367 switch (GET_CODE (x
))
21370 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21374 output_addr_const (stream
, x
);
21378 if (GET_CODE (XEXP (x
, 0)) == PLUS
21379 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21381 output_addr_const (stream
, x
);
21384 /* Fall through. */
21387 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21391 /* An integer that we want to print in HEX. */
21393 switch (GET_CODE (x
))
21396 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21400 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21405 if (CONST_INT_P (x
))
21408 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21409 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21413 putc ('~', stream
);
21414 output_addr_const (stream
, x
);
21419 /* The low 16 bits of an immediate constant. */
21420 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21424 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21428 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21436 shift
= shift_op (x
, &val
);
21440 fprintf (stream
, ", %s ", shift
);
21442 arm_print_operand (stream
, XEXP (x
, 1), 0);
21444 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21449 /* An explanation of the 'Q', 'R' and 'H' register operands:
21451 In a pair of registers containing a DI or DF value the 'Q'
21452 operand returns the register number of the register containing
21453 the least significant part of the value. The 'R' operand returns
21454 the register number of the register containing the most
21455 significant part of the value.
21457 The 'H' operand returns the higher of the two register numbers.
21458 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21459 same as the 'Q' operand, since the most significant part of the
21460 value is held in the lower number register. The reverse is true
21461 on systems where WORDS_BIG_ENDIAN is false.
21463 The purpose of these operands is to distinguish between cases
21464 where the endian-ness of the values is important (for example
21465 when they are added together), and cases where the endian-ness
21466 is irrelevant, but the order of register operations is important.
21467 For example when loading a value from memory into a register
21468 pair, the endian-ness does not matter. Provided that the value
21469 from the lower memory address is put into the lower numbered
21470 register, and the value from the higher address is put into the
21471 higher numbered register, the load will work regardless of whether
21472 the value being loaded is big-wordian or little-wordian. The
21473 order of the two register loads can matter however, if the address
21474 of the memory location is actually held in one of the registers
21475 being overwritten by the load.
21477 The 'Q' and 'R' constraints are also available for 64-bit
21480 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21482 rtx part
= gen_lowpart (SImode
, x
);
21483 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21487 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21489 output_operand_lossage ("invalid operand for code '%c'", code
);
21493 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21497 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21499 enum machine_mode mode
= GET_MODE (x
);
21502 if (mode
== VOIDmode
)
21504 part
= gen_highpart_mode (SImode
, mode
, x
);
21505 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21509 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21511 output_operand_lossage ("invalid operand for code '%c'", code
);
21515 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21519 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21521 output_operand_lossage ("invalid operand for code '%c'", code
);
21525 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21529 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21531 output_operand_lossage ("invalid operand for code '%c'", code
);
21535 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21539 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21541 output_operand_lossage ("invalid operand for code '%c'", code
);
21545 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21549 asm_fprintf (stream
, "%r",
21550 REG_P (XEXP (x
, 0))
21551 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21555 asm_fprintf (stream
, "{%r-%r}",
21557 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21560 /* Like 'M', but writing doubleword vector registers, for use by Neon
21564 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21565 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21567 asm_fprintf (stream
, "{d%d}", regno
);
21569 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21574 /* CONST_TRUE_RTX means always -- that's the default. */
21575 if (x
== const_true_rtx
)
21578 if (!COMPARISON_P (x
))
21580 output_operand_lossage ("invalid operand for code '%c'", code
);
21584 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21589 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21590 want to do that. */
21591 if (x
== const_true_rtx
)
21593 output_operand_lossage ("instruction never executed");
21596 if (!COMPARISON_P (x
))
21598 output_operand_lossage ("invalid operand for code '%c'", code
);
21602 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21603 (get_arm_condition_code (x
))],
21613 /* Former Maverick support, removed after GCC-4.7. */
21614 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21619 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21620 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21621 /* Bad value for wCG register number. */
21623 output_operand_lossage ("invalid operand for code '%c'", code
);
21628 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21631 /* Print an iWMMXt control register name. */
21633 if (!CONST_INT_P (x
)
21635 || INTVAL (x
) >= 16)
21636 /* Bad value for wC register number. */
21638 output_operand_lossage ("invalid operand for code '%c'", code
);
21644 static const char * wc_reg_names
[16] =
21646 "wCID", "wCon", "wCSSF", "wCASF",
21647 "wC4", "wC5", "wC6", "wC7",
21648 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21649 "wC12", "wC13", "wC14", "wC15"
21652 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21656 /* Print the high single-precision register of a VFP double-precision
21660 enum machine_mode mode
= GET_MODE (x
);
21663 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21665 output_operand_lossage ("invalid operand for code '%c'", code
);
21670 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21672 output_operand_lossage ("invalid operand for code '%c'", code
);
21676 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21680 /* Print a VFP/Neon double precision or quad precision register name. */
21684 enum machine_mode mode
= GET_MODE (x
);
21685 int is_quad
= (code
== 'q');
21688 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21690 output_operand_lossage ("invalid operand for code '%c'", code
);
21695 || !IS_VFP_REGNUM (REGNO (x
)))
21697 output_operand_lossage ("invalid operand for code '%c'", code
);
21702 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21703 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21705 output_operand_lossage ("invalid operand for code '%c'", code
);
21709 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21710 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21714 /* These two codes print the low/high doubleword register of a Neon quad
21715 register, respectively. For pair-structure types, can also print
21716 low/high quadword registers. */
21720 enum machine_mode mode
= GET_MODE (x
);
21723 if ((GET_MODE_SIZE (mode
) != 16
21724 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21726 output_operand_lossage ("invalid operand for code '%c'", code
);
21731 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21733 output_operand_lossage ("invalid operand for code '%c'", code
);
21737 if (GET_MODE_SIZE (mode
) == 16)
21738 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21739 + (code
== 'f' ? 1 : 0));
21741 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21742 + (code
== 'f' ? 1 : 0));
21746 /* Print a VFPv3 floating-point constant, represented as an integer
21750 int index
= vfp3_const_double_index (x
);
21751 gcc_assert (index
!= -1);
21752 fprintf (stream
, "%d", index
);
21756 /* Print bits representing opcode features for Neon.
21758 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21759 and polynomials as unsigned.
21761 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21763 Bit 2 is 1 for rounding functions, 0 otherwise. */
21765 /* Identify the type as 's', 'u', 'p' or 'f'. */
21768 HOST_WIDE_INT bits
= INTVAL (x
);
21769 fputc ("uspf"[bits
& 3], stream
);
21773 /* Likewise, but signed and unsigned integers are both 'i'. */
21776 HOST_WIDE_INT bits
= INTVAL (x
);
21777 fputc ("iipf"[bits
& 3], stream
);
21781 /* As for 'T', but emit 'u' instead of 'p'. */
21784 HOST_WIDE_INT bits
= INTVAL (x
);
21785 fputc ("usuf"[bits
& 3], stream
);
21789 /* Bit 2: rounding (vs none). */
21792 HOST_WIDE_INT bits
= INTVAL (x
);
21793 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21797 /* Memory operand for vld1/vst1 instruction. */
21801 bool postinc
= FALSE
;
21802 unsigned align
, memsize
, align_bits
;
21804 gcc_assert (MEM_P (x
));
21805 addr
= XEXP (x
, 0);
21806 if (GET_CODE (addr
) == POST_INC
)
21809 addr
= XEXP (addr
, 0);
21811 asm_fprintf (stream
, "[%r", REGNO (addr
));
21813 /* We know the alignment of this access, so we can emit a hint in the
21814 instruction (for some alignments) as an aid to the memory subsystem
21816 align
= MEM_ALIGN (x
) >> 3;
21817 memsize
= MEM_SIZE (x
);
21819 /* Only certain alignment specifiers are supported by the hardware. */
21820 if (memsize
== 32 && (align
% 32) == 0)
21822 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
21824 else if (memsize
>= 8 && (align
% 8) == 0)
21829 if (align_bits
!= 0)
21830 asm_fprintf (stream
, ":%d", align_bits
);
21832 asm_fprintf (stream
, "]");
21835 fputs("!", stream
);
21843 gcc_assert (MEM_P (x
));
21844 addr
= XEXP (x
, 0);
21845 gcc_assert (REG_P (addr
));
21846 asm_fprintf (stream
, "[%r]", REGNO (addr
));
21850 /* Translate an S register number into a D register number and element index. */
21853 enum machine_mode mode
= GET_MODE (x
);
21856 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
21858 output_operand_lossage ("invalid operand for code '%c'", code
);
21863 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21865 output_operand_lossage ("invalid operand for code '%c'", code
);
21869 regno
= regno
- FIRST_VFP_REGNUM
;
21870 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
21875 gcc_assert (CONST_DOUBLE_P (x
));
21877 result
= vfp3_const_double_for_fract_bits (x
);
21879 result
= vfp3_const_double_for_bits (x
);
21880 fprintf (stream
, "#%d", result
);
21883 /* Register specifier for vld1.16/vst1.16. Translate the S register
21884 number into a D register number and element index. */
21887 enum machine_mode mode
= GET_MODE (x
);
21890 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
21892 output_operand_lossage ("invalid operand for code '%c'", code
);
21897 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21899 output_operand_lossage ("invalid operand for code '%c'", code
);
21903 regno
= regno
- FIRST_VFP_REGNUM
;
21904 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
21911 output_operand_lossage ("missing operand");
21915 switch (GET_CODE (x
))
21918 asm_fprintf (stream
, "%r", REGNO (x
));
21922 output_memory_reference_mode
= GET_MODE (x
);
21923 output_address (XEXP (x
, 0));
21930 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
21931 sizeof (fpstr
), 0, 1);
21932 fprintf (stream
, "#%s", fpstr
);
21935 fprintf (stream
, "#%s", fp_immediate_constant (x
));
21939 gcc_assert (GET_CODE (x
) != NEG
);
21940 fputc ('#', stream
);
21941 if (GET_CODE (x
) == HIGH
)
21943 fputs (":lower16:", stream
);
21947 output_addr_const (stream
, x
);
21953 /* Target hook for printing a memory address. */
21955 arm_print_operand_address (FILE *stream
, rtx x
)
21959 int is_minus
= GET_CODE (x
) == MINUS
;
21962 asm_fprintf (stream
, "[%r]", REGNO (x
));
21963 else if (GET_CODE (x
) == PLUS
|| is_minus
)
21965 rtx base
= XEXP (x
, 0);
21966 rtx index
= XEXP (x
, 1);
21967 HOST_WIDE_INT offset
= 0;
21969 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
21971 /* Ensure that BASE is a register. */
21972 /* (one of them must be). */
21973 /* Also ensure the SP is not used as an index register. */
21978 switch (GET_CODE (index
))
21981 offset
= INTVAL (index
);
21984 asm_fprintf (stream
, "[%r, #%wd]",
21985 REGNO (base
), offset
);
21989 asm_fprintf (stream
, "[%r, %s%r]",
21990 REGNO (base
), is_minus
? "-" : "",
22000 asm_fprintf (stream
, "[%r, %s%r",
22001 REGNO (base
), is_minus
? "-" : "",
22002 REGNO (XEXP (index
, 0)));
22003 arm_print_operand (stream
, index
, 'S');
22004 fputs ("]", stream
);
22009 gcc_unreachable ();
22012 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22013 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22015 extern enum machine_mode output_memory_reference_mode
;
22017 gcc_assert (REG_P (XEXP (x
, 0)));
22019 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22020 asm_fprintf (stream
, "[%r, #%s%d]!",
22021 REGNO (XEXP (x
, 0)),
22022 GET_CODE (x
) == PRE_DEC
? "-" : "",
22023 GET_MODE_SIZE (output_memory_reference_mode
));
22025 asm_fprintf (stream
, "[%r], #%s%d",
22026 REGNO (XEXP (x
, 0)),
22027 GET_CODE (x
) == POST_DEC
? "-" : "",
22028 GET_MODE_SIZE (output_memory_reference_mode
));
22030 else if (GET_CODE (x
) == PRE_MODIFY
)
22032 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22033 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22034 asm_fprintf (stream
, "#%wd]!",
22035 INTVAL (XEXP (XEXP (x
, 1), 1)));
22037 asm_fprintf (stream
, "%r]!",
22038 REGNO (XEXP (XEXP (x
, 1), 1)));
22040 else if (GET_CODE (x
) == POST_MODIFY
)
22042 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22043 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22044 asm_fprintf (stream
, "#%wd",
22045 INTVAL (XEXP (XEXP (x
, 1), 1)));
22047 asm_fprintf (stream
, "%r",
22048 REGNO (XEXP (XEXP (x
, 1), 1)));
22050 else output_addr_const (stream
, x
);
22055 asm_fprintf (stream
, "[%r]", REGNO (x
));
22056 else if (GET_CODE (x
) == POST_INC
)
22057 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22058 else if (GET_CODE (x
) == PLUS
)
22060 gcc_assert (REG_P (XEXP (x
, 0)));
22061 if (CONST_INT_P (XEXP (x
, 1)))
22062 asm_fprintf (stream
, "[%r, #%wd]",
22063 REGNO (XEXP (x
, 0)),
22064 INTVAL (XEXP (x
, 1)));
22066 asm_fprintf (stream
, "[%r, %r]",
22067 REGNO (XEXP (x
, 0)),
22068 REGNO (XEXP (x
, 1)));
22071 output_addr_const (stream
, x
);
22075 /* Target hook for indicating whether a punctuation character for
22076 TARGET_PRINT_OPERAND is valid. */
22078 arm_print_operand_punct_valid_p (unsigned char code
)
22080 return (code
== '@' || code
== '|' || code
== '.'
22081 || code
== '(' || code
== ')' || code
== '#'
22082 || (TARGET_32BIT
&& (code
== '?'))
22083 || (TARGET_THUMB2
&& (code
== '!'))
22084 || (TARGET_THUMB
&& (code
== '_')));
22087 /* Target hook for assembling integer objects. The ARM version needs to
22088 handle word-sized values specially. */
22090 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22092 enum machine_mode mode
;
22094 if (size
== UNITS_PER_WORD
&& aligned_p
)
22096 fputs ("\t.word\t", asm_out_file
);
22097 output_addr_const (asm_out_file
, x
);
22099 /* Mark symbols as position independent. We only do this in the
22100 .text segment, not in the .data segment. */
22101 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22102 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22104 /* See legitimize_pic_address for an explanation of the
22105 TARGET_VXWORKS_RTP check. */
22106 if (!arm_pic_data_is_text_relative
22107 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22108 fputs ("(GOT)", asm_out_file
);
22110 fputs ("(GOTOFF)", asm_out_file
);
22112 fputc ('\n', asm_out_file
);
22116 mode
= GET_MODE (x
);
22118 if (arm_vector_mode_supported_p (mode
))
22122 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22124 units
= CONST_VECTOR_NUNITS (x
);
22125 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
22127 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22128 for (i
= 0; i
< units
; i
++)
22130 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22132 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22135 for (i
= 0; i
< units
; i
++)
22137 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22138 REAL_VALUE_TYPE rval
;
22140 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
22143 (rval
, GET_MODE_INNER (mode
),
22144 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22150 return default_assemble_integer (x
, size
, aligned_p
);
22154 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22158 if (!TARGET_AAPCS_BASED
)
22161 default_named_section_asm_out_constructor
22162 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22166 /* Put these in the .init_array section, using a special relocation. */
22167 if (priority
!= DEFAULT_INIT_PRIORITY
)
22170 sprintf (buf
, "%s.%.5u",
22171 is_ctor
? ".init_array" : ".fini_array",
22173 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22180 switch_to_section (s
);
22181 assemble_align (POINTER_SIZE
);
22182 fputs ("\t.word\t", asm_out_file
);
22183 output_addr_const (asm_out_file
, symbol
);
22184 fputs ("(target1)\n", asm_out_file
);
22187 /* Add a function to the list of static constructors. */
22190 arm_elf_asm_constructor (rtx symbol
, int priority
)
22192 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22195 /* Add a function to the list of static destructors. */
22198 arm_elf_asm_destructor (rtx symbol
, int priority
)
22200 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22203 /* A finite state machine takes care of noticing whether or not instructions
22204 can be conditionally executed, and thus decrease execution time and code
22205 size by deleting branch instructions. The fsm is controlled by
22206 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22208 /* The state of the fsm controlling condition codes are:
22209 0: normal, do nothing special
22210 1: make ASM_OUTPUT_OPCODE not output this instruction
22211 2: make ASM_OUTPUT_OPCODE not output this instruction
22212 3: make instructions conditional
22213 4: make instructions conditional
22215 State transitions (state->state by whom under condition):
22216 0 -> 1 final_prescan_insn if the `target' is a label
22217 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22218 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22219 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22220 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22221 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22222 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22223 (the target insn is arm_target_insn).
22225 If the jump clobbers the conditions then we use states 2 and 4.
22227 A similar thing can be done with conditional return insns.
22229 XXX In case the `target' is an unconditional branch, this conditionalising
22230 of the instructions always reduces code size, but not always execution
22231 time. But then, I want to reduce the code size to somewhere near what
22232 /bin/cc produces. */
22234 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22235 instructions. When a COND_EXEC instruction is seen the subsequent
22236 instructions are scanned so that multiple conditional instructions can be
22237 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22238 specify the length and true/false mask for the IT block. These will be
22239 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22241 /* Returns the index of the ARM condition code string in
22242 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22243 COMPARISON should be an rtx like `(eq (...) (...))'. */
22246 maybe_get_arm_condition_code (rtx comparison
)
22248 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22249 enum arm_cond_code code
;
22250 enum rtx_code comp_code
= GET_CODE (comparison
);
22252 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22253 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22254 XEXP (comparison
, 1));
22258 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22259 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22260 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22261 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22262 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22263 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22264 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22265 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22266 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22267 case CC_DLTUmode
: code
= ARM_CC
;
22270 if (comp_code
== EQ
)
22271 return ARM_INVERSE_CONDITION_CODE (code
);
22272 if (comp_code
== NE
)
22279 case NE
: return ARM_NE
;
22280 case EQ
: return ARM_EQ
;
22281 case GE
: return ARM_PL
;
22282 case LT
: return ARM_MI
;
22283 default: return ARM_NV
;
22289 case NE
: return ARM_NE
;
22290 case EQ
: return ARM_EQ
;
22291 default: return ARM_NV
;
22297 case NE
: return ARM_MI
;
22298 case EQ
: return ARM_PL
;
22299 default: return ARM_NV
;
22304 /* We can handle all cases except UNEQ and LTGT. */
22307 case GE
: return ARM_GE
;
22308 case GT
: return ARM_GT
;
22309 case LE
: return ARM_LS
;
22310 case LT
: return ARM_MI
;
22311 case NE
: return ARM_NE
;
22312 case EQ
: return ARM_EQ
;
22313 case ORDERED
: return ARM_VC
;
22314 case UNORDERED
: return ARM_VS
;
22315 case UNLT
: return ARM_LT
;
22316 case UNLE
: return ARM_LE
;
22317 case UNGT
: return ARM_HI
;
22318 case UNGE
: return ARM_PL
;
22319 /* UNEQ and LTGT do not have a representation. */
22320 case UNEQ
: /* Fall through. */
22321 case LTGT
: /* Fall through. */
22322 default: return ARM_NV
;
22328 case NE
: return ARM_NE
;
22329 case EQ
: return ARM_EQ
;
22330 case GE
: return ARM_LE
;
22331 case GT
: return ARM_LT
;
22332 case LE
: return ARM_GE
;
22333 case LT
: return ARM_GT
;
22334 case GEU
: return ARM_LS
;
22335 case GTU
: return ARM_CC
;
22336 case LEU
: return ARM_CS
;
22337 case LTU
: return ARM_HI
;
22338 default: return ARM_NV
;
22344 case LTU
: return ARM_CS
;
22345 case GEU
: return ARM_CC
;
22346 default: return ARM_NV
;
22352 case NE
: return ARM_NE
;
22353 case EQ
: return ARM_EQ
;
22354 case GEU
: return ARM_CS
;
22355 case GTU
: return ARM_HI
;
22356 case LEU
: return ARM_LS
;
22357 case LTU
: return ARM_CC
;
22358 default: return ARM_NV
;
22364 case GE
: return ARM_GE
;
22365 case LT
: return ARM_LT
;
22366 case GEU
: return ARM_CS
;
22367 case LTU
: return ARM_CC
;
22368 default: return ARM_NV
;
22374 case NE
: return ARM_NE
;
22375 case EQ
: return ARM_EQ
;
22376 case GE
: return ARM_GE
;
22377 case GT
: return ARM_GT
;
22378 case LE
: return ARM_LE
;
22379 case LT
: return ARM_LT
;
22380 case GEU
: return ARM_CS
;
22381 case GTU
: return ARM_HI
;
22382 case LEU
: return ARM_LS
;
22383 case LTU
: return ARM_CC
;
22384 default: return ARM_NV
;
22387 default: gcc_unreachable ();
22391 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22392 static enum arm_cond_code
22393 get_arm_condition_code (rtx comparison
)
22395 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22396 gcc_assert (code
!= ARM_NV
);
22400 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22403 thumb2_final_prescan_insn (rtx insn
)
22405 rtx first_insn
= insn
;
22406 rtx body
= PATTERN (insn
);
22408 enum arm_cond_code code
;
22413 /* max_insns_skipped in the tune was already taken into account in the
22414 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22415 just emit the IT blocks as we can. It does not make sense to split
22417 max
= MAX_INSN_PER_IT_BLOCK
;
22419 /* Remove the previous insn from the count of insns to be output. */
22420 if (arm_condexec_count
)
22421 arm_condexec_count
--;
22423 /* Nothing to do if we are already inside a conditional block. */
22424 if (arm_condexec_count
)
22427 if (GET_CODE (body
) != COND_EXEC
)
22430 /* Conditional jumps are implemented directly. */
22434 predicate
= COND_EXEC_TEST (body
);
22435 arm_current_cc
= get_arm_condition_code (predicate
);
22437 n
= get_attr_ce_count (insn
);
22438 arm_condexec_count
= 1;
22439 arm_condexec_mask
= (1 << n
) - 1;
22440 arm_condexec_masklen
= n
;
22441 /* See if subsequent instructions can be combined into the same block. */
22444 insn
= next_nonnote_insn (insn
);
22446 /* Jumping into the middle of an IT block is illegal, so a label or
22447 barrier terminates the block. */
22448 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22451 body
= PATTERN (insn
);
22452 /* USE and CLOBBER aren't really insns, so just skip them. */
22453 if (GET_CODE (body
) == USE
22454 || GET_CODE (body
) == CLOBBER
)
22457 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22458 if (GET_CODE (body
) != COND_EXEC
)
22460 /* Maximum number of conditionally executed instructions in a block. */
22461 n
= get_attr_ce_count (insn
);
22462 if (arm_condexec_masklen
+ n
> max
)
22465 predicate
= COND_EXEC_TEST (body
);
22466 code
= get_arm_condition_code (predicate
);
22467 mask
= (1 << n
) - 1;
22468 if (arm_current_cc
== code
)
22469 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22470 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22473 arm_condexec_count
++;
22474 arm_condexec_masklen
+= n
;
22476 /* A jump must be the last instruction in a conditional block. */
22480 /* Restore recog_data (getting the attributes of other insns can
22481 destroy this array, but final.c assumes that it remains intact
22482 across this call). */
22483 extract_constrain_insn_cached (first_insn
);
22487 arm_final_prescan_insn (rtx insn
)
22489 /* BODY will hold the body of INSN. */
22490 rtx body
= PATTERN (insn
);
22492 /* This will be 1 if trying to repeat the trick, and things need to be
22493 reversed if it appears to fail. */
22496 /* If we start with a return insn, we only succeed if we find another one. */
22497 int seeking_return
= 0;
22498 enum rtx_code return_code
= UNKNOWN
;
22500 /* START_INSN will hold the insn from where we start looking. This is the
22501 first insn after the following code_label if REVERSE is true. */
22502 rtx start_insn
= insn
;
22504 /* If in state 4, check if the target branch is reached, in order to
22505 change back to state 0. */
22506 if (arm_ccfsm_state
== 4)
22508 if (insn
== arm_target_insn
)
22510 arm_target_insn
= NULL
;
22511 arm_ccfsm_state
= 0;
22516 /* If in state 3, it is possible to repeat the trick, if this insn is an
22517 unconditional branch to a label, and immediately following this branch
22518 is the previous target label which is only used once, and the label this
22519 branch jumps to is not too far off. */
22520 if (arm_ccfsm_state
== 3)
22522 if (simplejump_p (insn
))
22524 start_insn
= next_nonnote_insn (start_insn
);
22525 if (BARRIER_P (start_insn
))
22527 /* XXX Isn't this always a barrier? */
22528 start_insn
= next_nonnote_insn (start_insn
);
22530 if (LABEL_P (start_insn
)
22531 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22532 && LABEL_NUSES (start_insn
) == 1)
22537 else if (ANY_RETURN_P (body
))
22539 start_insn
= next_nonnote_insn (start_insn
);
22540 if (BARRIER_P (start_insn
))
22541 start_insn
= next_nonnote_insn (start_insn
);
22542 if (LABEL_P (start_insn
)
22543 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22544 && LABEL_NUSES (start_insn
) == 1)
22547 seeking_return
= 1;
22548 return_code
= GET_CODE (body
);
22557 gcc_assert (!arm_ccfsm_state
|| reverse
);
22558 if (!JUMP_P (insn
))
22561 /* This jump might be paralleled with a clobber of the condition codes
22562 the jump should always come first */
22563 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22564 body
= XVECEXP (body
, 0, 0);
22567 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22568 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22571 int fail
= FALSE
, succeed
= FALSE
;
22572 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22573 int then_not_else
= TRUE
;
22574 rtx this_insn
= start_insn
, label
= 0;
22576 /* Register the insn jumped to. */
22579 if (!seeking_return
)
22580 label
= XEXP (SET_SRC (body
), 0);
22582 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22583 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22584 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22586 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22587 then_not_else
= FALSE
;
22589 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22591 seeking_return
= 1;
22592 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22594 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22596 seeking_return
= 1;
22597 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22598 then_not_else
= FALSE
;
22601 gcc_unreachable ();
22603 /* See how many insns this branch skips, and what kind of insns. If all
22604 insns are okay, and the label or unconditional branch to the same
22605 label is not too far away, succeed. */
22606 for (insns_skipped
= 0;
22607 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22611 this_insn
= next_nonnote_insn (this_insn
);
22615 switch (GET_CODE (this_insn
))
22618 /* Succeed if it is the target label, otherwise fail since
22619 control falls in from somewhere else. */
22620 if (this_insn
== label
)
22622 arm_ccfsm_state
= 1;
22630 /* Succeed if the following insn is the target label.
22632 If return insns are used then the last insn in a function
22633 will be a barrier. */
22634 this_insn
= next_nonnote_insn (this_insn
);
22635 if (this_insn
&& this_insn
== label
)
22637 arm_ccfsm_state
= 1;
22645 /* The AAPCS says that conditional calls should not be
22646 used since they make interworking inefficient (the
22647 linker can't transform BL<cond> into BLX). That's
22648 only a problem if the machine has BLX. */
22655 /* Succeed if the following insn is the target label, or
22656 if the following two insns are a barrier and the
22658 this_insn
= next_nonnote_insn (this_insn
);
22659 if (this_insn
&& BARRIER_P (this_insn
))
22660 this_insn
= next_nonnote_insn (this_insn
);
22662 if (this_insn
&& this_insn
== label
22663 && insns_skipped
< max_insns_skipped
)
22665 arm_ccfsm_state
= 1;
22673 /* If this is an unconditional branch to the same label, succeed.
22674 If it is to another label, do nothing. If it is conditional,
22676 /* XXX Probably, the tests for SET and the PC are
22679 scanbody
= PATTERN (this_insn
);
22680 if (GET_CODE (scanbody
) == SET
22681 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22683 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22684 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22686 arm_ccfsm_state
= 2;
22689 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22692 /* Fail if a conditional return is undesirable (e.g. on a
22693 StrongARM), but still allow this if optimizing for size. */
22694 else if (GET_CODE (scanbody
) == return_code
22695 && !use_return_insn (TRUE
, NULL
)
22698 else if (GET_CODE (scanbody
) == return_code
)
22700 arm_ccfsm_state
= 2;
22703 else if (GET_CODE (scanbody
) == PARALLEL
)
22705 switch (get_attr_conds (this_insn
))
22715 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22720 /* Instructions using or affecting the condition codes make it
22722 scanbody
= PATTERN (this_insn
);
22723 if (!(GET_CODE (scanbody
) == SET
22724 || GET_CODE (scanbody
) == PARALLEL
)
22725 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
22735 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22736 arm_target_label
= CODE_LABEL_NUMBER (label
);
22739 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22741 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22743 this_insn
= next_nonnote_insn (this_insn
);
22744 gcc_assert (!this_insn
22745 || (!BARRIER_P (this_insn
)
22746 && !LABEL_P (this_insn
)));
22750 /* Oh, dear! we ran off the end.. give up. */
22751 extract_constrain_insn_cached (insn
);
22752 arm_ccfsm_state
= 0;
22753 arm_target_insn
= NULL
;
22756 arm_target_insn
= this_insn
;
22759 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22762 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22764 if (reverse
|| then_not_else
)
22765 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
22768 /* Restore recog_data (getting the attributes of other insns can
22769 destroy this array, but final.c assumes that it remains intact
22770 across this call. */
22771 extract_constrain_insn_cached (insn
);
22775 /* Output IT instructions. */
22777 thumb2_asm_output_opcode (FILE * stream
)
22782 if (arm_condexec_mask
)
22784 for (n
= 0; n
< arm_condexec_masklen
; n
++)
22785 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
22787 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
22788 arm_condition_codes
[arm_current_cc
]);
22789 arm_condexec_mask
= 0;
22793 /* Returns true if REGNO is a valid register
22794 for holding a quantity of type MODE. */
22796 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
22798 if (GET_MODE_CLASS (mode
) == MODE_CC
)
22799 return (regno
== CC_REGNUM
22800 || (TARGET_HARD_FLOAT
&& TARGET_VFP
22801 && regno
== VFPCC_REGNUM
));
22804 /* For the Thumb we only allow values bigger than SImode in
22805 registers 0 - 6, so that there is always a second low
22806 register available to hold the upper part of the value.
22807 We probably we ought to ensure that the register is the
22808 start of an even numbered register pair. */
22809 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
22811 if (TARGET_HARD_FLOAT
&& TARGET_VFP
22812 && IS_VFP_REGNUM (regno
))
22814 if (mode
== SFmode
|| mode
== SImode
)
22815 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22817 if (mode
== DFmode
)
22818 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
22820 /* VFP registers can hold HFmode values, but there is no point in
22821 putting them there unless we have hardware conversion insns. */
22822 if (mode
== HFmode
)
22823 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
22826 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
22827 || (VALID_NEON_QREG_MODE (mode
)
22828 && NEON_REGNO_OK_FOR_QUAD (regno
))
22829 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
22830 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
22831 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
22832 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
22833 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
22838 if (TARGET_REALLY_IWMMXT
)
22840 if (IS_IWMMXT_GR_REGNUM (regno
))
22841 return mode
== SImode
;
22843 if (IS_IWMMXT_REGNUM (regno
))
22844 return VALID_IWMMXT_REG_MODE (mode
);
22847 /* We allow almost any value to be stored in the general registers.
22848 Restrict doubleword quantities to even register pairs in ARM state
22849 so that we can use ldrd. Do not allow very large Neon structure
22850 opaque modes in general registers; they would use too many. */
22851 if (regno
<= LAST_ARM_REGNUM
)
22853 if (ARM_NUM_REGS (mode
) > 4)
22859 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
22862 if (regno
== FRAME_POINTER_REGNUM
22863 || regno
== ARG_POINTER_REGNUM
)
22864 /* We only allow integers in the fake hard registers. */
22865 return GET_MODE_CLASS (mode
) == MODE_INT
;
22870 /* Implement MODES_TIEABLE_P. */
22873 arm_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
22875 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
22878 /* We specifically want to allow elements of "structure" modes to
22879 be tieable to the structure. This more general condition allows
22880 other rarer situations too. */
22882 && (VALID_NEON_DREG_MODE (mode1
)
22883 || VALID_NEON_QREG_MODE (mode1
)
22884 || VALID_NEON_STRUCT_MODE (mode1
))
22885 && (VALID_NEON_DREG_MODE (mode2
)
22886 || VALID_NEON_QREG_MODE (mode2
)
22887 || VALID_NEON_STRUCT_MODE (mode2
)))
22893 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22894 not used in arm mode. */
22897 arm_regno_class (int regno
)
22901 if (regno
== STACK_POINTER_REGNUM
)
22903 if (regno
== CC_REGNUM
)
22910 if (TARGET_THUMB2
&& regno
< 8)
22913 if ( regno
<= LAST_ARM_REGNUM
22914 || regno
== FRAME_POINTER_REGNUM
22915 || regno
== ARG_POINTER_REGNUM
)
22916 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
22918 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
22919 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
22921 if (IS_VFP_REGNUM (regno
))
22923 if (regno
<= D7_VFP_REGNUM
)
22924 return VFP_D0_D7_REGS
;
22925 else if (regno
<= LAST_LO_VFP_REGNUM
)
22926 return VFP_LO_REGS
;
22928 return VFP_HI_REGS
;
22931 if (IS_IWMMXT_REGNUM (regno
))
22932 return IWMMXT_REGS
;
22934 if (IS_IWMMXT_GR_REGNUM (regno
))
22935 return IWMMXT_GR_REGS
;
22940 /* Handle a special case when computing the offset
22941 of an argument from the frame pointer. */
22943 arm_debugger_arg_offset (int value
, rtx addr
)
22947 /* We are only interested if dbxout_parms() failed to compute the offset. */
22951 /* We can only cope with the case where the address is held in a register. */
22955 /* If we are using the frame pointer to point at the argument, then
22956 an offset of 0 is correct. */
22957 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
22960 /* If we are using the stack pointer to point at the
22961 argument, then an offset of 0 is correct. */
22962 /* ??? Check this is consistent with thumb2 frame layout. */
22963 if ((TARGET_THUMB
|| !frame_pointer_needed
)
22964 && REGNO (addr
) == SP_REGNUM
)
22967 /* Oh dear. The argument is pointed to by a register rather
22968 than being held in a register, or being stored at a known
22969 offset from the frame pointer. Since GDB only understands
22970 those two kinds of argument we must translate the address
22971 held in the register into an offset from the frame pointer.
22972 We do this by searching through the insns for the function
22973 looking to see where this register gets its value. If the
22974 register is initialized from the frame pointer plus an offset
22975 then we are in luck and we can continue, otherwise we give up.
22977 This code is exercised by producing debugging information
22978 for a function with arguments like this:
22980 double func (double a, double b, int c, double d) {return d;}
22982 Without this code the stab for parameter 'd' will be set to
22983 an offset of 0 from the frame pointer, rather than 8. */
22985 /* The if() statement says:
22987 If the insn is a normal instruction
22988 and if the insn is setting the value in a register
22989 and if the register being set is the register holding the address of the argument
22990 and if the address is computing by an addition
22991 that involves adding to a register
22992 which is the frame pointer
22997 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
22999 if ( NONJUMP_INSN_P (insn
)
23000 && GET_CODE (PATTERN (insn
)) == SET
23001 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23002 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23003 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23004 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23005 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23008 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23017 warning (0, "unable to compute real location of stacked parameter");
23018 value
= 8; /* XXX magic hack */
23039 T_MAX
/* Size of enum. Keep last. */
23040 } neon_builtin_type_mode
;
23042 #define TYPE_MODE_BIT(X) (1 << (X))
23044 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23045 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23046 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23047 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23048 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23049 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23051 #define v8qi_UP T_V8QI
23052 #define v4hi_UP T_V4HI
23053 #define v4hf_UP T_V4HF
23054 #define v2si_UP T_V2SI
23055 #define v2sf_UP T_V2SF
23057 #define v16qi_UP T_V16QI
23058 #define v8hi_UP T_V8HI
23059 #define v4si_UP T_V4SI
23060 #define v4sf_UP T_V4SF
23061 #define v2di_UP T_V2DI
23066 #define UP(X) X##_UP
23102 NEON_LOADSTRUCTLANE
,
23104 NEON_STORESTRUCTLANE
,
23113 const neon_itype itype
;
23114 const neon_builtin_type_mode mode
;
23115 const enum insn_code code
;
23116 unsigned int fcode
;
23117 } neon_builtin_datum
;
23119 #define CF(N,X) CODE_FOR_neon_##N##X
23121 #define VAR1(T, N, A) \
23122 {#N, NEON_##T, UP (A), CF (N, A), 0}
23123 #define VAR2(T, N, A, B) \
23125 {#N, NEON_##T, UP (B), CF (N, B), 0}
23126 #define VAR3(T, N, A, B, C) \
23127 VAR2 (T, N, A, B), \
23128 {#N, NEON_##T, UP (C), CF (N, C), 0}
23129 #define VAR4(T, N, A, B, C, D) \
23130 VAR3 (T, N, A, B, C), \
23131 {#N, NEON_##T, UP (D), CF (N, D), 0}
23132 #define VAR5(T, N, A, B, C, D, E) \
23133 VAR4 (T, N, A, B, C, D), \
23134 {#N, NEON_##T, UP (E), CF (N, E), 0}
23135 #define VAR6(T, N, A, B, C, D, E, F) \
23136 VAR5 (T, N, A, B, C, D, E), \
23137 {#N, NEON_##T, UP (F), CF (N, F), 0}
23138 #define VAR7(T, N, A, B, C, D, E, F, G) \
23139 VAR6 (T, N, A, B, C, D, E, F), \
23140 {#N, NEON_##T, UP (G), CF (N, G), 0}
23141 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23142 VAR7 (T, N, A, B, C, D, E, F, G), \
23143 {#N, NEON_##T, UP (H), CF (N, H), 0}
23144 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23145 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23146 {#N, NEON_##T, UP (I), CF (N, I), 0}
23147 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23148 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23149 {#N, NEON_##T, UP (J), CF (N, J), 0}
23151 /* The NEON builtin data can be found in arm_neon_builtins.def.
23152 The mode entries in the following table correspond to the "key" type of the
23153 instruction variant, i.e. equivalent to that which would be specified after
23154 the assembler mnemonic, which usually refers to the last vector operand.
23155 (Signed/unsigned/polynomial types are not differentiated between though, and
23156 are all mapped onto the same mode for a given element size.) The modes
23157 listed per instruction should be the same as those defined for that
23158 instruction's pattern in neon.md. */
23160 static neon_builtin_datum neon_builtin_data
[] =
23162 #include "arm_neon_builtins.def"
23177 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23178 #define VAR1(T, N, A) \
23180 #define VAR2(T, N, A, B) \
23183 #define VAR3(T, N, A, B, C) \
23184 VAR2 (T, N, A, B), \
23186 #define VAR4(T, N, A, B, C, D) \
23187 VAR3 (T, N, A, B, C), \
23189 #define VAR5(T, N, A, B, C, D, E) \
23190 VAR4 (T, N, A, B, C, D), \
23192 #define VAR6(T, N, A, B, C, D, E, F) \
23193 VAR5 (T, N, A, B, C, D, E), \
23195 #define VAR7(T, N, A, B, C, D, E, F, G) \
23196 VAR6 (T, N, A, B, C, D, E, F), \
23198 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23199 VAR7 (T, N, A, B, C, D, E, F, G), \
23201 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23202 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23204 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23205 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23209 ARM_BUILTIN_GETWCGR0
,
23210 ARM_BUILTIN_GETWCGR1
,
23211 ARM_BUILTIN_GETWCGR2
,
23212 ARM_BUILTIN_GETWCGR3
,
23214 ARM_BUILTIN_SETWCGR0
,
23215 ARM_BUILTIN_SETWCGR1
,
23216 ARM_BUILTIN_SETWCGR2
,
23217 ARM_BUILTIN_SETWCGR3
,
23221 ARM_BUILTIN_WAVG2BR
,
23222 ARM_BUILTIN_WAVG2HR
,
23223 ARM_BUILTIN_WAVG2B
,
23224 ARM_BUILTIN_WAVG2H
,
23231 ARM_BUILTIN_WMACSZ
,
23233 ARM_BUILTIN_WMACUZ
,
23236 ARM_BUILTIN_WSADBZ
,
23238 ARM_BUILTIN_WSADHZ
,
23240 ARM_BUILTIN_WALIGNI
,
23241 ARM_BUILTIN_WALIGNR0
,
23242 ARM_BUILTIN_WALIGNR1
,
23243 ARM_BUILTIN_WALIGNR2
,
23244 ARM_BUILTIN_WALIGNR3
,
23247 ARM_BUILTIN_TMIAPH
,
23248 ARM_BUILTIN_TMIABB
,
23249 ARM_BUILTIN_TMIABT
,
23250 ARM_BUILTIN_TMIATB
,
23251 ARM_BUILTIN_TMIATT
,
23253 ARM_BUILTIN_TMOVMSKB
,
23254 ARM_BUILTIN_TMOVMSKH
,
23255 ARM_BUILTIN_TMOVMSKW
,
23257 ARM_BUILTIN_TBCSTB
,
23258 ARM_BUILTIN_TBCSTH
,
23259 ARM_BUILTIN_TBCSTW
,
23261 ARM_BUILTIN_WMADDS
,
23262 ARM_BUILTIN_WMADDU
,
23264 ARM_BUILTIN_WPACKHSS
,
23265 ARM_BUILTIN_WPACKWSS
,
23266 ARM_BUILTIN_WPACKDSS
,
23267 ARM_BUILTIN_WPACKHUS
,
23268 ARM_BUILTIN_WPACKWUS
,
23269 ARM_BUILTIN_WPACKDUS
,
23274 ARM_BUILTIN_WADDSSB
,
23275 ARM_BUILTIN_WADDSSH
,
23276 ARM_BUILTIN_WADDSSW
,
23277 ARM_BUILTIN_WADDUSB
,
23278 ARM_BUILTIN_WADDUSH
,
23279 ARM_BUILTIN_WADDUSW
,
23283 ARM_BUILTIN_WSUBSSB
,
23284 ARM_BUILTIN_WSUBSSH
,
23285 ARM_BUILTIN_WSUBSSW
,
23286 ARM_BUILTIN_WSUBUSB
,
23287 ARM_BUILTIN_WSUBUSH
,
23288 ARM_BUILTIN_WSUBUSW
,
23295 ARM_BUILTIN_WCMPEQB
,
23296 ARM_BUILTIN_WCMPEQH
,
23297 ARM_BUILTIN_WCMPEQW
,
23298 ARM_BUILTIN_WCMPGTUB
,
23299 ARM_BUILTIN_WCMPGTUH
,
23300 ARM_BUILTIN_WCMPGTUW
,
23301 ARM_BUILTIN_WCMPGTSB
,
23302 ARM_BUILTIN_WCMPGTSH
,
23303 ARM_BUILTIN_WCMPGTSW
,
23305 ARM_BUILTIN_TEXTRMSB
,
23306 ARM_BUILTIN_TEXTRMSH
,
23307 ARM_BUILTIN_TEXTRMSW
,
23308 ARM_BUILTIN_TEXTRMUB
,
23309 ARM_BUILTIN_TEXTRMUH
,
23310 ARM_BUILTIN_TEXTRMUW
,
23311 ARM_BUILTIN_TINSRB
,
23312 ARM_BUILTIN_TINSRH
,
23313 ARM_BUILTIN_TINSRW
,
23315 ARM_BUILTIN_WMAXSW
,
23316 ARM_BUILTIN_WMAXSH
,
23317 ARM_BUILTIN_WMAXSB
,
23318 ARM_BUILTIN_WMAXUW
,
23319 ARM_BUILTIN_WMAXUH
,
23320 ARM_BUILTIN_WMAXUB
,
23321 ARM_BUILTIN_WMINSW
,
23322 ARM_BUILTIN_WMINSH
,
23323 ARM_BUILTIN_WMINSB
,
23324 ARM_BUILTIN_WMINUW
,
23325 ARM_BUILTIN_WMINUH
,
23326 ARM_BUILTIN_WMINUB
,
23328 ARM_BUILTIN_WMULUM
,
23329 ARM_BUILTIN_WMULSM
,
23330 ARM_BUILTIN_WMULUL
,
23332 ARM_BUILTIN_PSADBH
,
23333 ARM_BUILTIN_WSHUFH
,
23347 ARM_BUILTIN_WSLLHI
,
23348 ARM_BUILTIN_WSLLWI
,
23349 ARM_BUILTIN_WSLLDI
,
23350 ARM_BUILTIN_WSRAHI
,
23351 ARM_BUILTIN_WSRAWI
,
23352 ARM_BUILTIN_WSRADI
,
23353 ARM_BUILTIN_WSRLHI
,
23354 ARM_BUILTIN_WSRLWI
,
23355 ARM_BUILTIN_WSRLDI
,
23356 ARM_BUILTIN_WRORHI
,
23357 ARM_BUILTIN_WRORWI
,
23358 ARM_BUILTIN_WRORDI
,
23360 ARM_BUILTIN_WUNPCKIHB
,
23361 ARM_BUILTIN_WUNPCKIHH
,
23362 ARM_BUILTIN_WUNPCKIHW
,
23363 ARM_BUILTIN_WUNPCKILB
,
23364 ARM_BUILTIN_WUNPCKILH
,
23365 ARM_BUILTIN_WUNPCKILW
,
23367 ARM_BUILTIN_WUNPCKEHSB
,
23368 ARM_BUILTIN_WUNPCKEHSH
,
23369 ARM_BUILTIN_WUNPCKEHSW
,
23370 ARM_BUILTIN_WUNPCKEHUB
,
23371 ARM_BUILTIN_WUNPCKEHUH
,
23372 ARM_BUILTIN_WUNPCKEHUW
,
23373 ARM_BUILTIN_WUNPCKELSB
,
23374 ARM_BUILTIN_WUNPCKELSH
,
23375 ARM_BUILTIN_WUNPCKELSW
,
23376 ARM_BUILTIN_WUNPCKELUB
,
23377 ARM_BUILTIN_WUNPCKELUH
,
23378 ARM_BUILTIN_WUNPCKELUW
,
23384 ARM_BUILTIN_WADDSUBHX
,
23385 ARM_BUILTIN_WSUBADDHX
,
23387 ARM_BUILTIN_WABSDIFFB
,
23388 ARM_BUILTIN_WABSDIFFH
,
23389 ARM_BUILTIN_WABSDIFFW
,
23391 ARM_BUILTIN_WADDCH
,
23392 ARM_BUILTIN_WADDCW
,
23395 ARM_BUILTIN_WAVG4R
,
23397 ARM_BUILTIN_WMADDSX
,
23398 ARM_BUILTIN_WMADDUX
,
23400 ARM_BUILTIN_WMADDSN
,
23401 ARM_BUILTIN_WMADDUN
,
23403 ARM_BUILTIN_WMULWSM
,
23404 ARM_BUILTIN_WMULWUM
,
23406 ARM_BUILTIN_WMULWSMR
,
23407 ARM_BUILTIN_WMULWUMR
,
23409 ARM_BUILTIN_WMULWL
,
23411 ARM_BUILTIN_WMULSMR
,
23412 ARM_BUILTIN_WMULUMR
,
23414 ARM_BUILTIN_WQMULM
,
23415 ARM_BUILTIN_WQMULMR
,
23417 ARM_BUILTIN_WQMULWM
,
23418 ARM_BUILTIN_WQMULWMR
,
23420 ARM_BUILTIN_WADDBHUSM
,
23421 ARM_BUILTIN_WADDBHUSL
,
23423 ARM_BUILTIN_WQMIABB
,
23424 ARM_BUILTIN_WQMIABT
,
23425 ARM_BUILTIN_WQMIATB
,
23426 ARM_BUILTIN_WQMIATT
,
23428 ARM_BUILTIN_WQMIABBN
,
23429 ARM_BUILTIN_WQMIABTN
,
23430 ARM_BUILTIN_WQMIATBN
,
23431 ARM_BUILTIN_WQMIATTN
,
23433 ARM_BUILTIN_WMIABB
,
23434 ARM_BUILTIN_WMIABT
,
23435 ARM_BUILTIN_WMIATB
,
23436 ARM_BUILTIN_WMIATT
,
23438 ARM_BUILTIN_WMIABBN
,
23439 ARM_BUILTIN_WMIABTN
,
23440 ARM_BUILTIN_WMIATBN
,
23441 ARM_BUILTIN_WMIATTN
,
23443 ARM_BUILTIN_WMIAWBB
,
23444 ARM_BUILTIN_WMIAWBT
,
23445 ARM_BUILTIN_WMIAWTB
,
23446 ARM_BUILTIN_WMIAWTT
,
23448 ARM_BUILTIN_WMIAWBBN
,
23449 ARM_BUILTIN_WMIAWBTN
,
23450 ARM_BUILTIN_WMIAWTBN
,
23451 ARM_BUILTIN_WMIAWTTN
,
23453 ARM_BUILTIN_WMERGE
,
23455 ARM_BUILTIN_CRC32B
,
23456 ARM_BUILTIN_CRC32H
,
23457 ARM_BUILTIN_CRC32W
,
23458 ARM_BUILTIN_CRC32CB
,
23459 ARM_BUILTIN_CRC32CH
,
23460 ARM_BUILTIN_CRC32CW
,
23466 #define CRYPTO1(L, U, M1, M2) \
23467 ARM_BUILTIN_CRYPTO_##U,
23468 #define CRYPTO2(L, U, M1, M2, M3) \
23469 ARM_BUILTIN_CRYPTO_##U,
23470 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23471 ARM_BUILTIN_CRYPTO_##U,
23473 #include "crypto.def"
23479 #include "arm_neon_builtins.def"
23484 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23498 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
23500 #define NUM_DREG_TYPES 5
23501 #define NUM_QREG_TYPES 6
23504 arm_init_neon_builtins (void)
23506 unsigned int i
, fcode
;
23509 tree neon_intQI_type_node
;
23510 tree neon_intHI_type_node
;
23511 tree neon_floatHF_type_node
;
23512 tree neon_polyQI_type_node
;
23513 tree neon_polyHI_type_node
;
23514 tree neon_intSI_type_node
;
23515 tree neon_intDI_type_node
;
23516 tree neon_intUTI_type_node
;
23517 tree neon_float_type_node
;
23519 tree intQI_pointer_node
;
23520 tree intHI_pointer_node
;
23521 tree intSI_pointer_node
;
23522 tree intDI_pointer_node
;
23523 tree float_pointer_node
;
23525 tree const_intQI_node
;
23526 tree const_intHI_node
;
23527 tree const_intSI_node
;
23528 tree const_intDI_node
;
23529 tree const_float_node
;
23531 tree const_intQI_pointer_node
;
23532 tree const_intHI_pointer_node
;
23533 tree const_intSI_pointer_node
;
23534 tree const_intDI_pointer_node
;
23535 tree const_float_pointer_node
;
23537 tree V8QI_type_node
;
23538 tree V4HI_type_node
;
23539 tree V4HF_type_node
;
23540 tree V2SI_type_node
;
23541 tree V2SF_type_node
;
23542 tree V16QI_type_node
;
23543 tree V8HI_type_node
;
23544 tree V4SI_type_node
;
23545 tree V4SF_type_node
;
23546 tree V2DI_type_node
;
23548 tree intUQI_type_node
;
23549 tree intUHI_type_node
;
23550 tree intUSI_type_node
;
23551 tree intUDI_type_node
;
23553 tree intEI_type_node
;
23554 tree intOI_type_node
;
23555 tree intCI_type_node
;
23556 tree intXI_type_node
;
23558 tree V8QI_pointer_node
;
23559 tree V4HI_pointer_node
;
23560 tree V2SI_pointer_node
;
23561 tree V2SF_pointer_node
;
23562 tree V16QI_pointer_node
;
23563 tree V8HI_pointer_node
;
23564 tree V4SI_pointer_node
;
23565 tree V4SF_pointer_node
;
23566 tree V2DI_pointer_node
;
23568 tree void_ftype_pv8qi_v8qi_v8qi
;
23569 tree void_ftype_pv4hi_v4hi_v4hi
;
23570 tree void_ftype_pv2si_v2si_v2si
;
23571 tree void_ftype_pv2sf_v2sf_v2sf
;
23572 tree void_ftype_pdi_di_di
;
23573 tree void_ftype_pv16qi_v16qi_v16qi
;
23574 tree void_ftype_pv8hi_v8hi_v8hi
;
23575 tree void_ftype_pv4si_v4si_v4si
;
23576 tree void_ftype_pv4sf_v4sf_v4sf
;
23577 tree void_ftype_pv2di_v2di_v2di
;
23579 tree reinterp_ftype_dreg
[NUM_DREG_TYPES
][NUM_DREG_TYPES
];
23580 tree reinterp_ftype_qreg
[NUM_QREG_TYPES
][NUM_QREG_TYPES
];
23581 tree dreg_types
[NUM_DREG_TYPES
], qreg_types
[NUM_QREG_TYPES
];
23583 /* Create distinguished type nodes for NEON vector element types,
23584 and pointers to values of such types, so we can detect them later. */
23585 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23586 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23587 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23588 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23589 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
23590 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
23591 neon_float_type_node
= make_node (REAL_TYPE
);
23592 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
23593 layout_type (neon_float_type_node
);
23594 neon_floatHF_type_node
= make_node (REAL_TYPE
);
23595 TYPE_PRECISION (neon_floatHF_type_node
) = GET_MODE_PRECISION (HFmode
);
23596 layout_type (neon_floatHF_type_node
);
23598 /* Define typedefs which exactly correspond to the modes we are basing vector
23599 types on. If you change these names you'll need to change
23600 the table used by arm_mangle_type too. */
23601 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
23602 "__builtin_neon_qi");
23603 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
23604 "__builtin_neon_hi");
23605 (*lang_hooks
.types
.register_builtin_type
) (neon_floatHF_type_node
,
23606 "__builtin_neon_hf");
23607 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
23608 "__builtin_neon_si");
23609 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
23610 "__builtin_neon_sf");
23611 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
23612 "__builtin_neon_di");
23613 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
23614 "__builtin_neon_poly8");
23615 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
23616 "__builtin_neon_poly16");
23618 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
23619 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
23620 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
23621 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
23622 float_pointer_node
= build_pointer_type (neon_float_type_node
);
23624 /* Next create constant-qualified versions of the above types. */
23625 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
23627 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
23629 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
23631 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
23633 const_float_node
= build_qualified_type (neon_float_type_node
,
23636 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
23637 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
23638 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
23639 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
23640 const_float_pointer_node
= build_pointer_type (const_float_node
);
23642 /* Now create vector types based on our NEON element types. */
23643 /* 64-bit vectors. */
23645 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
23647 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
23649 build_vector_type_for_mode (neon_floatHF_type_node
, V4HFmode
);
23651 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
23653 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
23654 /* 128-bit vectors. */
23656 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
23658 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
23660 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
23662 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
23664 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
23666 /* Unsigned integer types for various mode sizes. */
23667 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
23668 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
23669 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
23670 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
23671 neon_intUTI_type_node
= make_unsigned_type (GET_MODE_PRECISION (TImode
));
23674 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
23675 "__builtin_neon_uqi");
23676 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
23677 "__builtin_neon_uhi");
23678 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
23679 "__builtin_neon_usi");
23680 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23681 "__builtin_neon_udi");
23682 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23683 "__builtin_neon_poly64");
23684 (*lang_hooks
.types
.register_builtin_type
) (neon_intUTI_type_node
,
23685 "__builtin_neon_poly128");
23687 /* Opaque integer types for structures of vectors. */
23688 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
23689 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
23690 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
23691 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
23693 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
23694 "__builtin_neon_ti");
23695 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
23696 "__builtin_neon_ei");
23697 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
23698 "__builtin_neon_oi");
23699 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
23700 "__builtin_neon_ci");
23701 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
23702 "__builtin_neon_xi");
23704 /* Pointers to vector types. */
23705 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
23706 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
23707 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
23708 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
23709 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
23710 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
23711 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
23712 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
23713 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
23715 /* Operations which return results as pairs. */
23716 void_ftype_pv8qi_v8qi_v8qi
=
23717 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
23718 V8QI_type_node
, NULL
);
23719 void_ftype_pv4hi_v4hi_v4hi
=
23720 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
23721 V4HI_type_node
, NULL
);
23722 void_ftype_pv2si_v2si_v2si
=
23723 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
23724 V2SI_type_node
, NULL
);
23725 void_ftype_pv2sf_v2sf_v2sf
=
23726 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
23727 V2SF_type_node
, NULL
);
23728 void_ftype_pdi_di_di
=
23729 build_function_type_list (void_type_node
, intDI_pointer_node
,
23730 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
23731 void_ftype_pv16qi_v16qi_v16qi
=
23732 build_function_type_list (void_type_node
, V16QI_pointer_node
,
23733 V16QI_type_node
, V16QI_type_node
, NULL
);
23734 void_ftype_pv8hi_v8hi_v8hi
=
23735 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
23736 V8HI_type_node
, NULL
);
23737 void_ftype_pv4si_v4si_v4si
=
23738 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
23739 V4SI_type_node
, NULL
);
23740 void_ftype_pv4sf_v4sf_v4sf
=
23741 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
23742 V4SF_type_node
, NULL
);
23743 void_ftype_pv2di_v2di_v2di
=
23744 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
23745 V2DI_type_node
, NULL
);
23747 if (TARGET_CRYPTO
&& TARGET_HARD_FLOAT
)
23749 tree V4USI_type_node
=
23750 build_vector_type_for_mode (intUSI_type_node
, V4SImode
);
23752 tree V16UQI_type_node
=
23753 build_vector_type_for_mode (intUQI_type_node
, V16QImode
);
23755 tree v16uqi_ftype_v16uqi
23756 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
, NULL_TREE
);
23758 tree v16uqi_ftype_v16uqi_v16uqi
23759 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
,
23760 V16UQI_type_node
, NULL_TREE
);
23762 tree v4usi_ftype_v4usi
23763 = build_function_type_list (V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23765 tree v4usi_ftype_v4usi_v4usi
23766 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23767 V4USI_type_node
, NULL_TREE
);
23769 tree v4usi_ftype_v4usi_v4usi_v4usi
23770 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23771 V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23773 tree uti_ftype_udi_udi
23774 = build_function_type_list (neon_intUTI_type_node
, intUDI_type_node
,
23775 intUDI_type_node
, NULL_TREE
);
23788 ARM_BUILTIN_CRYPTO_##U
23790 "__builtin_arm_crypto_"#L
23791 #define FT1(R, A) \
23793 #define FT2(R, A1, A2) \
23794 R##_ftype_##A1##_##A2
23795 #define FT3(R, A1, A2, A3) \
23796 R##_ftype_##A1##_##A2##_##A3
23797 #define CRYPTO1(L, U, R, A) \
23798 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23799 C (U), BUILT_IN_MD, \
23801 #define CRYPTO2(L, U, R, A1, A2) \
23802 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23803 C (U), BUILT_IN_MD, \
23806 #define CRYPTO3(L, U, R, A1, A2, A3) \
23807 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23808 C (U), BUILT_IN_MD, \
23810 #include "crypto.def"
23821 dreg_types
[0] = V8QI_type_node
;
23822 dreg_types
[1] = V4HI_type_node
;
23823 dreg_types
[2] = V2SI_type_node
;
23824 dreg_types
[3] = V2SF_type_node
;
23825 dreg_types
[4] = neon_intDI_type_node
;
23827 qreg_types
[0] = V16QI_type_node
;
23828 qreg_types
[1] = V8HI_type_node
;
23829 qreg_types
[2] = V4SI_type_node
;
23830 qreg_types
[3] = V4SF_type_node
;
23831 qreg_types
[4] = V2DI_type_node
;
23832 qreg_types
[5] = neon_intUTI_type_node
;
23834 for (i
= 0; i
< NUM_QREG_TYPES
; i
++)
23837 for (j
= 0; j
< NUM_QREG_TYPES
; j
++)
23839 if (i
< NUM_DREG_TYPES
&& j
< NUM_DREG_TYPES
)
23840 reinterp_ftype_dreg
[i
][j
]
23841 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
23843 reinterp_ftype_qreg
[i
][j
]
23844 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
23848 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
23849 i
< ARRAY_SIZE (neon_builtin_data
);
23852 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
23854 const char* const modenames
[] = {
23855 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23856 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23861 int is_load
= 0, is_store
= 0;
23863 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
23870 case NEON_LOAD1LANE
:
23871 case NEON_LOADSTRUCT
:
23872 case NEON_LOADSTRUCTLANE
:
23874 /* Fall through. */
23876 case NEON_STORE1LANE
:
23877 case NEON_STORESTRUCT
:
23878 case NEON_STORESTRUCTLANE
:
23881 /* Fall through. */
23885 case NEON_LOGICBINOP
:
23886 case NEON_SHIFTINSERT
:
23893 case NEON_SHIFTIMM
:
23894 case NEON_SHIFTACC
:
23900 case NEON_LANEMULL
:
23901 case NEON_LANEMULH
:
23903 case NEON_SCALARMUL
:
23904 case NEON_SCALARMULL
:
23905 case NEON_SCALARMULH
:
23906 case NEON_SCALARMAC
:
23912 tree return_type
= void_type_node
, args
= void_list_node
;
23914 /* Build a function type directly from the insn_data for
23915 this builtin. The build_function_type() function takes
23916 care of removing duplicates for us. */
23917 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
23921 if (is_load
&& k
== 1)
23923 /* Neon load patterns always have the memory
23924 operand in the operand 1 position. */
23925 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23926 == neon_struct_operand
);
23932 eltype
= const_intQI_pointer_node
;
23937 eltype
= const_intHI_pointer_node
;
23942 eltype
= const_intSI_pointer_node
;
23947 eltype
= const_float_pointer_node
;
23952 eltype
= const_intDI_pointer_node
;
23955 default: gcc_unreachable ();
23958 else if (is_store
&& k
== 0)
23960 /* Similarly, Neon store patterns use operand 0 as
23961 the memory location to store to. */
23962 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23963 == neon_struct_operand
);
23969 eltype
= intQI_pointer_node
;
23974 eltype
= intHI_pointer_node
;
23979 eltype
= intSI_pointer_node
;
23984 eltype
= float_pointer_node
;
23989 eltype
= intDI_pointer_node
;
23992 default: gcc_unreachable ();
23997 switch (insn_data
[d
->code
].operand
[k
].mode
)
23999 case VOIDmode
: eltype
= void_type_node
; break;
24001 case QImode
: eltype
= neon_intQI_type_node
; break;
24002 case HImode
: eltype
= neon_intHI_type_node
; break;
24003 case SImode
: eltype
= neon_intSI_type_node
; break;
24004 case SFmode
: eltype
= neon_float_type_node
; break;
24005 case DImode
: eltype
= neon_intDI_type_node
; break;
24006 case TImode
: eltype
= intTI_type_node
; break;
24007 case EImode
: eltype
= intEI_type_node
; break;
24008 case OImode
: eltype
= intOI_type_node
; break;
24009 case CImode
: eltype
= intCI_type_node
; break;
24010 case XImode
: eltype
= intXI_type_node
; break;
24011 /* 64-bit vectors. */
24012 case V8QImode
: eltype
= V8QI_type_node
; break;
24013 case V4HImode
: eltype
= V4HI_type_node
; break;
24014 case V2SImode
: eltype
= V2SI_type_node
; break;
24015 case V2SFmode
: eltype
= V2SF_type_node
; break;
24016 /* 128-bit vectors. */
24017 case V16QImode
: eltype
= V16QI_type_node
; break;
24018 case V8HImode
: eltype
= V8HI_type_node
; break;
24019 case V4SImode
: eltype
= V4SI_type_node
; break;
24020 case V4SFmode
: eltype
= V4SF_type_node
; break;
24021 case V2DImode
: eltype
= V2DI_type_node
; break;
24022 default: gcc_unreachable ();
24026 if (k
== 0 && !is_store
)
24027 return_type
= eltype
;
24029 args
= tree_cons (NULL_TREE
, eltype
, args
);
24032 ftype
= build_function_type (return_type
, args
);
24036 case NEON_RESULTPAIR
:
24038 switch (insn_data
[d
->code
].operand
[1].mode
)
24040 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
24041 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
24042 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
24043 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
24044 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
24045 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
24046 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
24047 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
24048 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
24049 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
24050 default: gcc_unreachable ();
24055 case NEON_REINTERP
:
24057 /* We iterate over NUM_DREG_TYPES doubleword types,
24058 then NUM_QREG_TYPES quadword types.
24059 V4HF is not a type used in reinterpret, so we translate
24060 d->mode to the correct index in reinterp_ftype_dreg. */
24062 = GET_MODE_SIZE (insn_data
[d
->code
].operand
[0].mode
) > 8;
24063 int rhs
= (d
->mode
- ((!qreg_p
&& (d
->mode
> T_V4HF
)) ? 1 : 0))
24065 switch (insn_data
[d
->code
].operand
[0].mode
)
24067 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
24068 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
24069 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
24070 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
24071 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
24072 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
24073 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
24074 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
24075 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
24076 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
24077 case TImode
: ftype
= reinterp_ftype_qreg
[5][rhs
]; break;
24078 default: gcc_unreachable ();
24082 case NEON_FLOAT_WIDEN
:
24084 tree eltype
= NULL_TREE
;
24085 tree return_type
= NULL_TREE
;
24087 switch (insn_data
[d
->code
].operand
[1].mode
)
24090 eltype
= V4HF_type_node
;
24091 return_type
= V4SF_type_node
;
24093 default: gcc_unreachable ();
24095 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
24098 case NEON_FLOAT_NARROW
:
24100 tree eltype
= NULL_TREE
;
24101 tree return_type
= NULL_TREE
;
24103 switch (insn_data
[d
->code
].operand
[1].mode
)
24106 eltype
= V4SF_type_node
;
24107 return_type
= V4HF_type_node
;
24109 default: gcc_unreachable ();
24111 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
24115 gcc_unreachable ();
24118 gcc_assert (ftype
!= NULL
);
24120 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
24122 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
24124 arm_builtin_decls
[fcode
] = decl
;
24128 #undef NUM_DREG_TYPES
24129 #undef NUM_QREG_TYPES
24131 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24134 if ((MASK) & insn_flags) \
24137 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24138 BUILT_IN_MD, NULL, NULL_TREE); \
24139 arm_builtin_decls[CODE] = bdecl; \
24144 struct builtin_description
24146 const unsigned int mask
;
24147 const enum insn_code icode
;
24148 const char * const name
;
24149 const enum arm_builtins code
;
24150 const enum rtx_code comparison
;
24151 const unsigned int flag
;
24154 static const struct builtin_description bdesc_2arg
[] =
24156 #define IWMMXT_BUILTIN(code, string, builtin) \
24157 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24158 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24160 #define IWMMXT2_BUILTIN(code, string, builtin) \
24161 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24162 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24164 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
24165 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
24166 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
24167 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
24168 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
24169 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
24170 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
24171 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
24172 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
24173 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
24174 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
24175 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
24176 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
24177 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
24178 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
24179 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
24180 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
24181 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
24182 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
24183 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
24184 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
24185 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
24186 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
24187 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
24188 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
24189 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
24190 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
24191 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
24192 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
24193 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
24194 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
24195 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
24196 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
24197 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
24198 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
24199 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
24200 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
24201 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
24202 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
24203 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
24204 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
24205 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
24206 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
24207 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
24208 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
24209 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
24210 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
24211 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
24212 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
24213 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
24214 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
24215 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
24216 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
24217 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
24218 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
24219 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
24220 IWMMXT2_BUILTIN (iwmmxt_waddsubhx
, "waddsubhx", WADDSUBHX
)
24221 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx
, "wsubaddhx", WSUBADDHX
)
24222 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb
, "wabsdiffb", WABSDIFFB
)
24223 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh
, "wabsdiffh", WABSDIFFH
)
24224 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw
, "wabsdiffw", WABSDIFFW
)
24225 IWMMXT2_BUILTIN (iwmmxt_avg4
, "wavg4", WAVG4
)
24226 IWMMXT2_BUILTIN (iwmmxt_avg4r
, "wavg4r", WAVG4R
)
24227 IWMMXT2_BUILTIN (iwmmxt_wmulwsm
, "wmulwsm", WMULWSM
)
24228 IWMMXT2_BUILTIN (iwmmxt_wmulwum
, "wmulwum", WMULWUM
)
24229 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr
, "wmulwsmr", WMULWSMR
)
24230 IWMMXT2_BUILTIN (iwmmxt_wmulwumr
, "wmulwumr", WMULWUMR
)
24231 IWMMXT2_BUILTIN (iwmmxt_wmulwl
, "wmulwl", WMULWL
)
24232 IWMMXT2_BUILTIN (iwmmxt_wmulsmr
, "wmulsmr", WMULSMR
)
24233 IWMMXT2_BUILTIN (iwmmxt_wmulumr
, "wmulumr", WMULUMR
)
24234 IWMMXT2_BUILTIN (iwmmxt_wqmulm
, "wqmulm", WQMULM
)
24235 IWMMXT2_BUILTIN (iwmmxt_wqmulmr
, "wqmulmr", WQMULMR
)
24236 IWMMXT2_BUILTIN (iwmmxt_wqmulwm
, "wqmulwm", WQMULWM
)
24237 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr
, "wqmulwmr", WQMULWMR
)
24238 IWMMXT_BUILTIN (iwmmxt_walignr0
, "walignr0", WALIGNR0
)
24239 IWMMXT_BUILTIN (iwmmxt_walignr1
, "walignr1", WALIGNR1
)
24240 IWMMXT_BUILTIN (iwmmxt_walignr2
, "walignr2", WALIGNR2
)
24241 IWMMXT_BUILTIN (iwmmxt_walignr3
, "walignr3", WALIGNR3
)
24243 #define IWMMXT_BUILTIN2(code, builtin) \
24244 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24246 #define IWMMXT2_BUILTIN2(code, builtin) \
24247 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24249 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm
, WADDBHUSM
)
24250 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl
, WADDBHUSL
)
24251 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
24252 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
24253 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
24254 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
24255 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
24256 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
24257 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
24258 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
24260 #define CRC32_BUILTIN(L, U) \
24261 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24263 CRC32_BUILTIN (crc32b
, CRC32B
)
24264 CRC32_BUILTIN (crc32h
, CRC32H
)
24265 CRC32_BUILTIN (crc32w
, CRC32W
)
24266 CRC32_BUILTIN (crc32cb
, CRC32CB
)
24267 CRC32_BUILTIN (crc32ch
, CRC32CH
)
24268 CRC32_BUILTIN (crc32cw
, CRC32CW
)
24269 #undef CRC32_BUILTIN
24272 #define CRYPTO_BUILTIN(L, U) \
24273 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24278 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24279 #define CRYPTO1(L, U, R, A)
24280 #define CRYPTO3(L, U, R, A1, A2, A3)
24281 #include "crypto.def"
24288 static const struct builtin_description bdesc_1arg
[] =
24290 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
24291 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
24292 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
24293 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
24294 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
24295 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
24296 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
24297 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
24298 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
24299 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
24300 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
24301 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
24302 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
24303 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
24304 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
24305 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
24306 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
24307 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
24308 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3
, "wabsb", WABSB
)
24309 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3
, "wabsh", WABSH
)
24310 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3
, "wabsw", WABSW
)
24311 IWMMXT_BUILTIN (tbcstv8qi
, "tbcstb", TBCSTB
)
24312 IWMMXT_BUILTIN (tbcstv4hi
, "tbcsth", TBCSTH
)
24313 IWMMXT_BUILTIN (tbcstv2si
, "tbcstw", TBCSTW
)
24315 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24316 #define CRYPTO2(L, U, R, A1, A2)
24317 #define CRYPTO3(L, U, R, A1, A2, A3)
24318 #include "crypto.def"
24324 static const struct builtin_description bdesc_3arg
[] =
24326 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24327 #define CRYPTO1(L, U, R, A)
24328 #define CRYPTO2(L, U, R, A1, A2)
24329 #include "crypto.def"
24334 #undef CRYPTO_BUILTIN
24336 /* Set up all the iWMMXt builtins. This is not called if
24337 TARGET_IWMMXT is zero. */
24340 arm_init_iwmmxt_builtins (void)
24342 const struct builtin_description
* d
;
24345 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
24346 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
24347 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
24349 tree v8qi_ftype_v8qi_v8qi_int
24350 = build_function_type_list (V8QI_type_node
,
24351 V8QI_type_node
, V8QI_type_node
,
24352 integer_type_node
, NULL_TREE
);
24353 tree v4hi_ftype_v4hi_int
24354 = build_function_type_list (V4HI_type_node
,
24355 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24356 tree v2si_ftype_v2si_int
24357 = build_function_type_list (V2SI_type_node
,
24358 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24359 tree v2si_ftype_di_di
24360 = build_function_type_list (V2SI_type_node
,
24361 long_long_integer_type_node
,
24362 long_long_integer_type_node
,
24364 tree di_ftype_di_int
24365 = build_function_type_list (long_long_integer_type_node
,
24366 long_long_integer_type_node
,
24367 integer_type_node
, NULL_TREE
);
24368 tree di_ftype_di_int_int
24369 = build_function_type_list (long_long_integer_type_node
,
24370 long_long_integer_type_node
,
24372 integer_type_node
, NULL_TREE
);
24373 tree int_ftype_v8qi
24374 = build_function_type_list (integer_type_node
,
24375 V8QI_type_node
, NULL_TREE
);
24376 tree int_ftype_v4hi
24377 = build_function_type_list (integer_type_node
,
24378 V4HI_type_node
, NULL_TREE
);
24379 tree int_ftype_v2si
24380 = build_function_type_list (integer_type_node
,
24381 V2SI_type_node
, NULL_TREE
);
24382 tree int_ftype_v8qi_int
24383 = build_function_type_list (integer_type_node
,
24384 V8QI_type_node
, integer_type_node
, NULL_TREE
);
24385 tree int_ftype_v4hi_int
24386 = build_function_type_list (integer_type_node
,
24387 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24388 tree int_ftype_v2si_int
24389 = build_function_type_list (integer_type_node
,
24390 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24391 tree v8qi_ftype_v8qi_int_int
24392 = build_function_type_list (V8QI_type_node
,
24393 V8QI_type_node
, integer_type_node
,
24394 integer_type_node
, NULL_TREE
);
24395 tree v4hi_ftype_v4hi_int_int
24396 = build_function_type_list (V4HI_type_node
,
24397 V4HI_type_node
, integer_type_node
,
24398 integer_type_node
, NULL_TREE
);
24399 tree v2si_ftype_v2si_int_int
24400 = build_function_type_list (V2SI_type_node
,
24401 V2SI_type_node
, integer_type_node
,
24402 integer_type_node
, NULL_TREE
);
24403 /* Miscellaneous. */
24404 tree v8qi_ftype_v4hi_v4hi
24405 = build_function_type_list (V8QI_type_node
,
24406 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24407 tree v4hi_ftype_v2si_v2si
24408 = build_function_type_list (V4HI_type_node
,
24409 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24410 tree v8qi_ftype_v4hi_v8qi
24411 = build_function_type_list (V8QI_type_node
,
24412 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
24413 tree v2si_ftype_v4hi_v4hi
24414 = build_function_type_list (V2SI_type_node
,
24415 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24416 tree v2si_ftype_v8qi_v8qi
24417 = build_function_type_list (V2SI_type_node
,
24418 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24419 tree v4hi_ftype_v4hi_di
24420 = build_function_type_list (V4HI_type_node
,
24421 V4HI_type_node
, long_long_integer_type_node
,
24423 tree v2si_ftype_v2si_di
24424 = build_function_type_list (V2SI_type_node
,
24425 V2SI_type_node
, long_long_integer_type_node
,
24428 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
24429 tree int_ftype_void
24430 = build_function_type_list (integer_type_node
, NULL_TREE
);
24432 = build_function_type_list (long_long_integer_type_node
,
24433 V8QI_type_node
, NULL_TREE
);
24435 = build_function_type_list (long_long_integer_type_node
,
24436 V4HI_type_node
, NULL_TREE
);
24438 = build_function_type_list (long_long_integer_type_node
,
24439 V2SI_type_node
, NULL_TREE
);
24440 tree v2si_ftype_v4hi
24441 = build_function_type_list (V2SI_type_node
,
24442 V4HI_type_node
, NULL_TREE
);
24443 tree v4hi_ftype_v8qi
24444 = build_function_type_list (V4HI_type_node
,
24445 V8QI_type_node
, NULL_TREE
);
24446 tree v8qi_ftype_v8qi
24447 = build_function_type_list (V8QI_type_node
,
24448 V8QI_type_node
, NULL_TREE
);
24449 tree v4hi_ftype_v4hi
24450 = build_function_type_list (V4HI_type_node
,
24451 V4HI_type_node
, NULL_TREE
);
24452 tree v2si_ftype_v2si
24453 = build_function_type_list (V2SI_type_node
,
24454 V2SI_type_node
, NULL_TREE
);
24456 tree di_ftype_di_v4hi_v4hi
24457 = build_function_type_list (long_long_unsigned_type_node
,
24458 long_long_unsigned_type_node
,
24459 V4HI_type_node
, V4HI_type_node
,
24462 tree di_ftype_v4hi_v4hi
24463 = build_function_type_list (long_long_unsigned_type_node
,
24464 V4HI_type_node
,V4HI_type_node
,
24467 tree v2si_ftype_v2si_v4hi_v4hi
24468 = build_function_type_list (V2SI_type_node
,
24469 V2SI_type_node
, V4HI_type_node
,
24470 V4HI_type_node
, NULL_TREE
);
24472 tree v2si_ftype_v2si_v8qi_v8qi
24473 = build_function_type_list (V2SI_type_node
,
24474 V2SI_type_node
, V8QI_type_node
,
24475 V8QI_type_node
, NULL_TREE
);
24477 tree di_ftype_di_v2si_v2si
24478 = build_function_type_list (long_long_unsigned_type_node
,
24479 long_long_unsigned_type_node
,
24480 V2SI_type_node
, V2SI_type_node
,
24483 tree di_ftype_di_di_int
24484 = build_function_type_list (long_long_unsigned_type_node
,
24485 long_long_unsigned_type_node
,
24486 long_long_unsigned_type_node
,
24487 integer_type_node
, NULL_TREE
);
24489 tree void_ftype_int
24490 = build_function_type_list (void_type_node
,
24491 integer_type_node
, NULL_TREE
);
24493 tree v8qi_ftype_char
24494 = build_function_type_list (V8QI_type_node
,
24495 signed_char_type_node
, NULL_TREE
);
24497 tree v4hi_ftype_short
24498 = build_function_type_list (V4HI_type_node
,
24499 short_integer_type_node
, NULL_TREE
);
24501 tree v2si_ftype_int
24502 = build_function_type_list (V2SI_type_node
,
24503 integer_type_node
, NULL_TREE
);
24505 /* Normal vector binops. */
24506 tree v8qi_ftype_v8qi_v8qi
24507 = build_function_type_list (V8QI_type_node
,
24508 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24509 tree v4hi_ftype_v4hi_v4hi
24510 = build_function_type_list (V4HI_type_node
,
24511 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
24512 tree v2si_ftype_v2si_v2si
24513 = build_function_type_list (V2SI_type_node
,
24514 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24515 tree di_ftype_di_di
24516 = build_function_type_list (long_long_unsigned_type_node
,
24517 long_long_unsigned_type_node
,
24518 long_long_unsigned_type_node
,
24521 /* Add all builtins that are more or less simple operations on two
24523 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
24525 /* Use one of the operands; the target can have a different mode for
24526 mask-generating compares. */
24527 enum machine_mode mode
;
24530 if (d
->name
== 0 || !(d
->mask
== FL_IWMMXT
|| d
->mask
== FL_IWMMXT2
))
24533 mode
= insn_data
[d
->icode
].operand
[1].mode
;
24538 type
= v8qi_ftype_v8qi_v8qi
;
24541 type
= v4hi_ftype_v4hi_v4hi
;
24544 type
= v2si_ftype_v2si_v2si
;
24547 type
= di_ftype_di_di
;
24551 gcc_unreachable ();
24554 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
24557 /* Add the remaining MMX insns with somewhat more complicated types. */
24558 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24559 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24560 ARM_BUILTIN_ ## CODE)
24562 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24563 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24564 ARM_BUILTIN_ ## CODE)
24566 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
24567 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
24568 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
24569 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
24570 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
24571 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
24572 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
24573 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
24574 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
24576 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
24577 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
24578 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
24579 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
24580 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
24581 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
24583 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
24584 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
24585 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
24586 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
24587 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
24588 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
24590 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
24591 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
24592 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
24593 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
24594 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
24595 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
24597 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
24598 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
24599 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
24600 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
24601 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
24602 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
24604 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
24606 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
24607 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
24608 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
24609 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
24610 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
24611 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
24612 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
24613 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
24614 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
24615 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
24617 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
24618 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
24619 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
24620 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
24621 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
24622 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
24623 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
24624 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
24625 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
24627 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
24628 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
24629 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
24631 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
24632 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
24633 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
24635 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
24636 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
24638 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
24639 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
24640 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
24641 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
24642 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
24643 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
24645 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
24646 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
24647 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
24648 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
24649 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
24650 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
24651 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
24652 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
24653 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
24654 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
24655 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
24656 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
24658 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
24659 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
24660 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
24661 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
24663 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
24664 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
24665 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
24666 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
24667 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
24668 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
24669 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
24671 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
24672 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
24673 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
24675 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
24676 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
24677 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
24678 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
24680 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
24681 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
24682 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
24683 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
24685 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
24686 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
24687 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
24688 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
24690 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
24691 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
24692 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
24693 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
24695 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
24696 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
24697 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
24698 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
24700 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
24701 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
24702 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
24703 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
24705 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
24707 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
24708 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
24709 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
24711 #undef iwmmx_mbuiltin
24712 #undef iwmmx2_mbuiltin
24716 arm_init_fp16_builtins (void)
24718 tree fp16_type
= make_node (REAL_TYPE
);
24719 TYPE_PRECISION (fp16_type
) = 16;
24720 layout_type (fp16_type
);
24721 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
24725 arm_init_crc32_builtins ()
24727 tree si_ftype_si_qi
24728 = build_function_type_list (unsigned_intSI_type_node
,
24729 unsigned_intSI_type_node
,
24730 unsigned_intQI_type_node
, NULL_TREE
);
24731 tree si_ftype_si_hi
24732 = build_function_type_list (unsigned_intSI_type_node
,
24733 unsigned_intSI_type_node
,
24734 unsigned_intHI_type_node
, NULL_TREE
);
24735 tree si_ftype_si_si
24736 = build_function_type_list (unsigned_intSI_type_node
,
24737 unsigned_intSI_type_node
,
24738 unsigned_intSI_type_node
, NULL_TREE
);
24740 arm_builtin_decls
[ARM_BUILTIN_CRC32B
]
24741 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi
,
24742 ARM_BUILTIN_CRC32B
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24743 arm_builtin_decls
[ARM_BUILTIN_CRC32H
]
24744 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi
,
24745 ARM_BUILTIN_CRC32H
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24746 arm_builtin_decls
[ARM_BUILTIN_CRC32W
]
24747 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si
,
24748 ARM_BUILTIN_CRC32W
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24749 arm_builtin_decls
[ARM_BUILTIN_CRC32CB
]
24750 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi
,
24751 ARM_BUILTIN_CRC32CB
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24752 arm_builtin_decls
[ARM_BUILTIN_CRC32CH
]
24753 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi
,
24754 ARM_BUILTIN_CRC32CH
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24755 arm_builtin_decls
[ARM_BUILTIN_CRC32CW
]
24756 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si
,
24757 ARM_BUILTIN_CRC32CW
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24761 arm_init_builtins (void)
24763 if (TARGET_REALLY_IWMMXT
)
24764 arm_init_iwmmxt_builtins ();
24767 arm_init_neon_builtins ();
24769 if (arm_fp16_format
)
24770 arm_init_fp16_builtins ();
24773 arm_init_crc32_builtins ();
24776 /* Return the ARM builtin for CODE. */
24779 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
24781 if (code
>= ARM_BUILTIN_MAX
)
24782 return error_mark_node
;
24784 return arm_builtin_decls
[code
];
24787 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24789 static const char *
24790 arm_invalid_parameter_type (const_tree t
)
24792 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24793 return N_("function parameters cannot have __fp16 type");
24797 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24799 static const char *
24800 arm_invalid_return_type (const_tree t
)
24802 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24803 return N_("functions cannot return __fp16 type");
24807 /* Implement TARGET_PROMOTED_TYPE. */
24810 arm_promoted_type (const_tree t
)
24812 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24813 return float_type_node
;
24817 /* Implement TARGET_CONVERT_TO_TYPE.
24818 Specifically, this hook implements the peculiarity of the ARM
24819 half-precision floating-point C semantics that requires conversions between
24820 __fp16 to or from double to do an intermediate conversion to float. */
24823 arm_convert_to_type (tree type
, tree expr
)
24825 tree fromtype
= TREE_TYPE (expr
);
24826 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
24828 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
24829 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
24830 return convert (type
, convert (float_type_node
, expr
));
24834 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24835 This simply adds HFmode as a supported mode; even though we don't
24836 implement arithmetic on this type directly, it's supported by
24837 optabs conversions, much the way the double-word arithmetic is
24838 special-cased in the default hook. */
24841 arm_scalar_mode_supported_p (enum machine_mode mode
)
24843 if (mode
== HFmode
)
24844 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
24845 else if (ALL_FIXED_POINT_MODE_P (mode
))
24848 return default_scalar_mode_supported_p (mode
);
24851 /* Errors in the source file can cause expand_expr to return const0_rtx
24852 where we expect a vector. To avoid crashing, use one of the vector
24853 clear instructions. */
24856 safe_vector_operand (rtx x
, enum machine_mode mode
)
24858 if (x
!= const0_rtx
)
24860 x
= gen_reg_rtx (mode
);
24862 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
24863 : gen_rtx_SUBREG (DImode
, x
, 0)));
24867 /* Function to expand ternary builtins. */
24869 arm_expand_ternop_builtin (enum insn_code icode
,
24870 tree exp
, rtx target
)
24873 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24874 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24875 tree arg2
= CALL_EXPR_ARG (exp
, 2);
24877 rtx op0
= expand_normal (arg0
);
24878 rtx op1
= expand_normal (arg1
);
24879 rtx op2
= expand_normal (arg2
);
24880 rtx op3
= NULL_RTX
;
24882 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24883 lane operand depending on endianness. */
24884 bool builtin_sha1cpm_p
= false;
24886 if (insn_data
[icode
].n_operands
== 5)
24888 gcc_assert (icode
== CODE_FOR_crypto_sha1c
24889 || icode
== CODE_FOR_crypto_sha1p
24890 || icode
== CODE_FOR_crypto_sha1m
);
24891 builtin_sha1cpm_p
= true;
24893 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24894 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24895 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24896 enum machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
24899 if (VECTOR_MODE_P (mode0
))
24900 op0
= safe_vector_operand (op0
, mode0
);
24901 if (VECTOR_MODE_P (mode1
))
24902 op1
= safe_vector_operand (op1
, mode1
);
24903 if (VECTOR_MODE_P (mode2
))
24904 op2
= safe_vector_operand (op2
, mode2
);
24907 || GET_MODE (target
) != tmode
24908 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24909 target
= gen_reg_rtx (tmode
);
24911 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
24912 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
)
24913 && (GET_MODE (op2
) == mode2
|| GET_MODE (op2
) == VOIDmode
));
24915 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24916 op0
= copy_to_mode_reg (mode0
, op0
);
24917 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24918 op1
= copy_to_mode_reg (mode1
, op1
);
24919 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
24920 op2
= copy_to_mode_reg (mode2
, op2
);
24921 if (builtin_sha1cpm_p
)
24922 op3
= GEN_INT (TARGET_BIG_END
? 1 : 0);
24924 if (builtin_sha1cpm_p
)
24925 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
24927 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
24934 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24937 arm_expand_binop_builtin (enum insn_code icode
,
24938 tree exp
, rtx target
)
24941 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24942 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24943 rtx op0
= expand_normal (arg0
);
24944 rtx op1
= expand_normal (arg1
);
24945 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24946 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24947 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24949 if (VECTOR_MODE_P (mode0
))
24950 op0
= safe_vector_operand (op0
, mode0
);
24951 if (VECTOR_MODE_P (mode1
))
24952 op1
= safe_vector_operand (op1
, mode1
);
24955 || GET_MODE (target
) != tmode
24956 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24957 target
= gen_reg_rtx (tmode
);
24959 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
24960 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
24962 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24963 op0
= copy_to_mode_reg (mode0
, op0
);
24964 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24965 op1
= copy_to_mode_reg (mode1
, op1
);
24967 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24974 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24977 arm_expand_unop_builtin (enum insn_code icode
,
24978 tree exp
, rtx target
, int do_load
)
24981 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24982 rtx op0
= expand_normal (arg0
);
24983 rtx op1
= NULL_RTX
;
24984 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24985 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24986 bool builtin_sha1h_p
= false;
24988 if (insn_data
[icode
].n_operands
== 3)
24990 gcc_assert (icode
== CODE_FOR_crypto_sha1h
);
24991 builtin_sha1h_p
= true;
24995 || GET_MODE (target
) != tmode
24996 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24997 target
= gen_reg_rtx (tmode
);
24999 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
25002 if (VECTOR_MODE_P (mode0
))
25003 op0
= safe_vector_operand (op0
, mode0
);
25005 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25006 op0
= copy_to_mode_reg (mode0
, op0
);
25008 if (builtin_sha1h_p
)
25009 op1
= GEN_INT (TARGET_BIG_END
? 1 : 0);
25011 if (builtin_sha1h_p
)
25012 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25014 pat
= GEN_FCN (icode
) (target
, op0
);
/* How a Neon builtin argument must be legalised by arm_expand_neon_args:
   copied into a register, verified as a constant, treated as a memory
   reference, or (NEON_ARG_STOP) terminating the variadic list.  */
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;

/* Maximum number of operands any Neon builtin takes.  */
#define NEON_MAX_BUILTIN_ARGS 5
25030 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25031 and return an expression for the accessed memory.
25033 The intrinsic function operates on a block of registers that has
25034 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25035 function references the memory at EXP of type TYPE and in mode
25036 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25040 neon_dereference_pointer (tree exp
, tree type
, enum machine_mode mem_mode
,
25041 enum machine_mode reg_mode
,
25042 neon_builtin_type_mode type_mode
)
25044 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
25045 tree elem_type
, upper_bound
, array_type
;
25047 /* Work out the size of the register block in bytes. */
25048 reg_size
= GET_MODE_SIZE (reg_mode
);
25050 /* Work out the size of each vector in bytes. */
25051 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
25052 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
25054 /* Work out how many vectors there are. */
25055 gcc_assert (reg_size
% vector_size
== 0);
25056 nvectors
= reg_size
/ vector_size
;
25058 /* Work out the type of each element. */
25059 gcc_assert (POINTER_TYPE_P (type
));
25060 elem_type
= TREE_TYPE (type
);
25062 /* Work out how many elements are being loaded or stored.
25063 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25064 and memory elements; anything else implies a lane load or store. */
25065 if (mem_mode
== reg_mode
)
25066 nelems
= vector_size
* nvectors
/ int_size_in_bytes (elem_type
);
25070 /* Create a type that describes the full access. */
25071 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
25072 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
25074 /* Dereference EXP using that type. */
25075 return fold_build2 (MEM_REF
, array_type
, exp
,
25076 build_int_cst (build_pointer_type (array_type
), 0));
25079 /* Expand a Neon builtin. */
25081 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
25082 neon_builtin_type_mode type_mode
,
25083 tree exp
, int fcode
, ...)
25087 tree arg
[NEON_MAX_BUILTIN_ARGS
];
25088 rtx op
[NEON_MAX_BUILTIN_ARGS
];
25091 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25092 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
25093 enum machine_mode other_mode
;
25099 || GET_MODE (target
) != tmode
25100 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
25101 target
= gen_reg_rtx (tmode
);
25103 va_start (ap
, fcode
);
25105 formals
= TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls
[fcode
]));
25109 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
25111 if (thisarg
== NEON_ARG_STOP
)
25115 opno
= argc
+ have_retval
;
25116 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
25117 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
25118 arg_type
= TREE_VALUE (formals
);
25119 if (thisarg
== NEON_ARG_MEMORY
)
25121 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
25122 arg
[argc
] = neon_dereference_pointer (arg
[argc
], arg_type
,
25123 mode
[argc
], other_mode
,
25127 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
25129 op
[argc
] = expand_expr (arg
[argc
], NULL_RTX
, VOIDmode
,
25130 (thisarg
== NEON_ARG_MEMORY
25131 ? EXPAND_MEMORY
: EXPAND_NORMAL
));
25135 case NEON_ARG_COPY_TO_REG
:
25136 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25137 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25138 (op
[argc
], mode
[argc
]))
25139 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
25142 case NEON_ARG_CONSTANT
:
25143 /* FIXME: This error message is somewhat unhelpful. */
25144 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25145 (op
[argc
], mode
[argc
]))
25146 error ("argument must be a constant");
25149 case NEON_ARG_MEMORY
:
25150 /* Check if expand failed. */
25151 if (op
[argc
] == const0_rtx
)
25153 gcc_assert (MEM_P (op
[argc
]));
25154 PUT_MODE (op
[argc
], mode
[argc
]);
25155 /* ??? arm_neon.h uses the same built-in functions for signed
25156 and unsigned accesses, casting where necessary. This isn't
25158 set_mem_alias_set (op
[argc
], 0);
25159 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25160 (op
[argc
], mode
[argc
]))
25161 op
[argc
] = (replace_equiv_address
25162 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
25165 case NEON_ARG_STOP
:
25166 gcc_unreachable ();
25170 formals
= TREE_CHAIN (formals
);
25180 pat
= GEN_FCN (icode
) (target
, op
[0]);
25184 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
25188 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
25192 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
25196 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
25200 gcc_unreachable ();
25206 pat
= GEN_FCN (icode
) (op
[0]);
25210 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
25214 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
25218 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
25222 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
25226 gcc_unreachable ();
25237 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25238 constants defined per-instruction or per instruction-variant. Instead, the
25239 required info is looked up in the table neon_builtin_data. */
25241 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
25243 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
25244 neon_itype itype
= d
->itype
;
25245 enum insn_code icode
= d
->code
;
25246 neon_builtin_type_mode type_mode
= d
->mode
;
25253 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25254 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25258 case NEON_SCALARMUL
:
25259 case NEON_SCALARMULL
:
25260 case NEON_SCALARMULH
:
25261 case NEON_SHIFTINSERT
:
25262 case NEON_LOGICBINOP
:
25263 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25264 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25268 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25269 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25270 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25274 case NEON_SHIFTIMM
:
25275 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25276 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
25280 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25281 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25286 case NEON_FLOAT_WIDEN
:
25287 case NEON_FLOAT_NARROW
:
25288 case NEON_REINTERP
:
25289 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25290 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25294 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25295 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25297 case NEON_RESULTPAIR
:
25298 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25299 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25303 case NEON_LANEMULL
:
25304 case NEON_LANEMULH
:
25305 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25306 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25307 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25310 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25311 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25312 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25314 case NEON_SHIFTACC
:
25315 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25316 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25317 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25319 case NEON_SCALARMAC
:
25320 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25321 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25322 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25326 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25327 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25331 case NEON_LOADSTRUCT
:
25332 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25333 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
25335 case NEON_LOAD1LANE
:
25336 case NEON_LOADSTRUCTLANE
:
25337 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25338 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25342 case NEON_STORESTRUCT
:
25343 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25344 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25346 case NEON_STORE1LANE
:
25347 case NEON_STORESTRUCTLANE
:
25348 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25349 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25353 gcc_unreachable ();
25356 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25358 neon_reinterpret (rtx dest
, rtx src
)
25360 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
25363 /* Emit code to place a Neon pair result in memory locations (with equal
25366 neon_emit_pair_result_insn (enum machine_mode mode
,
25367 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
25370 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
25371 rtx tmp1
= gen_reg_rtx (mode
);
25372 rtx tmp2
= gen_reg_rtx (mode
);
25374 emit_insn (intfn (tmp1
, op1
, op2
, tmp2
));
25376 emit_move_insn (mem
, tmp1
);
25377 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
25378 emit_move_insn (mem
, tmp2
);
25381 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25382 not to early-clobber SRC registers in the process.
25384 We assume that the operands described by SRC and DEST represent a
25385 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25386 number of components into which the copy has been decomposed. */
25388 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
25392 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
25393 || REGNO (operands
[0]) < REGNO (operands
[1]))
25395 for (i
= 0; i
< count
; i
++)
25397 operands
[2 * i
] = dest
[i
];
25398 operands
[2 * i
+ 1] = src
[i
];
25403 for (i
= 0; i
< count
; i
++)
25405 operands
[2 * i
] = dest
[count
- i
- 1];
25406 operands
[2 * i
+ 1] = src
[count
- i
- 1];
25411 /* Split operands into moves from op[1] + op[2] into op[0]. */
25414 neon_split_vcombine (rtx operands
[3])
25416 unsigned int dest
= REGNO (operands
[0]);
25417 unsigned int src1
= REGNO (operands
[1]);
25418 unsigned int src2
= REGNO (operands
[2]);
25419 enum machine_mode halfmode
= GET_MODE (operands
[1]);
25420 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
25421 rtx destlo
, desthi
;
25423 if (src1
== dest
&& src2
== dest
+ halfregs
)
25425 /* No-op move. Can't split to nothing; emit something. */
25426 emit_note (NOTE_INSN_DELETED
);
25430 /* Preserve register attributes for variable tracking. */
25431 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
25432 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
25433 GET_MODE_SIZE (halfmode
));
25435 /* Special case of reversed high/low parts. Use VSWP. */
25436 if (src2
== dest
&& src1
== dest
+ halfregs
)
25438 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
25439 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
25440 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
25444 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
25446 /* Try to avoid unnecessary moves if part of the result
25447 is in the right place already. */
25449 emit_move_insn (destlo
, operands
[1]);
25450 if (src2
!= dest
+ halfregs
)
25451 emit_move_insn (desthi
, operands
[2]);
25455 if (src2
!= dest
+ halfregs
)
25456 emit_move_insn (desthi
, operands
[2]);
25458 emit_move_insn (destlo
, operands
[1]);
25462 /* Expand an expression EXP that calls a built-in function,
25463 with result going to TARGET if that's convenient
25464 (and in mode MODE if that's convenient).
25465 SUBTARGET may be used as the target for computing one of EXP's operands.
25466 IGNORE is nonzero if the value is to be ignored. */
25469 arm_expand_builtin (tree exp
,
25471 rtx subtarget ATTRIBUTE_UNUSED
,
25472 enum machine_mode mode ATTRIBUTE_UNUSED
,
25473 int ignore ATTRIBUTE_UNUSED
)
25475 const struct builtin_description
* d
;
25476 enum insn_code icode
;
25477 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
25485 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
25487 enum machine_mode tmode
;
25488 enum machine_mode mode0
;
25489 enum machine_mode mode1
;
25490 enum machine_mode mode2
;
25496 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
25497 return arm_expand_neon_builtin (fcode
, exp
, target
);
25501 case ARM_BUILTIN_TEXTRMSB
:
25502 case ARM_BUILTIN_TEXTRMUB
:
25503 case ARM_BUILTIN_TEXTRMSH
:
25504 case ARM_BUILTIN_TEXTRMUH
:
25505 case ARM_BUILTIN_TEXTRMSW
:
25506 case ARM_BUILTIN_TEXTRMUW
:
25507 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
25508 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
25509 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
25510 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
25511 : CODE_FOR_iwmmxt_textrmw
);
25513 arg0
= CALL_EXPR_ARG (exp
, 0);
25514 arg1
= CALL_EXPR_ARG (exp
, 1);
25515 op0
= expand_normal (arg0
);
25516 op1
= expand_normal (arg1
);
25517 tmode
= insn_data
[icode
].operand
[0].mode
;
25518 mode0
= insn_data
[icode
].operand
[1].mode
;
25519 mode1
= insn_data
[icode
].operand
[2].mode
;
25521 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25522 op0
= copy_to_mode_reg (mode0
, op0
);
25523 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25525 /* @@@ better error message */
25526 error ("selector must be an immediate");
25527 return gen_reg_rtx (tmode
);
25530 opint
= INTVAL (op1
);
25531 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
25533 if (opint
> 7 || opint
< 0)
25534 error ("the range of selector should be in 0 to 7");
25536 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
25538 if (opint
> 3 || opint
< 0)
25539 error ("the range of selector should be in 0 to 3");
25541 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25543 if (opint
> 1 || opint
< 0)
25544 error ("the range of selector should be in 0 to 1");
25548 || GET_MODE (target
) != tmode
25549 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25550 target
= gen_reg_rtx (tmode
);
25551 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25557 case ARM_BUILTIN_WALIGNI
:
25558 /* If op2 is immediate, call walighi, else call walighr. */
25559 arg0
= CALL_EXPR_ARG (exp
, 0);
25560 arg1
= CALL_EXPR_ARG (exp
, 1);
25561 arg2
= CALL_EXPR_ARG (exp
, 2);
25562 op0
= expand_normal (arg0
);
25563 op1
= expand_normal (arg1
);
25564 op2
= expand_normal (arg2
);
25565 if (CONST_INT_P (op2
))
25567 icode
= CODE_FOR_iwmmxt_waligni
;
25568 tmode
= insn_data
[icode
].operand
[0].mode
;
25569 mode0
= insn_data
[icode
].operand
[1].mode
;
25570 mode1
= insn_data
[icode
].operand
[2].mode
;
25571 mode2
= insn_data
[icode
].operand
[3].mode
;
25572 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25573 op0
= copy_to_mode_reg (mode0
, op0
);
25574 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25575 op1
= copy_to_mode_reg (mode1
, op1
);
25576 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
25577 selector
= INTVAL (op2
);
25578 if (selector
> 7 || selector
< 0)
25579 error ("the range of selector should be in 0 to 7");
25583 icode
= CODE_FOR_iwmmxt_walignr
;
25584 tmode
= insn_data
[icode
].operand
[0].mode
;
25585 mode0
= insn_data
[icode
].operand
[1].mode
;
25586 mode1
= insn_data
[icode
].operand
[2].mode
;
25587 mode2
= insn_data
[icode
].operand
[3].mode
;
25588 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25589 op0
= copy_to_mode_reg (mode0
, op0
);
25590 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25591 op1
= copy_to_mode_reg (mode1
, op1
);
25592 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25593 op2
= copy_to_mode_reg (mode2
, op2
);
25596 || GET_MODE (target
) != tmode
25597 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25598 target
= gen_reg_rtx (tmode
);
25599 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25605 case ARM_BUILTIN_TINSRB
:
25606 case ARM_BUILTIN_TINSRH
:
25607 case ARM_BUILTIN_TINSRW
:
25608 case ARM_BUILTIN_WMERGE
:
25609 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
25610 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
25611 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
25612 : CODE_FOR_iwmmxt_tinsrw
);
25613 arg0
= CALL_EXPR_ARG (exp
, 0);
25614 arg1
= CALL_EXPR_ARG (exp
, 1);
25615 arg2
= CALL_EXPR_ARG (exp
, 2);
25616 op0
= expand_normal (arg0
);
25617 op1
= expand_normal (arg1
);
25618 op2
= expand_normal (arg2
);
25619 tmode
= insn_data
[icode
].operand
[0].mode
;
25620 mode0
= insn_data
[icode
].operand
[1].mode
;
25621 mode1
= insn_data
[icode
].operand
[2].mode
;
25622 mode2
= insn_data
[icode
].operand
[3].mode
;
25624 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25625 op0
= copy_to_mode_reg (mode0
, op0
);
25626 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25627 op1
= copy_to_mode_reg (mode1
, op1
);
25628 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25630 error ("selector must be an immediate");
25633 if (icode
== CODE_FOR_iwmmxt_wmerge
)
25635 selector
= INTVAL (op2
);
25636 if (selector
> 7 || selector
< 0)
25637 error ("the range of selector should be in 0 to 7");
25639 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
25640 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
25641 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
25644 selector
= INTVAL (op2
);
25645 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
25646 error ("the range of selector should be in 0 to 7");
25647 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
25648 error ("the range of selector should be in 0 to 3");
25649 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
25650 error ("the range of selector should be in 0 to 1");
25652 op2
= GEN_INT (mask
);
25655 || GET_MODE (target
) != tmode
25656 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25657 target
= gen_reg_rtx (tmode
);
25658 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25664 case ARM_BUILTIN_SETWCGR0
:
25665 case ARM_BUILTIN_SETWCGR1
:
25666 case ARM_BUILTIN_SETWCGR2
:
25667 case ARM_BUILTIN_SETWCGR3
:
25668 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
25669 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
25670 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
25671 : CODE_FOR_iwmmxt_setwcgr3
);
25672 arg0
= CALL_EXPR_ARG (exp
, 0);
25673 op0
= expand_normal (arg0
);
25674 mode0
= insn_data
[icode
].operand
[0].mode
;
25675 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
25676 op0
= copy_to_mode_reg (mode0
, op0
);
25677 pat
= GEN_FCN (icode
) (op0
);
25683 case ARM_BUILTIN_GETWCGR0
:
25684 case ARM_BUILTIN_GETWCGR1
:
25685 case ARM_BUILTIN_GETWCGR2
:
25686 case ARM_BUILTIN_GETWCGR3
:
25687 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
25688 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
25689 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
25690 : CODE_FOR_iwmmxt_getwcgr3
);
25691 tmode
= insn_data
[icode
].operand
[0].mode
;
25693 || GET_MODE (target
) != tmode
25694 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25695 target
= gen_reg_rtx (tmode
);
25696 pat
= GEN_FCN (icode
) (target
);
25702 case ARM_BUILTIN_WSHUFH
:
25703 icode
= CODE_FOR_iwmmxt_wshufh
;
25704 arg0
= CALL_EXPR_ARG (exp
, 0);
25705 arg1
= CALL_EXPR_ARG (exp
, 1);
25706 op0
= expand_normal (arg0
);
25707 op1
= expand_normal (arg1
);
25708 tmode
= insn_data
[icode
].operand
[0].mode
;
25709 mode1
= insn_data
[icode
].operand
[1].mode
;
25710 mode2
= insn_data
[icode
].operand
[2].mode
;
25712 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
25713 op0
= copy_to_mode_reg (mode1
, op0
);
25714 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
25716 error ("mask must be an immediate");
25719 selector
= INTVAL (op1
);
25720 if (selector
< 0 || selector
> 255)
25721 error ("the range of mask should be in 0 to 255");
25723 || GET_MODE (target
) != tmode
25724 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25725 target
= gen_reg_rtx (tmode
);
25726 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25732 case ARM_BUILTIN_WMADDS
:
25733 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
25734 case ARM_BUILTIN_WMADDSX
:
25735 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
25736 case ARM_BUILTIN_WMADDSN
:
25737 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
25738 case ARM_BUILTIN_WMADDU
:
25739 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
25740 case ARM_BUILTIN_WMADDUX
:
25741 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
25742 case ARM_BUILTIN_WMADDUN
:
25743 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
25744 case ARM_BUILTIN_WSADBZ
:
25745 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
25746 case ARM_BUILTIN_WSADHZ
:
25747 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
25749 /* Several three-argument builtins. */
25750 case ARM_BUILTIN_WMACS
:
25751 case ARM_BUILTIN_WMACU
:
25752 case ARM_BUILTIN_TMIA
:
25753 case ARM_BUILTIN_TMIAPH
:
25754 case ARM_BUILTIN_TMIATT
:
25755 case ARM_BUILTIN_TMIATB
:
25756 case ARM_BUILTIN_TMIABT
:
25757 case ARM_BUILTIN_TMIABB
:
25758 case ARM_BUILTIN_WQMIABB
:
25759 case ARM_BUILTIN_WQMIABT
:
25760 case ARM_BUILTIN_WQMIATB
:
25761 case ARM_BUILTIN_WQMIATT
:
25762 case ARM_BUILTIN_WQMIABBN
:
25763 case ARM_BUILTIN_WQMIABTN
:
25764 case ARM_BUILTIN_WQMIATBN
:
25765 case ARM_BUILTIN_WQMIATTN
:
25766 case ARM_BUILTIN_WMIABB
:
25767 case ARM_BUILTIN_WMIABT
:
25768 case ARM_BUILTIN_WMIATB
:
25769 case ARM_BUILTIN_WMIATT
:
25770 case ARM_BUILTIN_WMIABBN
:
25771 case ARM_BUILTIN_WMIABTN
:
25772 case ARM_BUILTIN_WMIATBN
:
25773 case ARM_BUILTIN_WMIATTN
:
25774 case ARM_BUILTIN_WMIAWBB
:
25775 case ARM_BUILTIN_WMIAWBT
:
25776 case ARM_BUILTIN_WMIAWTB
:
25777 case ARM_BUILTIN_WMIAWTT
:
25778 case ARM_BUILTIN_WMIAWBBN
:
25779 case ARM_BUILTIN_WMIAWBTN
:
25780 case ARM_BUILTIN_WMIAWTBN
:
25781 case ARM_BUILTIN_WMIAWTTN
:
25782 case ARM_BUILTIN_WSADB
:
25783 case ARM_BUILTIN_WSADH
:
25784 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
25785 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
25786 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
25787 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
25788 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
25789 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
25790 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
25791 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
25792 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
25793 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
25794 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
25795 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
25796 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
25797 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
25798 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
25799 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
25800 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
25801 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
25802 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
25803 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
25804 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
25805 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
25806 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
25807 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
25808 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
25809 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
25810 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
25811 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
25812 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
25813 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
25814 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
25815 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
25816 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
25817 : CODE_FOR_iwmmxt_wsadh
);
25818 arg0
= CALL_EXPR_ARG (exp
, 0);
25819 arg1
= CALL_EXPR_ARG (exp
, 1);
25820 arg2
= CALL_EXPR_ARG (exp
, 2);
25821 op0
= expand_normal (arg0
);
25822 op1
= expand_normal (arg1
);
25823 op2
= expand_normal (arg2
);
25824 tmode
= insn_data
[icode
].operand
[0].mode
;
25825 mode0
= insn_data
[icode
].operand
[1].mode
;
25826 mode1
= insn_data
[icode
].operand
[2].mode
;
25827 mode2
= insn_data
[icode
].operand
[3].mode
;
25829 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25830 op0
= copy_to_mode_reg (mode0
, op0
);
25831 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25832 op1
= copy_to_mode_reg (mode1
, op1
);
25833 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25834 op2
= copy_to_mode_reg (mode2
, op2
);
25836 || GET_MODE (target
) != tmode
25837 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25838 target
= gen_reg_rtx (tmode
);
25839 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25845 case ARM_BUILTIN_WZERO
:
25846 target
= gen_reg_rtx (DImode
);
25847 emit_insn (gen_iwmmxt_clrdi (target
));
25850 case ARM_BUILTIN_WSRLHI
:
25851 case ARM_BUILTIN_WSRLWI
:
25852 case ARM_BUILTIN_WSRLDI
:
25853 case ARM_BUILTIN_WSLLHI
:
25854 case ARM_BUILTIN_WSLLWI
:
25855 case ARM_BUILTIN_WSLLDI
:
25856 case ARM_BUILTIN_WSRAHI
:
25857 case ARM_BUILTIN_WSRAWI
:
25858 case ARM_BUILTIN_WSRADI
:
25859 case ARM_BUILTIN_WRORHI
:
25860 case ARM_BUILTIN_WRORWI
:
25861 case ARM_BUILTIN_WRORDI
:
25862 case ARM_BUILTIN_WSRLH
:
25863 case ARM_BUILTIN_WSRLW
:
25864 case ARM_BUILTIN_WSRLD
:
25865 case ARM_BUILTIN_WSLLH
:
25866 case ARM_BUILTIN_WSLLW
:
25867 case ARM_BUILTIN_WSLLD
:
25868 case ARM_BUILTIN_WSRAH
:
25869 case ARM_BUILTIN_WSRAW
:
25870 case ARM_BUILTIN_WSRAD
:
25871 case ARM_BUILTIN_WRORH
:
25872 case ARM_BUILTIN_WRORW
:
25873 case ARM_BUILTIN_WRORD
:
25874 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
25875 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
25876 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
25877 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
25878 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
25879 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
25880 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
25881 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
25882 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
25883 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
25884 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
25885 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
25886 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
25887 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
25888 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
25889 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
25890 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
25891 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
25892 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
25893 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
25894 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
25895 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
25896 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
25897 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
25898 : CODE_FOR_nothing
);
25899 arg1
= CALL_EXPR_ARG (exp
, 1);
25900 op1
= expand_normal (arg1
);
25901 if (GET_MODE (op1
) == VOIDmode
)
25903 imm
= INTVAL (op1
);
25904 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
25905 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
25906 && (imm
< 0 || imm
> 32))
25908 if (fcode
== ARM_BUILTIN_WRORHI
)
25909 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25910 else if (fcode
== ARM_BUILTIN_WRORWI
)
25911 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25912 else if (fcode
== ARM_BUILTIN_WRORH
)
25913 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25915 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25917 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
25918 && (imm
< 0 || imm
> 64))
25920 if (fcode
== ARM_BUILTIN_WRORDI
)
25921 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25923 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25927 if (fcode
== ARM_BUILTIN_WSRLHI
)
25928 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25929 else if (fcode
== ARM_BUILTIN_WSRLWI
)
25930 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25931 else if (fcode
== ARM_BUILTIN_WSRLDI
)
25932 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25933 else if (fcode
== ARM_BUILTIN_WSLLHI
)
25934 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25935 else if (fcode
== ARM_BUILTIN_WSLLWI
)
25936 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25937 else if (fcode
== ARM_BUILTIN_WSLLDI
)
25938 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25939 else if (fcode
== ARM_BUILTIN_WSRAHI
)
25940 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25941 else if (fcode
== ARM_BUILTIN_WSRAWI
)
25942 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25943 else if (fcode
== ARM_BUILTIN_WSRADI
)
25944 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25945 else if (fcode
== ARM_BUILTIN_WSRLH
)
25946 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25947 else if (fcode
== ARM_BUILTIN_WSRLW
)
25948 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25949 else if (fcode
== ARM_BUILTIN_WSRLD
)
25950 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25951 else if (fcode
== ARM_BUILTIN_WSLLH
)
25952 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25953 else if (fcode
== ARM_BUILTIN_WSLLW
)
25954 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25955 else if (fcode
== ARM_BUILTIN_WSLLD
)
25956 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25957 else if (fcode
== ARM_BUILTIN_WSRAH
)
25958 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25959 else if (fcode
== ARM_BUILTIN_WSRAW
)
25960 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25962 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25965 return arm_expand_binop_builtin (icode
, exp
, target
);
25971 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
25972 if (d
->code
== (const enum arm_builtins
) fcode
)
25973 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
25975 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
25976 if (d
->code
== (const enum arm_builtins
) fcode
)
25977 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
25979 for (i
= 0, d
= bdesc_3arg
; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
25980 if (d
->code
== (const enum arm_builtins
) fcode
)
25981 return arm_expand_ternop_builtin (d
->icode
, exp
, target
);
25983 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.
   Thin wrapper so callers read as intent rather than as bit tricks;
   behavior for MASK == 0 follows ctz_hwi.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.

   Returns the emitted push insn, annotated with a
   REG_FRAME_RELATED_EXPR note describing the effect for unwind info.  */
static rtx
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg, insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.
     Each iteration clears the lowest set bit of MASK, so I counts
     the pushed registers.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      /* The first element carries the UNSPEC that identifies this
	 as a multi-register push; the rest are plain USEs.  */
      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  /* Wrap element 0 in the SP pre-decrement store that represents the
     push itself (SP is adjusted by 4 bytes per register).  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.
     PAR is reused from here on to build the REG_FRAME_RELATED_EXPR.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.
     These slots describe REAL_REGS, which may differ from the
     registers physically pushed above.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (VOIDmode, tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  /* With no saved registers to describe, the note is just the stack
     adjustment; otherwise bundle adjustment + saves in a SEQUENCE.  */
  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.

   Emits a single "pop {...}" for the low registers in MASK; popping
   the PC is special-cased because on some configurations it must be
   done via a scratch register and BX (handled by thumb_exit).  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit() */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  /* More low registers follow, so emit a separator.  */
	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never poped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.

   This routine may need to juggle the return address, frame pointer
   and stack pointer through the argument registers, since Thumb-1 pop
   can only target low registers.  The statement order below is
   load-bearing; do not reorder.  */
void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  enum machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable that examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.
     Each matched pair knocks one register off both working masks via
     the x & -x lowest-set-bit trick.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  Stash a4 in
	     IP and restore it at the end (see restore_a4 below).  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register for the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just move the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int  popped_into;
      int  move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to     = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int  popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.

   The tracked state lives in cfun->machine->thumb1_cc_*: the last
   insn known to have set the flags, the compared operands, and the
   CC mode in which the comparison is valid.  */
void
thumb1_final_prescan_insn (rtx insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  /* The tracked operands are clobbered by this insn, so the
	     recorded comparison is no longer usable.  */
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      /* A subtract of zero is really a compare against zero,
		 which is valid in full CCmode.  */
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error("Unexpected thumb1 far jump");
}
26449 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26451 unsigned HOST_WIDE_INT mask
= 0xff;
26454 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26455 if (val
== 0) /* XXX */
26458 for (i
= 0; i
< 25; i
++)
26459 if ((val
& (mask
<< i
)) == val
)
/* Returns nonzero if the current function contains,
   or might contain a far jump.

   The answer is sticky: once cfun->machine->far_jump_used is set we
   always report true, because the LR push it forces cannot be undone
   once frame layout decisions have been based on it.  */
static int
thumb_far_jump_used_p (void)
{
  rtx insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, then length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	{
	  far_jump = true;
	}
      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't much useful.

     Following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll-back the decision of not to use far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
/* Return nonzero if FUNC must be entered in ARM mode.
   FUNC must be a FUNCTION_DECL.  */
int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_PE
  /* On ARM_PE targets, the "interfacearm" attribute requests that the
     function be entered in ARM state.  */
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}
26575 /* Given the stack offsets and register mask in OFFSETS, decide how
26576 many additional registers to push instead of subtracting a constant
26577 from SP. For epilogues the principle is the same except we use pop.
26578 FOR_PROLOGUE indicates which we're generating. */
26580 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26582 HOST_WIDE_INT amount
;
26583 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26584 /* Extract a mask of the ones we can give to the Thumb's push/pop
26586 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26587 /* Then count how many other high registers will need to be pushed. */
26588 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26589 int n_free
, reg_base
, size
;
26591 if (!for_prologue
&& frame_pointer_needed
)
26592 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26594 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26596 /* If the stack frame size is 512 exactly, we can save one load
26597 instruction, which should make this a win even when optimizing
26599 if (!optimize_size
&& amount
!= 512)
26602 /* Can't do this if there are high registers to push. */
26603 if (high_regs_pushed
!= 0)
26606 /* Shouldn't do it in the prologue if no registers would normally
26607 be pushed at all. In the epilogue, also allow it if we'll have
26608 a pop insn for the PC. */
26611 || TARGET_BACKTRACE
26612 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26613 || TARGET_INTERWORK
26614 || crtl
->args
.pretend_args_size
!= 0))
26617 /* Don't do this if thumb_expand_prologue wants to emit instructions
26618 between the push and the stack frame allocation. */
26620 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26621 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
26628 size
= arm_size_return_regs ();
26629 reg_base
= ARM_NUM_INTS (size
);
26630 live_regs_mask
>>= reg_base
;
26633 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26634 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
26636 live_regs_mask
>>= 1;
26642 gcc_assert (amount
/ 4 * 4 == amount
);
26644 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26645 return (amount
- 508) / 4;
26646 if (amount
<= n_free
* 4)
26651 /* The bits which aren't usefully expanded as rtl. */
26653 thumb1_unexpanded_epilogue (void)
26655 arm_stack_offsets
*offsets
;
26657 unsigned long live_regs_mask
= 0;
26658 int high_regs_pushed
= 0;
26660 int had_to_push_lr
;
26663 if (cfun
->machine
->return_used_this_function
!= 0)
26666 if (IS_NAKED (arm_current_func_type ()))
26669 offsets
= arm_get_frame_offsets ();
26670 live_regs_mask
= offsets
->saved_regs_mask
;
26671 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26673 /* If we can deduce the registers used from the function's return value.
26674 This is more reliable that examining df_regs_ever_live_p () because that
26675 will be set if the register is ever used in the function, not just if
26676 the register is used to hold a return value. */
26677 size
= arm_size_return_regs ();
26679 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26682 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26683 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26686 /* The prolog may have pushed some high registers to use as
26687 work registers. e.g. the testsuite file:
26688 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26689 compiles to produce:
26690 push {r4, r5, r6, r7, lr}
26694 as part of the prolog. We have to undo that pushing here. */
26696 if (high_regs_pushed
)
26698 unsigned long mask
= live_regs_mask
& 0xff;
26701 /* The available low registers depend on the size of the value we are
26709 /* Oh dear! We have no low registers into which we can pop
26712 ("no low registers available for popping high registers");
26714 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
26715 if (live_regs_mask
& (1 << next_hi_reg
))
26718 while (high_regs_pushed
)
26720 /* Find lo register(s) into which the high register(s) can
26722 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26724 if (mask
& (1 << regno
))
26725 high_regs_pushed
--;
26726 if (high_regs_pushed
== 0)
26730 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
26732 /* Pop the values into the low register(s). */
26733 thumb_pop (asm_out_file
, mask
);
26735 /* Move the value(s) into the high registers. */
26736 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26738 if (mask
& (1 << regno
))
26740 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
26743 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
26744 if (live_regs_mask
& (1 << next_hi_reg
))
26749 live_regs_mask
&= ~0x0f00;
26752 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
26753 live_regs_mask
&= 0xff;
26755 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
26757 /* Pop the return address into the PC. */
26758 if (had_to_push_lr
)
26759 live_regs_mask
|= 1 << PC_REGNUM
;
26761 /* Either no argument registers were pushed or a backtrace
26762 structure was created which includes an adjusted stack
26763 pointer, so just pop everything. */
26764 if (live_regs_mask
)
26765 thumb_pop (asm_out_file
, live_regs_mask
);
26767 /* We have either just popped the return address into the
26768 PC or it is was kept in LR for the entire function.
26769 Note that thumb_pop has already called thumb_exit if the
26770 PC was in the list. */
26771 if (!had_to_push_lr
)
26772 thumb_exit (asm_out_file
, LR_REGNUM
);
26776 /* Pop everything but the return address. */
26777 if (live_regs_mask
)
26778 thumb_pop (asm_out_file
, live_regs_mask
);
26780 if (had_to_push_lr
)
26784 /* We have no free low regs, so save one. */
26785 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
26789 /* Get the return address into a temporary register. */
26790 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
26794 /* Move the return address to lr. */
26795 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
26797 /* Restore the low register. */
26798 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
26803 regno
= LAST_ARG_REGNUM
;
26808 /* Remove the argument registers that were pushed onto the stack. */
26809 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
26810 SP_REGNUM
, SP_REGNUM
,
26811 crtl
->args
.pretend_args_size
);
26813 thumb_exit (asm_out_file
, regno
);
26819 /* Functions to save and restore machine-specific function data. */
26820 static struct machine_function
*
26821 arm_init_machine_status (void)
26823 struct machine_function
*machine
;
26824 machine
= ggc_alloc_cleared_machine_function ();
26826 #if ARM_FT_UNKNOWN != 0
26827 machine
->func_type
= ARM_FT_UNKNOWN
;
26832 /* Return an RTX indicating where the return address to the
26833 calling function can be found. */
26835 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
26840 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
26843 /* Do anything needed before RTL is emitted for each function. */
26845 arm_init_expanders (void)
26847 /* Arrange to initialize and mark the machine per-function status. */
26848 init_machine_status
= arm_init_machine_status
;
26850 /* This is to stop the combine pass optimizing away the alignment
26851 adjustment of va_arg. */
26852 /* ??? It is claimed that this should not be necessary. */
26854 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
26858 /* Like arm_compute_initial_elimination offset. Simpler because there
26859 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26860 to point at the base of the local variables after static stack
26861 space for a function has been allocated. */
26864 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
26866 arm_stack_offsets
*offsets
;
26868 offsets
= arm_get_frame_offsets ();
26872 case ARG_POINTER_REGNUM
:
26875 case STACK_POINTER_REGNUM
:
26876 return offsets
->outgoing_args
- offsets
->saved_args
;
26878 case FRAME_POINTER_REGNUM
:
26879 return offsets
->soft_frame
- offsets
->saved_args
;
26881 case ARM_HARD_FRAME_POINTER_REGNUM
:
26882 return offsets
->saved_regs
- offsets
->saved_args
;
26884 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26885 return offsets
->locals_base
- offsets
->saved_args
;
26888 gcc_unreachable ();
26892 case FRAME_POINTER_REGNUM
:
26895 case STACK_POINTER_REGNUM
:
26896 return offsets
->outgoing_args
- offsets
->soft_frame
;
26898 case ARM_HARD_FRAME_POINTER_REGNUM
:
26899 return offsets
->saved_regs
- offsets
->soft_frame
;
26901 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26902 return offsets
->locals_base
- offsets
->soft_frame
;
26905 gcc_unreachable ();
26910 gcc_unreachable ();
26914 /* Generate the function's prologue. */
26917 thumb1_expand_prologue (void)
26921 HOST_WIDE_INT amount
;
26922 arm_stack_offsets
*offsets
;
26923 unsigned long func_type
;
26925 unsigned long live_regs_mask
;
26926 unsigned long l_mask
;
26927 unsigned high_regs_pushed
= 0;
26929 func_type
= arm_current_func_type ();
26931 /* Naked functions don't have prologues. */
26932 if (IS_NAKED (func_type
))
26935 if (IS_INTERRUPT (func_type
))
26937 error ("interrupt Service Routines cannot be coded in Thumb mode");
26941 if (is_called_in_ARM_mode (current_function_decl
))
26942 emit_insn (gen_prologue_thumb1_interwork ());
26944 offsets
= arm_get_frame_offsets ();
26945 live_regs_mask
= offsets
->saved_regs_mask
;
26947 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26948 l_mask
= live_regs_mask
& 0x40ff;
26949 /* Then count how many other high registers will need to be pushed. */
26950 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26952 if (crtl
->args
.pretend_args_size
)
26954 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
26956 if (cfun
->machine
->uses_anonymous_args
)
26958 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
26959 unsigned long mask
;
26961 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
26962 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
26964 insn
= thumb1_emit_multi_reg_push (mask
, 0);
26968 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26969 stack_pointer_rtx
, x
));
26971 RTX_FRAME_RELATED_P (insn
) = 1;
26974 if (TARGET_BACKTRACE
)
26976 HOST_WIDE_INT offset
= 0;
26977 unsigned work_register
;
26978 rtx work_reg
, x
, arm_hfp_rtx
;
26980 /* We have been asked to create a stack backtrace structure.
26981 The code looks like this:
26985 0 sub SP, #16 Reserve space for 4 registers.
26986 2 push {R7} Push low registers.
26987 4 add R7, SP, #20 Get the stack pointer before the push.
26988 6 str R7, [SP, #8] Store the stack pointer
26989 (before reserving the space).
26990 8 mov R7, PC Get hold of the start of this code + 12.
26991 10 str R7, [SP, #16] Store it.
26992 12 mov R7, FP Get hold of the current frame pointer.
26993 14 str R7, [SP, #4] Store it.
26994 16 mov R7, LR Get hold of the current return address.
26995 18 str R7, [SP, #12] Store it.
26996 20 add R7, SP, #16 Point at the start of the
26997 backtrace structure.
26998 22 mov FP, R7 Put this value into the frame pointer. */
27000 work_register
= thumb_find_work_register (live_regs_mask
);
27001 work_reg
= gen_rtx_REG (SImode
, work_register
);
27002 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
27004 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27005 stack_pointer_rtx
, GEN_INT (-16)));
27006 RTX_FRAME_RELATED_P (insn
) = 1;
27010 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
27011 RTX_FRAME_RELATED_P (insn
) = 1;
27013 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
27016 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
27017 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27019 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
27020 x
= gen_frame_mem (SImode
, x
);
27021 emit_move_insn (x
, work_reg
);
27023 /* Make sure that the instruction fetching the PC is in the right place
27024 to calculate "start of backtrace creation code + 12". */
27025 /* ??? The stores using the common WORK_REG ought to be enough to
27026 prevent the scheduler from doing anything weird. Failing that
27027 we could always move all of the following into an UNSPEC_VOLATILE. */
27030 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27031 emit_move_insn (work_reg
, x
);
27033 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27034 x
= gen_frame_mem (SImode
, x
);
27035 emit_move_insn (x
, work_reg
);
27037 emit_move_insn (work_reg
, arm_hfp_rtx
);
27039 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27040 x
= gen_frame_mem (SImode
, x
);
27041 emit_move_insn (x
, work_reg
);
27045 emit_move_insn (work_reg
, arm_hfp_rtx
);
27047 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27048 x
= gen_frame_mem (SImode
, x
);
27049 emit_move_insn (x
, work_reg
);
27051 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27052 emit_move_insn (work_reg
, x
);
27054 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27055 x
= gen_frame_mem (SImode
, x
);
27056 emit_move_insn (x
, work_reg
);
27059 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
27060 emit_move_insn (work_reg
, x
);
27062 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
27063 x
= gen_frame_mem (SImode
, x
);
27064 emit_move_insn (x
, work_reg
);
27066 x
= GEN_INT (offset
+ 12);
27067 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27069 emit_move_insn (arm_hfp_rtx
, work_reg
);
27071 /* Optimization: If we are not pushing any low registers but we are going
27072 to push some high registers then delay our first push. This will just
27073 be a push of LR and we can combine it with the push of the first high
27075 else if ((l_mask
& 0xff) != 0
27076 || (high_regs_pushed
== 0 && l_mask
))
27078 unsigned long mask
= l_mask
;
27079 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
27080 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
27081 RTX_FRAME_RELATED_P (insn
) = 1;
27084 if (high_regs_pushed
)
27086 unsigned pushable_regs
;
27087 unsigned next_hi_reg
;
27088 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
27089 : crtl
->args
.info
.nregs
;
27090 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
27092 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
27093 if (live_regs_mask
& (1 << next_hi_reg
))
27096 /* Here we need to mask out registers used for passing arguments
27097 even if they can be pushed. This is to avoid using them to stash the high
27098 registers. Such kind of stash may clobber the use of arguments. */
27099 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
27101 if (pushable_regs
== 0)
27102 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
27104 while (high_regs_pushed
> 0)
27106 unsigned long real_regs_mask
= 0;
27108 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
27110 if (pushable_regs
& (1 << regno
))
27112 emit_move_insn (gen_rtx_REG (SImode
, regno
),
27113 gen_rtx_REG (SImode
, next_hi_reg
));
27115 high_regs_pushed
--;
27116 real_regs_mask
|= (1 << next_hi_reg
);
27118 if (high_regs_pushed
)
27120 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
27122 if (live_regs_mask
& (1 << next_hi_reg
))
27127 pushable_regs
&= ~((1 << regno
) - 1);
27133 /* If we had to find a work register and we have not yet
27134 saved the LR then add it to the list of regs to push. */
27135 if (l_mask
== (1 << LR_REGNUM
))
27137 pushable_regs
|= l_mask
;
27138 real_regs_mask
|= l_mask
;
27142 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
27143 RTX_FRAME_RELATED_P (insn
) = 1;
27147 /* Load the pic register before setting the frame pointer,
27148 so we can use r7 as a temporary work register. */
27149 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
27150 arm_load_pic_register (live_regs_mask
);
27152 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
27153 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
27154 stack_pointer_rtx
);
27156 if (flag_stack_usage_info
)
27157 current_function_static_stack_size
27158 = offsets
->outgoing_args
- offsets
->saved_args
;
27160 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27161 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
27166 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27167 GEN_INT (- amount
)));
27168 RTX_FRAME_RELATED_P (insn
) = 1;
27174 /* The stack decrement is too big for an immediate value in a single
27175 insn. In theory we could issue multiple subtracts, but after
27176 three of them it becomes more space efficient to place the full
27177 value in the constant pool and load into a register. (Also the
27178 ARM debugger really likes to see only one stack decrement per
27179 function). So instead we look for a scratch register into which
27180 we can load the decrement, and then we subtract this from the
27181 stack pointer. Unfortunately on the thumb the only available
27182 scratch registers are the argument registers, and we cannot use
27183 these as they may hold arguments to the function. Instead we
27184 attempt to locate a call preserved register which is used by this
27185 function. If we can find one, then we know that it will have
27186 been pushed at the start of the prologue and so we can corrupt
27188 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
27189 if (live_regs_mask
& (1 << regno
))
27192 gcc_assert(regno
<= LAST_LO_REGNUM
);
27194 reg
= gen_rtx_REG (SImode
, regno
);
27196 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
27198 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27199 stack_pointer_rtx
, reg
));
27201 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
27202 plus_constant (Pmode
, stack_pointer_rtx
,
27204 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27205 RTX_FRAME_RELATED_P (insn
) = 1;
27209 if (frame_pointer_needed
)
27210 thumb_set_frame_pointer (offsets
);
27212 /* If we are profiling, make sure no instructions are scheduled before
27213 the call to mcount. Similarly if the user has requested no
27214 scheduling in the prolog. Similarly if we want non-call exceptions
27215 using the EABI unwinder, to prevent faulting instructions from being
27216 swapped with a stack adjustment. */
27217 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
27218 || (arm_except_unwind_info (&global_options
) == UI_TARGET
27219 && cfun
->can_throw_non_call_exceptions
))
27220 emit_insn (gen_blockage ());
27222 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
27223 if (live_regs_mask
& 0xff)
27224 cfun
->machine
->lr_save_eliminated
= 0;
27227 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27228 POP instruction can be generated. LR should be replaced by PC. All
27229 the checks required are already done by USE_RETURN_INSN (). Hence,
27230 all we really need to check here is if single register is to be
27231 returned, or multiple register return. */
27233 thumb2_expand_return (bool simple_return
)
27236 unsigned long saved_regs_mask
;
27237 arm_stack_offsets
*offsets
;
27239 offsets
= arm_get_frame_offsets ();
27240 saved_regs_mask
= offsets
->saved_regs_mask
;
27242 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27243 if (saved_regs_mask
& (1 << i
))
27246 if (!simple_return
&& saved_regs_mask
)
27250 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27251 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27252 rtx addr
= gen_rtx_MEM (SImode
,
27253 gen_rtx_POST_INC (SImode
,
27254 stack_pointer_rtx
));
27255 set_mem_alias_set (addr
, get_frame_alias_set ());
27256 XVECEXP (par
, 0, 0) = ret_rtx
;
27257 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
27258 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27259 emit_jump_insn (par
);
27263 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27264 saved_regs_mask
|= (1 << PC_REGNUM
);
27265 arm_emit_multi_reg_pop (saved_regs_mask
);
27270 emit_jump_insn (simple_return_rtx
);
27275 thumb1_expand_epilogue (void)
27277 HOST_WIDE_INT amount
;
27278 arm_stack_offsets
*offsets
;
27281 /* Naked functions don't have prologues. */
27282 if (IS_NAKED (arm_current_func_type ()))
27285 offsets
= arm_get_frame_offsets ();
27286 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27288 if (frame_pointer_needed
)
27290 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27291 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27293 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27295 gcc_assert (amount
>= 0);
27298 emit_insn (gen_blockage ());
27301 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27302 GEN_INT (amount
)));
27305 /* r3 is always free in the epilogue. */
27306 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27308 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27309 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27313 /* Emit a USE (stack_pointer_rtx), so that
27314 the stack adjustment will not be deleted. */
27315 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27317 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27318 emit_insn (gen_blockage ());
27320 /* Emit a clobber for each insn that will be restored in the epilogue,
27321 so that flow2 will get register lifetimes correct. */
27322 for (regno
= 0; regno
< 13; regno
++)
27323 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
27324 emit_clobber (gen_rtx_REG (SImode
, regno
));
27326 if (! df_regs_ever_live_p (LR_REGNUM
))
27327 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27330 /* Epilogue code for APCS frame. */
27332 arm_expand_epilogue_apcs_frame (bool really_return
)
27334 unsigned long func_type
;
27335 unsigned long saved_regs_mask
;
27338 int floats_from_frame
= 0;
27339 arm_stack_offsets
*offsets
;
27341 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27342 func_type
= arm_current_func_type ();
27344 /* Get frame offsets for ARM. */
27345 offsets
= arm_get_frame_offsets ();
27346 saved_regs_mask
= offsets
->saved_regs_mask
;
27348 /* Find the offset of the floating-point save area in the frame. */
27350 = (offsets
->saved_args
27351 + arm_compute_static_chain_stack_bytes ()
27354 /* Compute how many core registers saved and how far away the floats are. */
27355 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27356 if (saved_regs_mask
& (1 << i
))
27359 floats_from_frame
+= 4;
27362 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27365 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27367 /* The offset is from IP_REGNUM. */
27368 int saved_size
= arm_get_vfp_saved_size ();
27369 if (saved_size
> 0)
27372 floats_from_frame
+= saved_size
;
27373 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27374 hard_frame_pointer_rtx
,
27375 GEN_INT (-floats_from_frame
)));
27376 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27377 ip_rtx
, hard_frame_pointer_rtx
);
27380 /* Generate VFP register multi-pop. */
27381 start_reg
= FIRST_VFP_REGNUM
;
27383 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27384 /* Look for a case where a reg does not need restoring. */
27385 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27386 && (!df_regs_ever_live_p (i
+ 1)
27387 || call_used_regs
[i
+ 1]))
27389 if (start_reg
!= i
)
27390 arm_emit_vfp_multi_reg_pop (start_reg
,
27391 (i
- start_reg
) / 2,
27392 gen_rtx_REG (SImode
,
27397 /* Restore the remaining regs that we have discovered (or possibly
27398 even all of them, if the conditional in the for loop never
27400 if (start_reg
!= i
)
27401 arm_emit_vfp_multi_reg_pop (start_reg
,
27402 (i
- start_reg
) / 2,
27403 gen_rtx_REG (SImode
, IP_REGNUM
));
27408 /* The frame pointer is guaranteed to be non-double-word aligned, as
27409 it is set to double-word-aligned old_stack_pointer - 4. */
27411 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27413 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27414 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27416 rtx addr
= gen_frame_mem (V2SImode
,
27417 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27419 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27420 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27421 gen_rtx_REG (V2SImode
, i
),
27427 /* saved_regs_mask should contain IP which contains old stack pointer
27428 at the time of activation creation. Since SP and IP are adjacent registers,
27429 we can restore the value directly into SP. */
27430 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27431 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27432 saved_regs_mask
|= (1 << SP_REGNUM
);
27434 /* There are two registers left in saved_regs_mask - LR and PC. We
27435 only need to restore LR (the return address), but to
27436 save time we can load it directly into PC, unless we need a
27437 special function exit sequence, or we are not really returning. */
27439 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27440 && !crtl
->calls_eh_return
)
27441 /* Delete LR from the register mask, so that LR on
27442 the stack is loaded into the PC in the register mask. */
27443 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27445 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27447 num_regs
= bit_count (saved_regs_mask
);
27448 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27451 emit_insn (gen_blockage ());
27452 /* Unwind the stack to just below the saved registers. */
27453 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27454 hard_frame_pointer_rtx
,
27455 GEN_INT (- 4 * num_regs
)));
27457 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27458 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27461 arm_emit_multi_reg_pop (saved_regs_mask
);
27463 if (IS_INTERRUPT (func_type
))
27465 /* Interrupt handlers will have pushed the
27466 IP onto the stack, so restore it now. */
27468 rtx addr
= gen_rtx_MEM (SImode
,
27469 gen_rtx_POST_INC (SImode
,
27470 stack_pointer_rtx
));
27471 set_mem_alias_set (addr
, get_frame_alias_set ());
27472 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27473 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27474 gen_rtx_REG (SImode
, IP_REGNUM
),
27478 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27481 if (crtl
->calls_eh_return
)
27482 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27484 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27486 if (IS_STACKALIGN (func_type
))
27487 /* Restore the original stack pointer. Before prologue, the stack was
27488 realigned and the original stack pointer saved in r0. For details,
27489 see comment in arm_expand_prologue. */
27490 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27492 emit_jump_insn (simple_return_rtx
);
27495 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27496 function is not a sibcall. */
27498 arm_expand_epilogue (bool really_return
)
27500 unsigned long func_type
;
27501 unsigned long saved_regs_mask
;
27505 arm_stack_offsets
*offsets
;
27507 func_type
= arm_current_func_type ();
27509 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27510 let output_return_instruction take care of instruction emission if any. */
27511 if (IS_NAKED (func_type
)
27512 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27515 emit_jump_insn (simple_return_rtx
);
27519 /* If we are throwing an exception, then we really must be doing a
27520 return, so we can't tail-call. */
27521 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27523 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27525 arm_expand_epilogue_apcs_frame (really_return
);
27529 /* Get frame offsets for ARM. */
27530 offsets
= arm_get_frame_offsets ();
27531 saved_regs_mask
= offsets
->saved_regs_mask
;
27532 num_regs
= bit_count (saved_regs_mask
);
27534 if (frame_pointer_needed
)
27537 /* Restore stack pointer if necessary. */
27540 /* In ARM mode, frame pointer points to first saved register.
27541 Restore stack pointer to last saved register. */
27542 amount
= offsets
->frame
- offsets
->saved_regs
;
27544 /* Force out any pending memory operations that reference stacked data
27545 before stack de-allocation occurs. */
27546 emit_insn (gen_blockage ());
27547 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27548 hard_frame_pointer_rtx
,
27549 GEN_INT (amount
)));
27550 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27552 hard_frame_pointer_rtx
);
27554 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27556 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27560 /* In Thumb-2 mode, the frame pointer points to the last saved
27562 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27565 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27566 hard_frame_pointer_rtx
,
27567 GEN_INT (amount
)));
27568 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27569 hard_frame_pointer_rtx
,
27570 hard_frame_pointer_rtx
);
27573 /* Force out any pending memory operations that reference stacked data
27574 before stack de-allocation occurs. */
27575 emit_insn (gen_blockage ());
27576 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27577 hard_frame_pointer_rtx
));
27578 arm_add_cfa_adjust_cfa_note (insn
, 0,
27580 hard_frame_pointer_rtx
);
27581 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27583 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27588 /* Pop off outgoing args and local frame to adjust stack pointer to
27589 last saved register. */
27590 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27594 /* Force out any pending memory operations that reference stacked data
27595 before stack de-allocation occurs. */
27596 emit_insn (gen_blockage ());
27597 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27599 GEN_INT (amount
)));
27600 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27601 stack_pointer_rtx
, stack_pointer_rtx
);
27602 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27604 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27608 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27610 /* Generate VFP register multi-pop. */
27611 int end_reg
= LAST_VFP_REGNUM
+ 1;
27613 /* Scan the registers in reverse order. We need to match
27614 any groupings made in the prologue and generate matching
27615 vldm operations. The need to match groups is because,
27616 unlike pop, vldm can only do consecutive regs. */
27617 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27618 /* Look for a case where a reg does not need restoring. */
27619 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27620 && (!df_regs_ever_live_p (i
+ 1)
27621 || call_used_regs
[i
+ 1]))
27623 /* Restore the regs discovered so far (from reg+2 to
27625 if (end_reg
> i
+ 2)
27626 arm_emit_vfp_multi_reg_pop (i
+ 2,
27627 (end_reg
- (i
+ 2)) / 2,
27628 stack_pointer_rtx
);
27632 /* Restore the remaining regs that we have discovered (or possibly
27633 even all of them, if the conditional in the for loop never
27635 if (end_reg
> i
+ 2)
27636 arm_emit_vfp_multi_reg_pop (i
+ 2,
27637 (end_reg
- (i
+ 2)) / 2,
27638 stack_pointer_rtx
);
27642 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27643 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27646 rtx addr
= gen_rtx_MEM (V2SImode
,
27647 gen_rtx_POST_INC (SImode
,
27648 stack_pointer_rtx
));
27649 set_mem_alias_set (addr
, get_frame_alias_set ());
27650 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27651 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27652 gen_rtx_REG (V2SImode
, i
),
27654 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27655 stack_pointer_rtx
, stack_pointer_rtx
);
27658 if (saved_regs_mask
)
27661 bool return_in_pc
= false;
27663 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27664 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27665 && !IS_STACKALIGN (func_type
)
27667 && crtl
->args
.pretend_args_size
== 0
27668 && saved_regs_mask
& (1 << LR_REGNUM
)
27669 && !crtl
->calls_eh_return
)
27671 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27672 saved_regs_mask
|= (1 << PC_REGNUM
);
27673 return_in_pc
= true;
27676 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
27678 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27679 if (saved_regs_mask
& (1 << i
))
27681 rtx addr
= gen_rtx_MEM (SImode
,
27682 gen_rtx_POST_INC (SImode
,
27683 stack_pointer_rtx
));
27684 set_mem_alias_set (addr
, get_frame_alias_set ());
27686 if (i
== PC_REGNUM
)
27688 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27689 XVECEXP (insn
, 0, 0) = ret_rtx
;
27690 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
27691 gen_rtx_REG (SImode
, i
),
27693 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
27694 insn
= emit_jump_insn (insn
);
27698 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
27700 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27701 gen_rtx_REG (SImode
, i
),
27703 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27705 stack_pointer_rtx
);
27712 && current_tune
->prefer_ldrd_strd
27713 && !optimize_function_for_size_p (cfun
))
27716 thumb2_emit_ldrd_pop (saved_regs_mask
);
27717 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
27718 arm_emit_ldrd_pop (saved_regs_mask
);
27720 arm_emit_multi_reg_pop (saved_regs_mask
);
27723 arm_emit_multi_reg_pop (saved_regs_mask
);
27726 if (return_in_pc
== true)
27730 if (crtl
->args
.pretend_args_size
)
27733 rtx dwarf
= NULL_RTX
;
27734 rtx tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27736 GEN_INT (crtl
->args
.pretend_args_size
)));
27738 RTX_FRAME_RELATED_P (tmp
) = 1;
27740 if (cfun
->machine
->uses_anonymous_args
)
27742 /* Restore pretend args. Refer arm_expand_prologue on how to save
27743 pretend_args in stack. */
27744 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
27745 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
27746 for (j
= 0, i
= 0; j
< num_regs
; i
++)
27747 if (saved_regs_mask
& (1 << i
))
27749 rtx reg
= gen_rtx_REG (SImode
, i
);
27750 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
27753 REG_NOTES (tmp
) = dwarf
;
27755 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
27756 stack_pointer_rtx
, stack_pointer_rtx
);
27759 if (!really_return
)
27762 if (crtl
->calls_eh_return
)
27763 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27765 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27767 if (IS_STACKALIGN (func_type
))
27768 /* Restore the original stack pointer. Before prologue, the stack was
27769 realigned and the original stack pointer saved in r0. For details,
27770 see comment in arm_expand_prologue. */
27771 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27773 emit_jump_insn (simple_return_rtx
);
27776 /* Implementation of insn prologue_thumb1_interwork. This is the first
27777 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27780 thumb1_output_interwork (void)
27783 FILE *f
= asm_out_file
;
27785 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
27786 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
27788 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
27790 /* Generate code sequence to switch us into Thumb mode. */
27791 /* The .code 32 directive has already been emitted by
27792 ASM_DECLARE_FUNCTION_NAME. */
27793 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
27794 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
27796 /* Generate a label, so that the debugger will notice the
27797 change in instruction sets. This label is also used by
27798 the assembler to bypass the ARM code when this function
27799 is called from a Thumb encoded function elsewhere in the
27800 same file. Hence the definition of STUB_NAME here must
27801 agree with the definition in gas/config/tc-arm.c. */
27803 #define STUB_NAME ".real_start_of"
27805 fprintf (f
, "\t.code\t16\n");
27807 if (arm_dllexport_name_p (name
))
27808 name
= arm_strip_name_encoding (name
);
27810 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
27811 fprintf (f
, "\t.thumb_func\n");
27812 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
27817 /* Handle the case of a double word load into a low register from
27818 a computed memory address. The computed address may involve a
27819 register which is overwritten by the load. */
27821 thumb_load_double_from_address (rtx
*operands
)
27829 gcc_assert (REG_P (operands
[0]));
27830 gcc_assert (MEM_P (operands
[1]));
27832 /* Get the memory address. */
27833 addr
= XEXP (operands
[1], 0);
27835 /* Work out how the memory address is computed. */
27836 switch (GET_CODE (addr
))
27839 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27841 if (REGNO (operands
[0]) == REGNO (addr
))
27843 output_asm_insn ("ldr\t%H0, %2", operands
);
27844 output_asm_insn ("ldr\t%0, %1", operands
);
27848 output_asm_insn ("ldr\t%0, %1", operands
);
27849 output_asm_insn ("ldr\t%H0, %2", operands
);
27854 /* Compute <address> + 4 for the high order load. */
27855 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27857 output_asm_insn ("ldr\t%0, %1", operands
);
27858 output_asm_insn ("ldr\t%H0, %2", operands
);
27862 arg1
= XEXP (addr
, 0);
27863 arg2
= XEXP (addr
, 1);
27865 if (CONSTANT_P (arg1
))
27866 base
= arg2
, offset
= arg1
;
27868 base
= arg1
, offset
= arg2
;
27870 gcc_assert (REG_P (base
));
27872 /* Catch the case of <address> = <reg> + <reg> */
27873 if (REG_P (offset
))
27875 int reg_offset
= REGNO (offset
);
27876 int reg_base
= REGNO (base
);
27877 int reg_dest
= REGNO (operands
[0]);
27879 /* Add the base and offset registers together into the
27880 higher destination register. */
27881 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
27882 reg_dest
+ 1, reg_base
, reg_offset
);
27884 /* Load the lower destination register from the address in
27885 the higher destination register. */
27886 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
27887 reg_dest
, reg_dest
+ 1);
27889 /* Load the higher destination register from its own address
27891 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
27892 reg_dest
+ 1, reg_dest
+ 1);
27896 /* Compute <address> + 4 for the high order load. */
27897 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27899 /* If the computed address is held in the low order register
27900 then load the high order register first, otherwise always
27901 load the low order register first. */
27902 if (REGNO (operands
[0]) == REGNO (base
))
27904 output_asm_insn ("ldr\t%H0, %2", operands
);
27905 output_asm_insn ("ldr\t%0, %1", operands
);
27909 output_asm_insn ("ldr\t%0, %1", operands
);
27910 output_asm_insn ("ldr\t%H0, %2", operands
);
27916 /* With no registers to worry about we can just load the value
27918 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27920 output_asm_insn ("ldr\t%H0, %2", operands
);
27921 output_asm_insn ("ldr\t%0, %1", operands
);
27925 gcc_unreachable ();
27932 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
27939 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27942 operands
[4] = operands
[5];
27945 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
27946 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
27950 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27953 operands
[4] = operands
[5];
27956 if (REGNO (operands
[5]) > REGNO (operands
[6]))
27959 operands
[5] = operands
[6];
27962 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27965 operands
[4] = operands
[5];
27969 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
27970 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
27974 gcc_unreachable ();
27980 /* Output a call-via instruction for thumb state. */
27982 thumb_call_via_reg (rtx reg
)
27984 int regno
= REGNO (reg
);
27987 gcc_assert (regno
< LR_REGNUM
);
27989 /* If we are in the normal text section we can use a single instance
27990 per compilation unit. If we are doing function sections, then we need
27991 an entry per section, since we can't rely on reachability. */
27992 if (in_section
== text_section
)
27994 thumb_call_reg_needed
= 1;
27996 if (thumb_call_via_label
[regno
] == NULL
)
27997 thumb_call_via_label
[regno
] = gen_label_rtx ();
27998 labelp
= thumb_call_via_label
+ regno
;
28002 if (cfun
->machine
->call_via
[regno
] == NULL
)
28003 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
28004 labelp
= cfun
->machine
->call_via
+ regno
;
28007 output_asm_insn ("bl\t%a0", labelp
);
28011 /* Routines for generating rtl. */
28013 thumb_expand_movmemqi (rtx
*operands
)
28015 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
28016 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
28017 HOST_WIDE_INT len
= INTVAL (operands
[2]);
28018 HOST_WIDE_INT offset
= 0;
28022 emit_insn (gen_movmem12b (out
, in
, out
, in
));
28028 emit_insn (gen_movmem8b (out
, in
, out
, in
));
28034 rtx reg
= gen_reg_rtx (SImode
);
28035 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
28036 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
28043 rtx reg
= gen_reg_rtx (HImode
);
28044 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
28045 plus_constant (Pmode
, in
,
28047 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
28056 rtx reg
= gen_reg_rtx (QImode
);
28057 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
28058 plus_constant (Pmode
, in
,
28060 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
28067 thumb_reload_out_hi (rtx
*operands
)
28069 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
28072 /* Handle reading a half-word from memory during reload. */
28074 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
28076 gcc_unreachable ();
28079 /* Return the length of a function name prefix
28080 that starts with the character 'c'. */
28082 arm_get_strip_length (int c
)
28086 ARM_NAME_ENCODING_LENGTHS
/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
28104 /* If there is a '*' anywhere in the name's prefix, then
28105 emit the stripped name verbatim, otherwise prepend an
28106 underscore if leading underscores are being used. */
28108 arm_asm_output_labelref (FILE *stream
, const char *name
)
28113 while ((skip
= arm_get_strip_length (* name
)))
28115 verbatim
|= (*name
== '*');
28120 fputs (name
, stream
);
28122 asm_fprintf (stream
, "%U%s", name
);
28125 /* This function is used to emit an EABI tag and its associated value.
28126 We emit the numerical value of the tag in case the assembler does not
28127 support textual tags. (Eg gas prior to 2.20). If requested we include
28128 the tag name in a comment so that anyone reading the assembler output
28129 will know which tag is being set.
28131 This function is not static because arm-c.c needs it too. */
28134 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
28136 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
28137 if (flag_verbose_asm
|| flag_debug_asm
)
28138 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
28139 asm_fprintf (asm_out_file
, "\n");
28143 arm_file_start (void)
28147 if (TARGET_UNIFIED_ASM
)
28148 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
28152 const char *fpu_name
;
28153 if (arm_selected_arch
)
28155 /* armv7ve doesn't support any extensions. */
28156 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
28158 /* Keep backward compatability for assemblers
28159 which don't support armv7ve. */
28160 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
28161 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
28162 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
28163 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
28164 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
28168 const char* pos
= strchr (arm_selected_arch
->name
, '+');
28172 gcc_assert (strlen (arm_selected_arch
->name
)
28173 <= sizeof (buf
) / sizeof (*pos
));
28174 strncpy (buf
, arm_selected_arch
->name
,
28175 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
28176 buf
[pos
- arm_selected_arch
->name
] = '\0';
28177 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
28178 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
28181 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
28184 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
28185 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
28188 const char* truncated_name
28189 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
28190 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
28193 if (TARGET_SOFT_FLOAT
)
28195 fpu_name
= "softvfp";
28199 fpu_name
= arm_fpu_desc
->name
;
28200 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
28202 if (TARGET_HARD_FLOAT
)
28203 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28204 if (TARGET_HARD_FLOAT_ABI
)
28205 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28208 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
28210 /* Some of these attributes only apply when the corresponding features
28211 are used. However we don't have any easy way of figuring this out.
28212 Conservatively record the setting that would have been used. */
28214 if (flag_rounding_math
)
28215 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28217 if (!flag_unsafe_math_optimizations
)
28219 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28220 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28222 if (flag_signaling_nans
)
28223 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28225 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28226 flag_finite_math_only
? 1 : 3);
28228 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28229 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28230 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28231 flag_short_enums
? 1 : 2);
28233 /* Tag_ABI_optimization_goals. */
28236 else if (optimize
>= 2)
28242 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
28244 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28247 if (arm_fp16_format
)
28248 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28249 (int) arm_fp16_format
);
28251 if (arm_lang_output_object_attributes_hook
)
28252 arm_lang_output_object_attributes_hook();
28255 default_file_start ();
28259 arm_file_end (void)
28263 if (NEED_INDICATE_EXEC_STACK
)
28264 /* Add .note.GNU-stack. */
28265 file_end_indicate_exec_stack ();
28267 if (! thumb_call_reg_needed
)
28270 switch_to_section (text_section
);
28271 asm_fprintf (asm_out_file
, "\t.code 16\n");
28272 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28274 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28276 rtx label
= thumb_call_via_label
[regno
];
28280 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28281 CODE_LABEL_NUMBER (label
));
28282 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28288 /* Symbols in the text segment can be accessed without indirecting via the
28289 constant pool; it may take an extra binary operation, but this is still
28290 faster than indirecting via memory. Don't do this when not optimizing,
28291 since we won't be calculating al of the offsets necessary to do this
28295 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28297 if (optimize
> 0 && TREE_CONSTANT (decl
))
28298 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28300 default_encode_section_info (decl
, rtl
, first
);
28302 #endif /* !ARM_PE */
28305 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28307 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28308 && !strcmp (prefix
, "L"))
28310 arm_ccfsm_state
= 0;
28311 arm_target_insn
= NULL
;
28313 default_internal_label (stream
, prefix
, labelno
);
28316 /* Output code to add DELTA to the first argument, and then jump
28317 to FUNCTION. Used for C++ multiple inheritance. */
28319 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
28320 HOST_WIDE_INT delta
,
28321 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
28324 static int thunk_label
= 0;
28327 int mi_delta
= delta
;
28328 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
28330 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
28333 mi_delta
= - mi_delta
;
28335 final_start_function (emit_barrier (), file
, 1);
28339 int labelno
= thunk_label
++;
28340 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
28341 /* Thunks are entered in arm mode when avaiable. */
28342 if (TARGET_THUMB1_ONLY
)
28344 /* push r3 so we can use it as a temporary. */
28345 /* TODO: Omit this save if r3 is not used. */
28346 fputs ("\tpush {r3}\n", file
);
28347 fputs ("\tldr\tr3, ", file
);
28351 fputs ("\tldr\tr12, ", file
);
28353 assemble_name (file
, label
);
28354 fputc ('\n', file
);
28357 /* If we are generating PIC, the ldr instruction below loads
28358 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28359 the address of the add + 8, so we have:
28361 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28364 Note that we have "+ 1" because some versions of GNU ld
28365 don't set the low bit of the result for R_ARM_REL32
28366 relocations against thumb function symbols.
28367 On ARMv6M this is +4, not +8. */
28368 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
28369 assemble_name (file
, labelpc
);
28370 fputs (":\n", file
);
28371 if (TARGET_THUMB1_ONLY
)
28373 /* This is 2 insns after the start of the thunk, so we know it
28374 is 4-byte aligned. */
28375 fputs ("\tadd\tr3, pc, r3\n", file
);
28376 fputs ("\tmov r12, r3\n", file
);
28379 fputs ("\tadd\tr12, pc, r12\n", file
);
28381 else if (TARGET_THUMB1_ONLY
)
28382 fputs ("\tmov r12, r3\n", file
);
28384 if (TARGET_THUMB1_ONLY
)
28386 if (mi_delta
> 255)
28388 fputs ("\tldr\tr3, ", file
);
28389 assemble_name (file
, label
);
28390 fputs ("+4\n", file
);
28391 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
28392 mi_op
, this_regno
, this_regno
);
28394 else if (mi_delta
!= 0)
28396 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28397 mi_op
, this_regno
, this_regno
,
28403 /* TODO: Use movw/movt for large constants when available. */
28404 while (mi_delta
!= 0)
28406 if ((mi_delta
& (3 << shift
)) == 0)
28410 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28411 mi_op
, this_regno
, this_regno
,
28412 mi_delta
& (0xff << shift
));
28413 mi_delta
&= ~(0xff << shift
);
28420 if (TARGET_THUMB1_ONLY
)
28421 fputs ("\tpop\t{r3}\n", file
);
28423 fprintf (file
, "\tbx\tr12\n");
28424 ASM_OUTPUT_ALIGN (file
, 2);
28425 assemble_name (file
, label
);
28426 fputs (":\n", file
);
28429 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28430 rtx tem
= XEXP (DECL_RTL (function
), 0);
28431 tem
= plus_constant (GET_MODE (tem
), tem
, -7);
28432 tem
= gen_rtx_MINUS (GET_MODE (tem
),
28434 gen_rtx_SYMBOL_REF (Pmode
,
28435 ggc_strdup (labelpc
)));
28436 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
28439 /* Output ".word .LTHUNKn". */
28440 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
28442 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
28443 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
28447 fputs ("\tb\t", file
);
28448 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28449 if (NEED_PLT_RELOC
)
28450 fputs ("(PLT)", file
);
28451 fputc ('\n', file
);
28454 final_end_function ();
28458 arm_emit_vector_const (FILE *file
, rtx x
)
28461 const char * pattern
;
28463 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
28465 switch (GET_MODE (x
))
28467 case V2SImode
: pattern
= "%08x"; break;
28468 case V4HImode
: pattern
= "%04x"; break;
28469 case V8QImode
: pattern
= "%02x"; break;
28470 default: gcc_unreachable ();
28473 fprintf (file
, "0x");
28474 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
28478 element
= CONST_VECTOR_ELT (x
, i
);
28479 fprintf (file
, pattern
, INTVAL (element
));
28485 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28486 HFmode constant pool entries are actually loaded with ldr. */
28488 arm_emit_fp16_const (rtx c
)
28493 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
28494 bits
= real_to_target (NULL
, &r
, HFmode
);
28495 if (WORDS_BIG_ENDIAN
)
28496 assemble_zeros (2);
28497 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
28498 if (!WORDS_BIG_ENDIAN
)
28499 assemble_zeros (2);
28503 arm_output_load_gr (rtx
*operands
)
28510 if (!MEM_P (operands
[1])
28511 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
28512 || !REG_P (reg
= XEXP (sum
, 0))
28513 || !CONST_INT_P (offset
= XEXP (sum
, 1))
28514 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
28515 return "wldrw%?\t%0, %1";
28517 /* Fix up an out-of-range load of a GR register. */
28518 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
28519 wcgr
= operands
[0];
28521 output_asm_insn ("ldr%?\t%0, %1", operands
);
28523 operands
[0] = wcgr
;
28525 output_asm_insn ("tmcr%?\t%0, %1", operands
);
28526 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
28531 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28533 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28534 named arg and all anonymous args onto the stack.
28535 XXX I know the prologue shouldn't be pushing registers, but it is faster
28539 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
28540 enum machine_mode mode
,
28543 int second_time ATTRIBUTE_UNUSED
)
28545 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
28548 cfun
->machine
->uses_anonymous_args
= 1;
28549 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
28551 nregs
= pcum
->aapcs_ncrn
;
28552 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
28556 nregs
= pcum
->nregs
;
28558 if (nregs
< NUM_ARG_REGS
)
28559 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
28562 /* We can't rely on the caller doing the proper promotion when
28563 using APCS or ATPCS. */
28566 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
28568 return !TARGET_AAPCS_BASED
;
28571 static enum machine_mode
28572 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
28573 enum machine_mode mode
,
28574 int *punsignedp ATTRIBUTE_UNUSED
,
28575 const_tree fntype ATTRIBUTE_UNUSED
,
28576 int for_return ATTRIBUTE_UNUSED
)
28578 if (GET_MODE_CLASS (mode
) == MODE_INT
28579 && GET_MODE_SIZE (mode
) < 4)
28585 /* AAPCS based ABIs use short enums by default. */
28588 arm_default_short_enums (void)
28590 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
28594 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28597 arm_align_anon_bitfield (void)
28599 return TARGET_AAPCS_BASED
;
28603 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28606 arm_cxx_guard_type (void)
28608 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
28612 /* The EABI says test the least significant bit of a guard variable. */
28615 arm_cxx_guard_mask_bit (void)
28617 return TARGET_AAPCS_BASED
;
28621 /* The EABI specifies that all array cookies are 8 bytes long. */
28624 arm_get_cookie_size (tree type
)
28628 if (!TARGET_AAPCS_BASED
)
28629 return default_cxx_get_cookie_size (type
);
28631 size
= build_int_cst (sizetype
, 8);
28636 /* The EABI says that array cookies should also contain the element size. */
28639 arm_cookie_has_size (void)
28641 return TARGET_AAPCS_BASED
;
28645 /* The EABI says constructors and destructors should return a pointer to
28646 the object constructed/destroyed. */
28649 arm_cxx_cdtor_returns_this (void)
28651 return TARGET_AAPCS_BASED
;
28654 /* The EABI says that an inline function may never be the key
28658 arm_cxx_key_method_may_be_inline (void)
28660 return !TARGET_AAPCS_BASED
;
28664 arm_cxx_determine_class_data_visibility (tree decl
)
28666 if (!TARGET_AAPCS_BASED
28667 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
28670 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28671 is exported. However, on systems without dynamic vague linkage,
28672 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28673 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
28674 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
28676 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
28677 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
28681 arm_cxx_class_data_always_comdat (void)
28683 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28684 vague linkage if the class has no key function. */
28685 return !TARGET_AAPCS_BASED
;
28689 /* The EABI says __aeabi_atexit should be used to register static
28693 arm_cxx_use_aeabi_atexit (void)
28695 return TARGET_AAPCS_BASED
;
28700 arm_set_return_address (rtx source
, rtx scratch
)
28702 arm_stack_offsets
*offsets
;
28703 HOST_WIDE_INT delta
;
28705 unsigned long saved_regs
;
28707 offsets
= arm_get_frame_offsets ();
28708 saved_regs
= offsets
->saved_regs_mask
;
28710 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
28711 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28714 if (frame_pointer_needed
)
28715 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
28718 /* LR will be the first saved register. */
28719 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
28724 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
28725 GEN_INT (delta
& ~4095)));
28730 addr
= stack_pointer_rtx
;
28732 addr
= plus_constant (Pmode
, addr
, delta
);
28734 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28740 thumb_set_return_address (rtx source
, rtx scratch
)
28742 arm_stack_offsets
*offsets
;
28743 HOST_WIDE_INT delta
;
28744 HOST_WIDE_INT limit
;
28747 unsigned long mask
;
28751 offsets
= arm_get_frame_offsets ();
28752 mask
= offsets
->saved_regs_mask
;
28753 if (mask
& (1 << LR_REGNUM
))
28756 /* Find the saved regs. */
28757 if (frame_pointer_needed
)
28759 delta
= offsets
->soft_frame
- offsets
->saved_args
;
28760 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
28766 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
28769 /* Allow for the stack frame. */
28770 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
28772 /* The link register is always the first saved register. */
28775 /* Construct the address. */
28776 addr
= gen_rtx_REG (SImode
, reg
);
28779 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
28780 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
28784 addr
= plus_constant (Pmode
, addr
, delta
);
28786 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28789 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28792 /* Implements target hook vector_mode_supported_p. */
28794 arm_vector_mode_supported_p (enum machine_mode mode
)
28796 /* Neon also supports V2SImode, etc. listed in the clause below. */
28797 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
28798 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
28801 if ((TARGET_NEON
|| TARGET_IWMMXT
)
28802 && ((mode
== V2SImode
)
28803 || (mode
== V4HImode
)
28804 || (mode
== V8QImode
)))
28807 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
28808 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
28809 || mode
== V2HAmode
))
28815 /* Implements target hook array_mode_supported_p. */
28818 arm_array_mode_supported_p (enum machine_mode mode
,
28819 unsigned HOST_WIDE_INT nelems
)
28822 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
28823 && (nelems
>= 2 && nelems
<= 4))
28829 /* Use the option -mvectorize-with-neon-double to override the use of quardword
28830 registers when autovectorizing for Neon, at least until multiple vector
28831 widths are supported properly by the middle-end. */
28833 static enum machine_mode
28834 arm_preferred_simd_mode (enum machine_mode mode
)
28840 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
28842 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
28844 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
28846 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
28848 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
28855 if (TARGET_REALLY_IWMMXT
)
28871 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28873 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28874 using r0-r4 for function arguments, r7 for the stack frame and don't have
28875 enough left over to do doubleword arithmetic. For Thumb-2 all the
28876 potentially problematic instructions accept high registers so this is not
28877 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28878 that require many low registers. */
28880 arm_class_likely_spilled_p (reg_class_t rclass
)
28882 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
28883 || rclass
== CC_REG
)
28889 /* Implements target hook small_register_classes_for_mode_p. */
28891 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
28893 return TARGET_THUMB1
;
28896 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28897 ARM insns and therefore guarantee that the shift count is modulo 256.
28898 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28899 guarantee no particular behavior for out-of-range counts. */
28901 static unsigned HOST_WIDE_INT
28902 arm_shift_truncation_mask (enum machine_mode mode
)
28904 return mode
== SImode
? 255 : 0;
28908 /* Map internal gcc register numbers to DWARF2 register numbers. */
28911 arm_dbx_register_number (unsigned int regno
)
28916 if (IS_VFP_REGNUM (regno
))
28918 /* See comment in arm_dwarf_register_span. */
28919 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28920 return 64 + regno
- FIRST_VFP_REGNUM
;
28922 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
28925 if (IS_IWMMXT_GR_REGNUM (regno
))
28926 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
28928 if (IS_IWMMXT_REGNUM (regno
))
28929 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
28931 gcc_unreachable ();
28934 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28935 GCC models tham as 64 32-bit registers, so we need to describe this to
28936 the DWARF generation code. Other registers can use the default. */
28938 arm_dwarf_register_span (rtx rtl
)
28940 enum machine_mode mode
;
28946 regno
= REGNO (rtl
);
28947 if (!IS_VFP_REGNUM (regno
))
28950 /* XXX FIXME: The EABI defines two VFP register ranges:
28951 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28953 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28954 corresponding D register. Until GDB supports this, we shall use the
28955 legacy encodings. We also use these encodings for D0-D15 for
28956 compatibility with older debuggers. */
28957 mode
= GET_MODE (rtl
);
28958 if (GET_MODE_SIZE (mode
) < 8)
28961 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28963 nregs
= GET_MODE_SIZE (mode
) / 4;
28964 for (i
= 0; i
< nregs
; i
+= 2)
28965 if (TARGET_BIG_END
)
28967 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28968 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
28972 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
28973 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28978 nregs
= GET_MODE_SIZE (mode
) / 8;
28979 for (i
= 0; i
< nregs
; i
++)
28980 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
28983 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
28986 #if ARM_UNWIND_INFO
28987 /* Emit unwind directives for a store-multiple instruction or stack pointer
28988 push during alignment.
28989 These should only ever be generated by the function prologue code, so
28990 expect them to have a particular form.
28991 The store-multiple instruction sometimes pushes pc as the last register,
28992 although it should not be tracked into unwind information, or for -Os
28993 sometimes pushes some dummy registers before first register that needs
28994 to be tracked in unwind information; such dummy registers are there just
28995 to avoid separate stack adjustment, and will not be restored in the
28999 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
29002 HOST_WIDE_INT offset
;
29003 HOST_WIDE_INT nregs
;
29007 unsigned padfirst
= 0, padlast
= 0;
29010 e
= XVECEXP (p
, 0, 0);
29011 gcc_assert (GET_CODE (e
) == SET
);
29013 /* First insn will adjust the stack pointer. */
29014 gcc_assert (GET_CODE (e
) == SET
29015 && REG_P (SET_DEST (e
))
29016 && REGNO (SET_DEST (e
)) == SP_REGNUM
29017 && GET_CODE (SET_SRC (e
)) == PLUS
);
29019 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
29020 nregs
= XVECLEN (p
, 0) - 1;
29021 gcc_assert (nregs
);
29023 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
29026 /* For -Os dummy registers can be pushed at the beginning to
29027 avoid separate stack pointer adjustment. */
29028 e
= XVECEXP (p
, 0, 1);
29029 e
= XEXP (SET_DEST (e
), 0);
29030 if (GET_CODE (e
) == PLUS
)
29031 padfirst
= INTVAL (XEXP (e
, 1));
29032 gcc_assert (padfirst
== 0 || optimize_size
);
29033 /* The function prologue may also push pc, but not annotate it as it is
29034 never restored. We turn this into a stack pointer adjustment. */
29035 e
= XVECEXP (p
, 0, nregs
);
29036 e
= XEXP (SET_DEST (e
), 0);
29037 if (GET_CODE (e
) == PLUS
)
29038 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
29040 padlast
= offset
- 4;
29041 gcc_assert (padlast
== 0 || padlast
== 4);
29043 fprintf (asm_out_file
, "\t.pad #4\n");
29045 fprintf (asm_out_file
, "\t.save {");
29047 else if (IS_VFP_REGNUM (reg
))
29050 fprintf (asm_out_file
, "\t.vsave {");
29053 /* Unknown register type. */
29054 gcc_unreachable ();
29056 /* If the stack increment doesn't match the size of the saved registers,
29057 something has gone horribly wrong. */
29058 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
29062 /* The remaining insns will describe the stores. */
29063 for (i
= 1; i
<= nregs
; i
++)
29065 /* Expect (set (mem <addr>) (reg)).
29066 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29067 e
= XVECEXP (p
, 0, i
);
29068 gcc_assert (GET_CODE (e
) == SET
29069 && MEM_P (SET_DEST (e
))
29070 && REG_P (SET_SRC (e
)));
29072 reg
= REGNO (SET_SRC (e
));
29073 gcc_assert (reg
>= lastreg
);
29076 fprintf (asm_out_file
, ", ");
29077 /* We can't use %r for vfp because we need to use the
29078 double precision register names. */
29079 if (IS_VFP_REGNUM (reg
))
29080 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
29082 asm_fprintf (asm_out_file
, "%r", reg
);
29084 #ifdef ENABLE_CHECKING
29085 /* Check that the addresses are consecutive. */
29086 e
= XEXP (SET_DEST (e
), 0);
29087 if (GET_CODE (e
) == PLUS
)
29088 gcc_assert (REG_P (XEXP (e
, 0))
29089 && REGNO (XEXP (e
, 0)) == SP_REGNUM
29090 && CONST_INT_P (XEXP (e
, 1))
29091 && offset
== INTVAL (XEXP (e
, 1)));
29095 && REGNO (e
) == SP_REGNUM
);
29096 offset
+= reg_size
;
29099 fprintf (asm_out_file
, "}\n");
29101 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
29104 /* Emit unwind directives for a SET. */
29107 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
29115 switch (GET_CODE (e0
))
29118 /* Pushing a single register. */
29119 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
29120 || !REG_P (XEXP (XEXP (e0
, 0), 0))
29121 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
29124 asm_fprintf (asm_out_file
, "\t.save ");
29125 if (IS_VFP_REGNUM (REGNO (e1
)))
29126 asm_fprintf(asm_out_file
, "{d%d}\n",
29127 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
29129 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
29133 if (REGNO (e0
) == SP_REGNUM
)
29135 /* A stack increment. */
29136 if (GET_CODE (e1
) != PLUS
29137 || !REG_P (XEXP (e1
, 0))
29138 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
29139 || !CONST_INT_P (XEXP (e1
, 1)))
29142 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
29143 -INTVAL (XEXP (e1
, 1)));
29145 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
29147 HOST_WIDE_INT offset
;
29149 if (GET_CODE (e1
) == PLUS
)
29151 if (!REG_P (XEXP (e1
, 0))
29152 || !CONST_INT_P (XEXP (e1
, 1)))
29154 reg
= REGNO (XEXP (e1
, 0));
29155 offset
= INTVAL (XEXP (e1
, 1));
29156 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
29157 HARD_FRAME_POINTER_REGNUM
, reg
,
29160 else if (REG_P (e1
))
29163 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
29164 HARD_FRAME_POINTER_REGNUM
, reg
);
29169 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
29171 /* Move from sp to reg. */
29172 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
29174 else if (GET_CODE (e1
) == PLUS
29175 && REG_P (XEXP (e1
, 0))
29176 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
29177 && CONST_INT_P (XEXP (e1
, 1)))
29179 /* Set reg to offset from sp. */
29180 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
29181 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
29193 /* Emit unwind directives for the given insn. */
29196 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
29199 bool handled_one
= false;
29201 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29204 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29205 && (TREE_NOTHROW (current_function_decl
)
29206 || crtl
->all_throwers_are_sibcalls
))
29209 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
29212 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
29214 switch (REG_NOTE_KIND (note
))
29216 case REG_FRAME_RELATED_EXPR
:
29217 pat
= XEXP (note
, 0);
29220 case REG_CFA_REGISTER
:
29221 pat
= XEXP (note
, 0);
29224 pat
= PATTERN (insn
);
29225 if (GET_CODE (pat
) == PARALLEL
)
29226 pat
= XVECEXP (pat
, 0, 0);
29229 /* Only emitted for IS_STACKALIGN re-alignment. */
29234 src
= SET_SRC (pat
);
29235 dest
= SET_DEST (pat
);
29237 gcc_assert (src
== stack_pointer_rtx
);
29238 reg
= REGNO (dest
);
29239 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29242 handled_one
= true;
29245 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
29246 to get correct dwarf information for shrink-wrap. We should not
29247 emit unwind information for it because these are used either for
29248 pretend arguments or notes to adjust sp and restore registers from
29250 case REG_CFA_DEF_CFA
:
29251 case REG_CFA_ADJUST_CFA
:
29252 case REG_CFA_RESTORE
:
29255 case REG_CFA_EXPRESSION
:
29256 case REG_CFA_OFFSET
:
29257 /* ??? Only handling here what we actually emit. */
29258 gcc_unreachable ();
29266 pat
= PATTERN (insn
);
29269 switch (GET_CODE (pat
))
29272 arm_unwind_emit_set (asm_out_file
, pat
);
29276 /* Store multiple. */
29277 arm_unwind_emit_sequence (asm_out_file
, pat
);
29286 /* Output a reference from a function exception table to the type_info
29287 object X. The EABI specifies that the symbol should be relocated by
29288 an R_ARM_TARGET2 relocation. */
29291 arm_output_ttype (rtx x
)
29293 fputs ("\t.word\t", asm_out_file
);
29294 output_addr_const (asm_out_file
, x
);
29295 /* Use special relocations for symbol references. */
29296 if (!CONST_INT_P (x
))
29297 fputs ("(TARGET2)", asm_out_file
);
29298 fputc ('\n', asm_out_file
);
29303 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29306 arm_asm_emit_except_personality (rtx personality
)
29308 fputs ("\t.personality\t", asm_out_file
);
29309 output_addr_const (asm_out_file
, personality
);
29310 fputc ('\n', asm_out_file
);
29313 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29316 arm_asm_init_sections (void)
29318 exception_section
= get_unnamed_section (0, output_section_asm_op
,
29321 #endif /* ARM_UNWIND_INFO */
29323 /* Output unwind directives for the start/end of a function. */
29326 arm_output_fn_unwind (FILE * f
, bool prologue
)
29328 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29332 fputs ("\t.fnstart\n", f
);
29335 /* If this function will never be unwound, then mark it as such.
29336 The came condition is used in arm_unwind_emit to suppress
29337 the frame annotations. */
29338 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29339 && (TREE_NOTHROW (current_function_decl
)
29340 || crtl
->all_throwers_are_sibcalls
))
29341 fputs("\t.cantunwind\n", f
);
29343 fputs ("\t.fnend\n", f
);
29348 arm_emit_tls_decoration (FILE *fp
, rtx x
)
29350 enum tls_reloc reloc
;
29353 val
= XVECEXP (x
, 0, 0);
29354 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
29356 output_addr_const (fp
, val
);
29361 fputs ("(tlsgd)", fp
);
29364 fputs ("(tlsldm)", fp
);
29367 fputs ("(tlsldo)", fp
);
29370 fputs ("(gottpoff)", fp
);
29373 fputs ("(tpoff)", fp
);
29376 fputs ("(tlsdesc)", fp
);
29379 gcc_unreachable ();
29388 fputs (" + (. - ", fp
);
29389 output_addr_const (fp
, XVECEXP (x
, 0, 2));
29390 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29391 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
29392 output_addr_const (fp
, XVECEXP (x
, 0, 3));
29402 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29405 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
29407 gcc_assert (size
== 4);
29408 fputs ("\t.word\t", file
);
29409 output_addr_const (file
, x
);
29410 fputs ("(tlsldo)", file
);
29413 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29416 arm_output_addr_const_extra (FILE *fp
, rtx x
)
29418 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
29419 return arm_emit_tls_decoration (fp
, x
);
29420 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
29423 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
29425 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
29426 assemble_name_raw (fp
, label
);
29430 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
29432 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
29436 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29440 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
29442 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29446 output_addr_const (fp
, XVECEXP (x
, 0, 1));
29450 else if (GET_CODE (x
) == CONST_VECTOR
)
29451 return arm_emit_vector_const (fp
, x
);
29456 /* Output assembly for a shift instruction.
29457 SET_FLAGS determines how the instruction modifies the condition codes.
29458 0 - Do not set condition codes.
29459 1 - Set condition codes.
29460 2 - Use smallest instruction. */
29462 arm_output_shift(rtx
* operands
, int set_flags
)
29465 static const char flag_chars
[3] = {'?', '.', '!'};
29470 c
= flag_chars
[set_flags
];
29471 if (TARGET_UNIFIED_ASM
)
29473 shift
= shift_op(operands
[3], &val
);
29477 operands
[2] = GEN_INT(val
);
29478 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
29481 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
29484 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
29485 output_asm_insn (pattern
, operands
);
29489 /* Output assembly for a WMMX immediate shift instruction. */
29491 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
29493 int shift
= INTVAL (operands
[2]);
29495 enum machine_mode opmode
= GET_MODE (operands
[0]);
29497 gcc_assert (shift
>= 0);
29499 /* If the shift value in the register versions is > 63 (for D qualifier),
29500 31 (for W qualifier) or 15 (for H qualifier). */
29501 if (((opmode
== V4HImode
) && (shift
> 15))
29502 || ((opmode
== V2SImode
) && (shift
> 31))
29503 || ((opmode
== DImode
) && (shift
> 63)))
29507 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29508 output_asm_insn (templ
, operands
);
29509 if (opmode
== DImode
)
29511 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
29512 output_asm_insn (templ
, operands
);
29517 /* The destination register will contain all zeros. */
29518 sprintf (templ
, "wzero\t%%0");
29519 output_asm_insn (templ
, operands
);
29524 if ((opmode
== DImode
) && (shift
> 32))
29526 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29527 output_asm_insn (templ
, operands
);
29528 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
29529 output_asm_insn (templ
, operands
);
29533 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
29534 output_asm_insn (templ
, operands
);
29539 /* Output assembly for a WMMX tinsr instruction. */
29541 arm_output_iwmmxt_tinsr (rtx
*operands
)
29543 int mask
= INTVAL (operands
[3]);
29546 int units
= mode_nunits
[GET_MODE (operands
[0])];
29547 gcc_assert ((mask
& (mask
- 1)) == 0);
29548 for (i
= 0; i
< units
; ++i
)
29550 if ((mask
& 0x01) == 1)
29556 gcc_assert (i
< units
);
29558 switch (GET_MODE (operands
[0]))
29561 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
29564 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
29567 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
29570 gcc_unreachable ();
29573 output_asm_insn (templ
, operands
);
29578 /* Output a Thumb-1 casesi dispatch sequence. */
29580 thumb1_output_casesi (rtx
*operands
)
29582 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[0]));
29584 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29586 switch (GET_MODE(diff_vec
))
29589 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29590 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29592 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29593 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29595 return "bl\t%___gnu_thumb1_case_si";
29597 gcc_unreachable ();
29601 /* Output a Thumb-2 casesi instruction. */
29603 thumb2_output_casesi (rtx
*operands
)
29605 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
29607 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29609 output_asm_insn ("cmp\t%0, %1", operands
);
29610 output_asm_insn ("bhi\t%l3", operands
);
29611 switch (GET_MODE(diff_vec
))
29614 return "tbb\t[%|pc, %0]";
29616 return "tbh\t[%|pc, %0, lsl #1]";
29620 output_asm_insn ("adr\t%4, %l2", operands
);
29621 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
29622 output_asm_insn ("add\t%4, %4, %5", operands
);
29627 output_asm_insn ("adr\t%4, %l2", operands
);
29628 return "ldr\t%|pc, [%4, %0, lsl #2]";
29631 gcc_unreachable ();
29635 /* Most ARM cores are single issue, but some newer ones can dual issue.
29636 The scheduler descriptions rely on this being correct. */
29638 arm_issue_rate (void)
29665 /* A table and a function to perform ARM-specific name mangling for
29666 NEON vector types in order to conform to the AAPCS (see "Procedure
29667 Call Standard for the ARM Architecture", Appendix A). To qualify
29668 for emission with the mangled names defined in that document, a
29669 vector type must not only be of the correct mode but also be
29670 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29673 enum machine_mode mode
;
29674 const char *element_type_name
;
29675 const char *aapcs_name
;
29676 } arm_mangle_map_entry
;
29678 static arm_mangle_map_entry arm_mangle_map
[] = {
29679 /* 64-bit containerized types. */
29680 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
29681 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29682 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
29683 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29684 { V4HFmode
, "__builtin_neon_hf", "18__simd64_float16_t" },
29685 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
29686 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
29687 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
29688 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29689 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29691 /* 128-bit containerized types. */
29692 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
29693 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29694 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
29695 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29696 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
29697 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
29698 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
29699 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29700 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29701 { VOIDmode
, NULL
, NULL
}
29705 arm_mangle_type (const_tree type
)
29707 arm_mangle_map_entry
*pos
= arm_mangle_map
;
29709 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29710 has to be managled as if it is in the "std" namespace. */
29711 if (TARGET_AAPCS_BASED
29712 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
29713 return "St9__va_list";
29715 /* Half-precision float. */
29716 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
29719 if (TREE_CODE (type
) != VECTOR_TYPE
)
29722 /* Check the mode of the vector type, and the name of the vector
29723 element type, against the table. */
29724 while (pos
->mode
!= VOIDmode
)
29726 tree elt_type
= TREE_TYPE (type
);
29728 if (pos
->mode
== TYPE_MODE (type
)
29729 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
29730 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
29731 pos
->element_type_name
))
29732 return pos
->aapcs_name
;
29737 /* Use the default mangling for unrecognized (possibly user-defined)
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};
29754 /* Adjust register allocation order when compiling for Thumb. */
29757 arm_order_regs_for_local_alloc (void)
29759 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
29760 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
29762 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
29763 sizeof (thumb_core_reg_alloc_order
));
29766 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29769 arm_frame_pointer_required (void)
29771 return (cfun
->has_nonlocal_label
29772 || SUBTARGET_FRAME_POINTER_REQUIRED
29773 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
29776 /* Only thumb1 can't support conditional execution, so return true if
29777 the target is not thumb1. */
29779 arm_have_conditional_execution (void)
29781 return !TARGET_THUMB1
;
29785 arm_builtin_vectorized_function (tree fndecl
, tree type_out
, tree type_in
)
29787 enum machine_mode in_mode
, out_mode
;
29790 if (TREE_CODE (type_out
) != VECTOR_TYPE
29791 || TREE_CODE (type_in
) != VECTOR_TYPE
29792 || !(TARGET_NEON
&& TARGET_FPU_ARMV8
&& flag_unsafe_math_optimizations
))
29795 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
29796 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
29797 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
29798 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
29800 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29801 decl of the vectorized builtin for the appropriate vector mode.
29802 NULL_TREE is returned if no such builtin is available. */
29803 #undef ARM_CHECK_BUILTIN_MODE
29804 #define ARM_CHECK_BUILTIN_MODE(C) \
29805 (out_mode == SFmode && out_n == C \
29806 && in_mode == SFmode && in_n == C)
29808 #undef ARM_FIND_VRINT_VARIANT
29809 #define ARM_FIND_VRINT_VARIANT(N) \
29810 (ARM_CHECK_BUILTIN_MODE (2) \
29811 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29812 : (ARM_CHECK_BUILTIN_MODE (4) \
29813 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29816 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
)
29818 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
29821 case BUILT_IN_FLOORF
:
29822 return ARM_FIND_VRINT_VARIANT (vrintm
);
29823 case BUILT_IN_CEILF
:
29824 return ARM_FIND_VRINT_VARIANT (vrintp
);
29825 case BUILT_IN_TRUNCF
:
29826 return ARM_FIND_VRINT_VARIANT (vrintz
);
29827 case BUILT_IN_ROUNDF
:
29828 return ARM_FIND_VRINT_VARIANT (vrinta
);
29835 #undef ARM_CHECK_BUILTIN_MODE
29836 #undef ARM_FIND_VRINT_VARIANT
29838 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29839 static HOST_WIDE_INT
29840 arm_vector_alignment (const_tree type
)
29842 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
29844 if (TARGET_AAPCS_BASED
)
29845 align
= MIN (align
, 64);
29850 static unsigned int
29851 arm_autovectorize_vector_sizes (void)
29853 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
29857 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
29859 /* Vectors which aren't in packed structures will not be less aligned than
29860 the natural alignment of their element type, so this is safe. */
29861 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
29864 return default_builtin_vector_alignment_reachable (type
, is_packed
);
29868 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
29869 const_tree type
, int misalignment
,
29872 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
29874 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
29879 /* If the misalignment is unknown, we should be able to handle the access
29880 so long as it is not to a member of a packed data structure. */
29881 if (misalignment
== -1)
29884 /* Return true if the misalignment is a multiple of the natural alignment
29885 of the vector's element type. This is probably always going to be
29886 true in practice, since we've already established that this isn't a
29888 return ((misalignment
% align
) == 0);
29891 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
29896 arm_conditional_register_usage (void)
29900 if (TARGET_THUMB1
&& optimize_size
)
29902 /* When optimizing for size on Thumb-1, it's better not
29903 to use the HI regs, because of the overhead of
29905 for (regno
= FIRST_HI_REGNUM
;
29906 regno
<= LAST_HI_REGNUM
; ++regno
)
29907 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
29910 /* The link register can be clobbered by any branch insn,
29911 but we have no way to track that at present, so mark
29912 it as unavailable. */
29914 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
29916 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
29918 /* VFPv3 registers are disabled when earlier VFP
29919 versions are selected due to the definition of
29920 LAST_VFP_REGNUM. */
29921 for (regno
= FIRST_VFP_REGNUM
;
29922 regno
<= LAST_VFP_REGNUM
; ++ regno
)
29924 fixed_regs
[regno
] = 0;
29925 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
29926 || regno
>= FIRST_VFP_REGNUM
+ 32;
29930 if (TARGET_REALLY_IWMMXT
)
29932 regno
= FIRST_IWMMXT_GR_REGNUM
;
29933 /* The 2002/10/09 revision of the XScale ABI has wCG0
29934 and wCG1 as call-preserved registers. The 2002/11/21
29935 revision changed this so that all wCG registers are
29936 scratch registers. */
29937 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
29938 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
29939 fixed_regs
[regno
] = 0;
29940 /* The XScale ABI has wR0 - wR9 as scratch registers,
29941 the rest as call-preserved registers. */
29942 for (regno
= FIRST_IWMMXT_REGNUM
;
29943 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
29945 fixed_regs
[regno
] = 0;
29946 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
29950 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
29952 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29953 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29955 else if (TARGET_APCS_STACK
)
29957 fixed_regs
[10] = 1;
29958 call_used_regs
[10] = 1;
29960 /* -mcaller-super-interworking reserves r11 for calls to
29961 _interwork_r11_call_via_rN(). Making the register global
29962 is an easy way of ensuring that it remains valid for all
29964 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
29965 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
29967 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29968 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29969 if (TARGET_CALLER_INTERWORKING
)
29970 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29972 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29976 arm_preferred_rename_class (reg_class_t rclass
)
29978 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29979 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
29980 and code size can be reduced. */
29981 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
29987 /* Compute the atrribute "length" of insn "*push_multi".
29988 So this function MUST be kept in sync with that insn pattern. */
29990 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
29992 int i
, regno
, hi_reg
;
29993 int num_saves
= XVECLEN (parallel_op
, 0);
30003 regno
= REGNO (first_op
);
30004 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30005 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
30007 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
30008 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30016 /* Compute the number of instructions emitted by output_move_double. */
30018 arm_count_output_move_double_insns (rtx
*operands
)
30022 /* output_move_double may modify the operands array, so call it
30023 here on a copy of the array. */
30024 ops
[0] = operands
[0];
30025 ops
[1] = operands
[1];
30026 output_move_double (ops
, false, &count
);
30031 vfp3_const_double_for_fract_bits (rtx operand
)
30033 REAL_VALUE_TYPE r0
;
30035 if (!CONST_DOUBLE_P (operand
))
30038 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
30039 if (exact_real_inverse (DFmode
, &r0
))
30041 if (exact_real_truncate (DFmode
, &r0
))
30043 HOST_WIDE_INT value
= real_to_integer (&r0
);
30044 value
= value
& 0xffffffff;
30045 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30046 return int_log2 (value
);
30053 vfp3_const_double_for_bits (rtx operand
)
30055 REAL_VALUE_TYPE r0
;
30057 if (!CONST_DOUBLE_P (operand
))
30060 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
30061 if (exact_real_truncate (DFmode
, &r0
))
30063 HOST_WIDE_INT value
= real_to_integer (&r0
);
30064 value
= value
& 0xffffffff;
30065 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30066 return int_log2 (value
);
30072 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30075 arm_pre_atomic_barrier (enum memmodel model
)
30077 if (need_atomic_barrier_p (model
, true))
30078 emit_insn (gen_memory_barrier ());
30082 arm_post_atomic_barrier (enum memmodel model
)
30084 if (need_atomic_barrier_p (model
, false))
30085 emit_insn (gen_memory_barrier ());
30088 /* Emit the load-exclusive and store-exclusive instructions.
30089 Use acquire and release versions if necessary. */
30092 arm_emit_load_exclusive (enum machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
30094 rtx (*gen
) (rtx
, rtx
);
30100 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
30101 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
30102 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
30103 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
30105 gcc_unreachable ();
30112 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
30113 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
30114 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
30115 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
30117 gcc_unreachable ();
30121 emit_insn (gen (rval
, mem
));
30125 arm_emit_store_exclusive (enum machine_mode mode
, rtx bval
, rtx rval
,
30128 rtx (*gen
) (rtx
, rtx
, rtx
);
30134 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
30135 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
30136 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
30137 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
30139 gcc_unreachable ();
30146 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
30147 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
30148 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
30149 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
30151 gcc_unreachable ();
30155 emit_insn (gen (bval
, rval
, mem
));
30158 /* Mark the previous jump instruction as unlikely. */
30161 emit_unlikely_jump (rtx insn
)
30163 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
30165 insn
= emit_jump_insn (insn
);
30166 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
30169 /* Expand a compare and swap pattern. */
30172 arm_expand_compare_and_swap (rtx operands
[])
30174 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
30175 enum machine_mode mode
;
30176 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
30178 bval
= operands
[0];
30179 rval
= operands
[1];
30181 oldval
= operands
[3];
30182 newval
= operands
[4];
30183 is_weak
= operands
[5];
30184 mod_s
= operands
[6];
30185 mod_f
= operands
[7];
30186 mode
= GET_MODE (mem
);
30188 /* Normally the succ memory model must be stronger than fail, but in the
30189 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30190 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30192 if (TARGET_HAVE_LDACQ
30193 && INTVAL (mod_f
) == MEMMODEL_ACQUIRE
30194 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
30195 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
30201 /* For narrow modes, we're going to perform the comparison in SImode,
30202 so do the zero-extension now. */
30203 rval
= gen_reg_rtx (SImode
);
30204 oldval
= convert_modes (SImode
, mode
, oldval
, true);
30208 /* Force the value into a register if needed. We waited until after
30209 the zero-extension above to do this properly. */
30210 if (!arm_add_operand (oldval
, SImode
))
30211 oldval
= force_reg (SImode
, oldval
);
30215 if (!cmpdi_operand (oldval
, mode
))
30216 oldval
= force_reg (mode
, oldval
);
30220 gcc_unreachable ();
30225 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
30226 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
30227 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
30228 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
30230 gcc_unreachable ();
30233 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
30235 if (mode
== QImode
|| mode
== HImode
)
30236 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
30238 /* In all cases, we arrange for success to be signaled by Z set.
30239 This arrangement allows for the boolean result to be used directly
30240 in a subsequent branch, post optimization. */
30241 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30242 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
30243 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
30246 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30247 another memory store between the load-exclusive and store-exclusive can
30248 reset the monitor from Exclusive to Open state. This means we must wait
30249 until after reload to split the pattern, lest we get a register spill in
30250 the middle of the atomic sequence. */
30253 arm_split_compare_and_swap (rtx operands
[])
30255 rtx rval
, mem
, oldval
, newval
, scratch
;
30256 enum machine_mode mode
;
30257 enum memmodel mod_s
, mod_f
;
30259 rtx label1
, label2
, x
, cond
;
30261 rval
= operands
[0];
30263 oldval
= operands
[2];
30264 newval
= operands
[3];
30265 is_weak
= (operands
[4] != const0_rtx
);
30266 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
30267 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
30268 scratch
= operands
[7];
30269 mode
= GET_MODE (mem
);
30271 bool use_acquire
= TARGET_HAVE_LDACQ
30272 && !(mod_s
== MEMMODEL_RELAXED
30273 || mod_s
== MEMMODEL_CONSUME
30274 || mod_s
== MEMMODEL_RELEASE
);
30276 bool use_release
= TARGET_HAVE_LDACQ
30277 && !(mod_s
== MEMMODEL_RELAXED
30278 || mod_s
== MEMMODEL_CONSUME
30279 || mod_s
== MEMMODEL_ACQUIRE
);
30281 /* Checks whether a barrier is needed and emits one accordingly. */
30282 if (!(use_acquire
|| use_release
))
30283 arm_pre_atomic_barrier (mod_s
);
30288 label1
= gen_label_rtx ();
30289 emit_label (label1
);
30291 label2
= gen_label_rtx ();
30293 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
30295 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
30296 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30297 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30298 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
30299 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30301 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
30303 /* Weak or strong, we want EQ to be true for success, so that we
30304 match the flags that we got from the compare above. */
30305 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30306 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
30307 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
30311 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30312 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30313 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
30314 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30317 if (mod_f
!= MEMMODEL_RELAXED
)
30318 emit_label (label2
);
30320 /* Checks whether a barrier is needed and emits one accordingly. */
30321 if (!(use_acquire
|| use_release
))
30322 arm_post_atomic_barrier (mod_s
);
30324 if (mod_f
== MEMMODEL_RELAXED
)
30325 emit_label (label2
);
30329 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
30330 rtx value
, rtx model_rtx
, rtx cond
)
30332 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
30333 enum machine_mode mode
= GET_MODE (mem
);
30334 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
30337 bool use_acquire
= TARGET_HAVE_LDACQ
30338 && !(model
== MEMMODEL_RELAXED
30339 || model
== MEMMODEL_CONSUME
30340 || model
== MEMMODEL_RELEASE
);
30342 bool use_release
= TARGET_HAVE_LDACQ
30343 && !(model
== MEMMODEL_RELAXED
30344 || model
== MEMMODEL_CONSUME
30345 || model
== MEMMODEL_ACQUIRE
);
30347 /* Checks whether a barrier is needed and emits one accordingly. */
30348 if (!(use_acquire
|| use_release
))
30349 arm_pre_atomic_barrier (model
);
30351 label
= gen_label_rtx ();
30352 emit_label (label
);
30355 new_out
= gen_lowpart (wmode
, new_out
);
30357 old_out
= gen_lowpart (wmode
, old_out
);
30360 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
30362 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
30371 x
= gen_rtx_AND (wmode
, old_out
, value
);
30372 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30373 x
= gen_rtx_NOT (wmode
, new_out
);
30374 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30378 if (CONST_INT_P (value
))
30380 value
= GEN_INT (-INTVAL (value
));
30386 if (mode
== DImode
)
30388 /* DImode plus/minus need to clobber flags. */
30389 /* The adddi3 and subdi3 patterns are incorrectly written so that
30390 they require matching operands, even when we could easily support
30391 three operands. Thankfully, this can be fixed up post-splitting,
30392 as the individual add+adc patterns do accept three operands and
30393 post-reload cprop can make these moves go away. */
30394 emit_move_insn (new_out
, old_out
);
30396 x
= gen_adddi3 (new_out
, new_out
, value
);
30398 x
= gen_subdi3 (new_out
, new_out
, value
);
30405 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
30406 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30410 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
30413 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30414 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
30416 /* Checks whether a barrier is needed and emits one accordingly. */
30417 if (!(use_acquire
|| use_release
))
30418 arm_post_atomic_barrier (model
);
30421 #define MAX_VECT_LEN 16
30423 struct expand_vec_perm_d
30425 rtx target
, op0
, op1
;
30426 unsigned char perm
[MAX_VECT_LEN
];
30427 enum machine_mode vmode
;
30428 unsigned char nelt
;
30433 /* Generate a variable permutation. */
30436 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30438 enum machine_mode vmode
= GET_MODE (target
);
30439 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30441 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
30442 gcc_checking_assert (GET_MODE (op0
) == vmode
);
30443 gcc_checking_assert (GET_MODE (op1
) == vmode
);
30444 gcc_checking_assert (GET_MODE (sel
) == vmode
);
30445 gcc_checking_assert (TARGET_NEON
);
30449 if (vmode
== V8QImode
)
30450 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
30452 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
30458 if (vmode
== V8QImode
)
30460 pair
= gen_reg_rtx (V16QImode
);
30461 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
30462 pair
= gen_lowpart (TImode
, pair
);
30463 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
30467 pair
= gen_reg_rtx (OImode
);
30468 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
30469 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
30475 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30477 enum machine_mode vmode
= GET_MODE (target
);
30478 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
30479 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30480 rtx rmask
[MAX_VECT_LEN
], mask
;
30482 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30483 numbering of elements for big-endian, we must reverse the order. */
30484 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
30486 /* The VTBL instruction does not use a modulo index, so we must take care
30487 of that ourselves. */
30488 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30489 for (i
= 0; i
< nelt
; ++i
)
30491 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
30492 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
30494 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
30497 /* Generate or test for an insn that supports a constant permutation. */
30499 /* Recognize patterns for the VUZP insns. */
30502 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
30504 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30505 rtx out0
, out1
, in0
, in1
, x
;
30506 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30508 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30511 /* Note that these are little-endian tests. Adjust for big-endian later. */
30512 if (d
->perm
[0] == 0)
30514 else if (d
->perm
[0] == 1)
30518 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30520 for (i
= 0; i
< nelt
; i
++)
30522 unsigned elt
= (i
* 2 + odd
) & mask
;
30523 if (d
->perm
[i
] != elt
)
30533 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
30534 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
30535 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
30536 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
30537 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
30538 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
30539 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
30540 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
30542 gcc_unreachable ();
30547 if (BYTES_BIG_ENDIAN
)
30549 x
= in0
, in0
= in1
, in1
= x
;
30554 out1
= gen_reg_rtx (d
->vmode
);
30556 x
= out0
, out0
= out1
, out1
= x
;
30558 emit_insn (gen (out0
, in0
, in1
, out1
));
30562 /* Recognize patterns for the VZIP insns. */
30565 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
30567 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
30568 rtx out0
, out1
, in0
, in1
, x
;
30569 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30571 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30574 /* Note that these are little-endian tests. Adjust for big-endian later. */
30576 if (d
->perm
[0] == high
)
30578 else if (d
->perm
[0] == 0)
30582 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30584 for (i
= 0; i
< nelt
/ 2; i
++)
30586 unsigned elt
= (i
+ high
) & mask
;
30587 if (d
->perm
[i
* 2] != elt
)
30589 elt
= (elt
+ nelt
) & mask
;
30590 if (d
->perm
[i
* 2 + 1] != elt
)
30600 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
30601 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
30602 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
30603 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
30604 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
30605 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
30606 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
30607 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
30609 gcc_unreachable ();
30614 if (BYTES_BIG_ENDIAN
)
30616 x
= in0
, in0
= in1
, in1
= x
;
30621 out1
= gen_reg_rtx (d
->vmode
);
30623 x
= out0
, out0
= out1
, out1
= x
;
30625 emit_insn (gen (out0
, in0
, in1
, out1
));
30629 /* Recognize patterns for the VREV insns. */
30632 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
30634 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
30635 rtx (*gen
)(rtx
, rtx
, rtx
);
30637 if (!d
->one_vector_p
)
30646 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
30647 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
30655 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
30656 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
30657 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
30658 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
30666 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
30667 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
30668 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
30669 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
30670 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
30671 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
30672 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
30673 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
30682 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
30683 for (j
= 0; j
<= diff
; j
+= 1)
30685 /* This is guaranteed to be true as the value of diff
30686 is 7, 3, 1 and we should have enough elements in the
30687 queue to generate this. Getting a vector mask with a
30688 value of diff other than these values implies that
30689 something is wrong by the time we get here. */
30690 gcc_assert (i
+ j
< nelt
);
30691 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
30699 /* ??? The third operand is an artifact of the builtin infrastructure
30700 and is ignored by the actual instruction. */
30701 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
30705 /* Recognize patterns for the VTRN insns. */
30708 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
30710 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30711 rtx out0
, out1
, in0
, in1
, x
;
30712 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30714 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30717 /* Note that these are little-endian tests. Adjust for big-endian later. */
30718 if (d
->perm
[0] == 0)
30720 else if (d
->perm
[0] == 1)
30724 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30726 for (i
= 0; i
< nelt
; i
+= 2)
30728 if (d
->perm
[i
] != i
+ odd
)
30730 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
30740 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
30741 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
30742 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
30743 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
30744 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
30745 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
30746 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
30747 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
30749 gcc_unreachable ();
30754 if (BYTES_BIG_ENDIAN
)
30756 x
= in0
, in0
= in1
, in1
= x
;
30761 out1
= gen_reg_rtx (d
->vmode
);
30763 x
= out0
, out0
= out1
, out1
= x
;
30765 emit_insn (gen (out0
, in0
, in1
, out1
));
30769 /* Recognize patterns for the VEXT insns. */
30772 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
30774 unsigned int i
, nelt
= d
->nelt
;
30775 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
30778 unsigned int location
;
30780 unsigned int next
= d
->perm
[0] + 1;
30782 /* TODO: Handle GCC's numbering of elements for big-endian. */
30783 if (BYTES_BIG_ENDIAN
)
30786 /* Check if the extracted indexes are increasing by one. */
30787 for (i
= 1; i
< nelt
; next
++, i
++)
30789 /* If we hit the most significant element of the 2nd vector in
30790 the previous iteration, no need to test further. */
30791 if (next
== 2 * nelt
)
30794 /* If we are operating on only one vector: it could be a
30795 rotation. If there are only two elements of size < 64, let
30796 arm_evpc_neon_vrev catch it. */
30797 if (d
->one_vector_p
&& (next
== nelt
))
30799 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
30805 if (d
->perm
[i
] != next
)
30809 location
= d
->perm
[0];
30813 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
30814 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
30815 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
30816 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
30817 case V2SImode
: gen
= gen_neon_vextv2si
; break;
30818 case V4SImode
: gen
= gen_neon_vextv4si
; break;
30819 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
30820 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
30821 case V2DImode
: gen
= gen_neon_vextv2di
; break;
30830 offset
= GEN_INT (location
);
30831 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
30835 /* The NEON VTBL instruction is a fully variable permuation that's even
30836 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30837 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30838 can do slightly better by expanding this as a constant where we don't
30839 have to apply a mask. */
30842 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
30844 rtx rperm
[MAX_VECT_LEN
], sel
;
30845 enum machine_mode vmode
= d
->vmode
;
30846 unsigned int i
, nelt
= d
->nelt
;
30848 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30849 numbering of elements for big-endian, we must reverse the order. */
30850 if (BYTES_BIG_ENDIAN
)
30856 /* Generic code will try constant permutation twice. Once with the
30857 original mode and again with the elements lowered to QImode.
30858 So wait and don't do the selector expansion ourselves. */
30859 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
30862 for (i
= 0; i
< nelt
; ++i
)
30863 rperm
[i
] = GEN_INT (d
->perm
[i
]);
30864 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
30865 sel
= force_reg (vmode
, sel
);
30867 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
30872 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
30874 /* Check if the input mask matches vext before reordering the
30877 if (arm_evpc_neon_vext (d
))
30880 /* The pattern matching functions above are written to look for a small
30881 number to begin the sequence (0, 1, N/2). If we begin with an index
30882 from the second operand, we can swap the operands. */
30883 if (d
->perm
[0] >= d
->nelt
)
30885 unsigned i
, nelt
= d
->nelt
;
30888 for (i
= 0; i
< nelt
; ++i
)
30889 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
30898 if (arm_evpc_neon_vuzp (d
))
30900 if (arm_evpc_neon_vzip (d
))
30902 if (arm_evpc_neon_vrev (d
))
30904 if (arm_evpc_neon_vtrn (d
))
30906 return arm_evpc_neon_vtbl (d
);
30911 /* Expand a vec_perm_const pattern. */
30914 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30916 struct expand_vec_perm_d d
;
30917 int i
, nelt
, which
;
30923 d
.vmode
= GET_MODE (target
);
30924 gcc_assert (VECTOR_MODE_P (d
.vmode
));
30925 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
30926 d
.testing_p
= false;
30928 for (i
= which
= 0; i
< nelt
; ++i
)
30930 rtx e
= XVECEXP (sel
, 0, i
);
30931 int ei
= INTVAL (e
) & (2 * nelt
- 1);
30932 which
|= (ei
< nelt
? 1 : 2);
30942 d
.one_vector_p
= false;
30943 if (!rtx_equal_p (op0
, op1
))
30946 /* The elements of PERM do not suggest that only the first operand
30947 is used, but both operands are identical. Allow easier matching
30948 of the permutation by folding the permutation into the single
30952 for (i
= 0; i
< nelt
; ++i
)
30953 d
.perm
[i
] &= nelt
- 1;
30955 d
.one_vector_p
= true;
30960 d
.one_vector_p
= true;
30964 return arm_expand_vec_perm_const_1 (&d
);
30967 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30970 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
30971 const unsigned char *sel
)
30973 struct expand_vec_perm_d d
;
30974 unsigned int i
, nelt
, which
;
30978 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
30979 d
.testing_p
= true;
30980 memcpy (d
.perm
, sel
, nelt
);
30982 /* Categorize the set of elements in the selector. */
30983 for (i
= which
= 0; i
< nelt
; ++i
)
30985 unsigned char e
= d
.perm
[i
];
30986 gcc_assert (e
< 2 * nelt
);
30987 which
|= (e
< nelt
? 1 : 2);
30990 /* For all elements from second vector, fold the elements to first. */
30992 for (i
= 0; i
< nelt
; ++i
)
30995 /* Check whether the mask can be applied to the vector type. */
30996 d
.one_vector_p
= (which
!= 3);
30998 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
30999 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
31000 if (!d
.one_vector_p
)
31001 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
31004 ret
= arm_expand_vec_perm_const_1 (&d
);
31011 arm_autoinc_modes_ok_p (enum machine_mode mode
, enum arm_auto_incmodes code
)
31013 /* If we are soft float and we do not have ldrd
31014 then all auto increment forms are ok. */
31015 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
31020 /* Post increment and Pre Decrement are supported for all
31021 instruction forms except for vector forms. */
31024 if (VECTOR_MODE_P (mode
))
31026 if (code
!= ARM_PRE_DEC
)
31036 /* Without LDRD and mode size greater than
31037 word size, there is no point in auto-incrementing
31038 because ldm and stm will not have these forms. */
31039 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
31042 /* Vector and floating point modes do not support
31043 these auto increment forms. */
31044 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
31057 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31058 on ARM, since we know that shifts by negative amounts are no-ops.
31059 Additionally, the default expansion code is not available or suitable
31060 for post-reload insn splits (this can occur when the register allocator
31061 chooses not to do a shift in NEON).
31063 This function is used in both initial expand and post-reload splits, and
31064 handles all kinds of 64-bit shifts.
31066 Input requirements:
31067 - It is safe for the input and output to be the same register, but
31068 early-clobber rules apply for the shift amount and scratch registers.
31069 - Shift by register requires both scratch registers. In all other cases
31070 the scratch registers may be NULL.
31071 - Ashiftrt by a register also clobbers the CC register. */
31073 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
31074 rtx amount
, rtx scratch1
, rtx scratch2
)
31076 rtx out_high
= gen_highpart (SImode
, out
);
31077 rtx out_low
= gen_lowpart (SImode
, out
);
31078 rtx in_high
= gen_highpart (SImode
, in
);
31079 rtx in_low
= gen_lowpart (SImode
, in
);
31082 in = the register pair containing the input value.
31083 out = the destination register pair.
31084 up = the high- or low-part of each pair.
31085 down = the opposite part to "up".
31086 In a shift, we can consider bits to shift from "up"-stream to
31087 "down"-stream, so in a left-shift "up" is the low-part and "down"
31088 is the high-part of each register pair. */
31090 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
31091 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
31092 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
31093 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
31095 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
31097 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
31098 && GET_MODE (out
) == DImode
);
31100 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
31101 && GET_MODE (in
) == DImode
);
31103 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
31104 && GET_MODE (amount
) == SImode
)
31105 || CONST_INT_P (amount
)));
31106 gcc_assert (scratch1
== NULL
31107 || (GET_CODE (scratch1
) == SCRATCH
)
31108 || (GET_MODE (scratch1
) == SImode
31109 && REG_P (scratch1
)));
31110 gcc_assert (scratch2
== NULL
31111 || (GET_CODE (scratch2
) == SCRATCH
)
31112 || (GET_MODE (scratch2
) == SImode
31113 && REG_P (scratch2
)));
31114 gcc_assert (!REG_P (out
) || !REG_P (amount
)
31115 || !HARD_REGISTER_P (out
)
31116 || (REGNO (out
) != REGNO (amount
)
31117 && REGNO (out
) + 1 != REGNO (amount
)));
31119 /* Macros to make following code more readable. */
31120 #define SUB_32(DEST,SRC) \
31121 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31122 #define RSB_32(DEST,SRC) \
31123 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31124 #define SUB_S_32(DEST,SRC) \
31125 gen_addsi3_compare0 ((DEST), (SRC), \
31127 #define SET(DEST,SRC) \
31128 gen_rtx_SET (SImode, (DEST), (SRC))
31129 #define SHIFT(CODE,SRC,AMOUNT) \
31130 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31131 #define LSHIFT(CODE,SRC,AMOUNT) \
31132 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31133 SImode, (SRC), (AMOUNT))
31134 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31135 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31136 SImode, (SRC), (AMOUNT))
31138 gen_rtx_IOR (SImode, (A), (B))
31139 #define BRANCH(COND,LABEL) \
31140 gen_arm_cond_branch ((LABEL), \
31141 gen_rtx_ ## COND (CCmode, cc_reg, \
31145 /* Shifts by register and shifts by constant are handled separately. */
31146 if (CONST_INT_P (amount
))
31148 /* We have a shift-by-constant. */
31150 /* First, handle out-of-range shift amounts.
31151 In both cases we try to match the result an ARM instruction in a
31152 shift-by-register would give. This helps reduce execution
31153 differences between optimization levels, but it won't stop other
31154 parts of the compiler doing different things. This is "undefined
31155 behaviour, in any case. */
31156 if (INTVAL (amount
) <= 0)
31157 emit_insn (gen_movdi (out
, in
));
31158 else if (INTVAL (amount
) >= 64)
31160 if (code
== ASHIFTRT
)
31162 rtx const31_rtx
= GEN_INT (31);
31163 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
31164 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
31167 emit_insn (gen_movdi (out
, const0_rtx
));
31170 /* Now handle valid shifts. */
31171 else if (INTVAL (amount
) < 32)
31173 /* Shifts by a constant less than 32. */
31174 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
31176 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31177 emit_insn (SET (out_down
,
31178 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
31180 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31184 /* Shifts by a constant greater than 31. */
31185 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
31187 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
31188 if (code
== ASHIFTRT
)
31189 emit_insn (gen_ashrsi3 (out_up
, in_up
,
31192 emit_insn (SET (out_up
, const0_rtx
));
31197 /* We have a shift-by-register. */
31198 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
31200 /* This alternative requires the scratch registers. */
31201 gcc_assert (scratch1
&& REG_P (scratch1
));
31202 gcc_assert (scratch2
&& REG_P (scratch2
));
31204 /* We will need the values "amount-32" and "32-amount" later.
31205 Swapping them around now allows the later code to be more general. */
31209 emit_insn (SUB_32 (scratch1
, amount
));
31210 emit_insn (RSB_32 (scratch2
, amount
));
31213 emit_insn (RSB_32 (scratch1
, amount
));
31214 /* Also set CC = amount > 32. */
31215 emit_insn (SUB_S_32 (scratch2
, amount
));
31218 emit_insn (RSB_32 (scratch1
, amount
));
31219 emit_insn (SUB_32 (scratch2
, amount
));
31222 gcc_unreachable ();
31225 /* Emit code like this:
31228 out_down = in_down << amount;
31229 out_down = (in_up << (amount - 32)) | out_down;
31230 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31231 out_up = in_up << amount;
31234 out_down = in_down >> amount;
31235 out_down = (in_up << (32 - amount)) | out_down;
31237 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31238 out_up = in_up << amount;
31241 out_down = in_down >> amount;
31242 out_down = (in_up << (32 - amount)) | out_down;
31244 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31245 out_up = in_up << amount;
31247 The ARM and Thumb2 variants are the same but implemented slightly
31248 differently. If this were only called during expand we could just
31249 use the Thumb2 case and let combine do the right thing, but this
31250 can also be called from post-reload splitters. */
31252 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31254 if (!TARGET_THUMB2
)
31256 /* Emit code for ARM mode. */
31257 emit_insn (SET (out_down
,
31258 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
31259 if (code
== ASHIFTRT
)
31261 rtx done_label
= gen_label_rtx ();
31262 emit_jump_insn (BRANCH (LT
, done_label
));
31263 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
31265 emit_label (done_label
);
31268 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
31273 /* Emit code for Thumb2 mode.
31274 Thumb2 can't do shift and or in one insn. */
31275 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
31276 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
31278 if (code
== ASHIFTRT
)
31280 rtx done_label
= gen_label_rtx ();
31281 emit_jump_insn (BRANCH (LT
, done_label
));
31282 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
31283 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
31284 emit_label (done_label
);
31288 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
31289 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
31293 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31308 /* Returns true if a valid comparison operation and makes
31309 the operands in a form that is valid. */
31311 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
31313 enum rtx_code code
= GET_CODE (*comparison
);
31315 enum machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
31316 ? GET_MODE (*op2
) : GET_MODE (*op1
);
31318 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
31320 if (code
== UNEQ
|| code
== LTGT
)
31323 code_int
= (int)code
;
31324 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
31325 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
31330 if (!arm_add_operand (*op1
, mode
))
31331 *op1
= force_reg (mode
, *op1
);
31332 if (!arm_add_operand (*op2
, mode
))
31333 *op2
= force_reg (mode
, *op2
);
31337 if (!cmpdi_operand (*op1
, mode
))
31338 *op1
= force_reg (mode
, *op1
);
31339 if (!cmpdi_operand (*op2
, mode
))
31340 *op2
= force_reg (mode
, *op2
);
31345 if (!arm_float_compare_operand (*op1
, mode
))
31346 *op1
= force_reg (mode
, *op1
);
31347 if (!arm_float_compare_operand (*op2
, mode
))
31348 *op2
= force_reg (mode
, *op2
);
31358 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31360 static unsigned HOST_WIDE_INT
31361 arm_asan_shadow_offset (void)
31363 return (unsigned HOST_WIDE_INT
) 1 << 29;
31367 /* This is a temporary fix for PR60655. Ideally we need
31368 to handle most of these cases in the generic part but
31369 currently we reject minus (..) (sym_ref). We try to
31370 ameliorate the case with minus (sym_ref1) (sym_ref2)
31371 where they are in the same section. */
31374 arm_const_not_ok_for_debug_p (rtx p
)
31376 tree decl_op0
= NULL
;
31377 tree decl_op1
= NULL
;
31379 if (GET_CODE (p
) == MINUS
)
31381 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
31383 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
31385 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
31386 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
31388 if ((TREE_CODE (decl_op1
) == VAR_DECL
31389 || TREE_CODE (decl_op1
) == CONST_DECL
)
31390 && (TREE_CODE (decl_op0
) == VAR_DECL
31391 || TREE_CODE (decl_op0
) == CONST_DECL
))
31392 return (get_variable_section (decl_op1
, false)
31393 != get_variable_section (decl_op0
, false));
31395 if (TREE_CODE (decl_op1
) == LABEL_DECL
31396 && TREE_CODE (decl_op0
) == LABEL_DECL
)
31397 return (DECL_CONTEXT (decl_op1
)
31398 != DECL_CONTEXT (decl_op0
));
31408 #include "gt-arm.h"